"""
Quadrat statistics for planar point patterns
- First argument everywhere is a NumPy ndarray of point coordinates (n, 2).
- MBB is computed internally from the array.
- Optional `window` (shapely geometry). If omitted, uses the MBB rectangle.
- Optional `rng` for reproducible simulation-based inference.
- Rectangle and hexagon grids supported.
- Plotting is self-contained (matplotlib only): scatters points, draws window, draws grid,
annotates counts, and optionally annotates per-cell chi-square contributions.
"""
__author__ = "Serge Rey, Wei Kang, Hu Shao"
__all__ = ["RectangleM", "HexagonM", "QStatistic"]
import math
import geopandas as gpd
import numpy as np
import scipy.stats
from matplotlib import pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon as MplPolygon
from shapely.geometry import MultiPolygon, Polygon, box
from .random import poisson
# -----------------------------------------------------------------------------
# Helpers
# -----------------------------------------------------------------------------
def _as_points_array(points) -> np.ndarray:
"""
Normalize input into an (n, 2) float ndarray.
Accepts:
- array-like shaped (n, 2)
- GeoPandas GeoSeries / GeometryArray of Points (and optionally MultiPoints)
- PointPattern
Returns:
- np.ndarray of shape (n, 2)
Raises:
- ValueError for empty inputs or non-2D coordinate inputs
- TypeError for unsupported geometry types
"""
if isinstance(points, gpd.GeoDataFrame):
points = points.geometry
if isinstance(points, gpd.GeoSeries):
points = points.dropna()
if not points.geom_type.isin(["Point", "MultiPoint"]).all():
raise TypeError(
"GeoSeries must contain Point geometries (optionally MultiPoint). "
f"Got geometry type: {getattr(points, 'geom_type', type(points).__name__)}"
)
coords = points.get_coordinates()
if len(coords) == 0:
raise ValueError("no non-empty Point geometries.")
return coords.values.astype(float)
# ---- PointPattern ---
if hasattr(points, "points"):
return points.points.values
# ---- Generic array-like path ----
pts = np.asarray(points)
if pts.ndim != 2 or pts.shape[1] != 2:
raise ValueError(f"points must be a (n, 2) array-like. Got shape {pts.shape}.")
if pts.size == 0:
raise ValueError("points is empty; cannot compute mbb.")
return pts
def _compute_mbb(points: np.ndarray) -> np.ndarray:
x_min = float(np.min(points[:, 0]))
y_min = float(np.min(points[:, 1]))
x_max = float(np.max(points[:, 0]))
y_max = float(np.max(points[:, 1]))
return np.array([x_min, y_min, x_max, y_max], dtype=float)
def _ensure_window(window, mbb):
if window is None:
return box(mbb[0], mbb[1], mbb[2], mbb[3])
return window
def _coerce_rng(rng):
"""Coerce rng input to a NumPy Generator.
Parameters
----------
rng : None | int | numpy.random.Generator | numpy.random.RandomState | numpy.random.BitGenerator
Random number generator specification.
Returns
-------
numpy.random.Generator
"""
if rng is None:
return np.random.default_rng()
if isinstance(rng, np.random.Generator):
return rng
# Accept legacy RandomState by deriving a seed from it.
if isinstance(rng, np.random.RandomState):
# Draw a seed deterministically from the RandomState stream.
seed = int(rng.randint(0, 2**32 - 1, dtype=np.uint32))
return np.random.default_rng(seed)
# If a BitGenerator is passed, wrap it.
if isinstance(rng, np.random.BitGenerator):
return np.random.Generator(rng)
# Int-like seeds
if isinstance(rng, (int, np.integer)):
return np.random.default_rng(int(rng))
# Fall back to NumPy’s coercion (e.g., SeedSequence)
return np.random.default_rng(rng)
def _window_to_paths(window_geom):
"""Return list of (x, y) arrays for plotting boundary(ies)."""
paths = []
if window_geom is None:
return paths
if isinstance(window_geom, Polygon):
x, y = window_geom.exterior.xy
paths.append((np.asarray(x), np.asarray(y)))
for ring in window_geom.interiors:
x, y = ring.xy
paths.append((np.asarray(x), np.asarray(y)))
return paths
if isinstance(window_geom, MultiPolygon):
for poly in window_geom.geoms:
paths.extend(_window_to_paths(poly))
return paths
if hasattr(window_geom, "geoms"):
for g in window_geom.geoms:
paths.extend(_window_to_paths(g))
return paths
def _scatter_points(ax, points):
ax.scatter(points[:, 0], points[:, 1], s=10)
def _draw_window(ax, window_geom):
for x, y in _window_to_paths(window_geom):
ax.plot(x, y, lw=1.5)
def _cell_center_from_bounds(x0, y0, x1, y1):
return (0.5 * (x0 + x1), 0.5 * (y0 + y1))
# -----------------------------------------------------------------------------
# Rectangle grid
# -----------------------------------------------------------------------------
[docs]
class RectangleM:
"""
Rectangle grid structure for quadrat-based method.
"""
[docs]
def __init__(
self,
pp,
count_column=3,
count_row=3,
rectangle_width=0,
rectangle_height=0,
window=None,
):
self.points = _as_points_array(pp)
self.mbb = _compute_mbb(self.points)
self.window = _ensure_window(window, self.mbb)
x_range = self.mbb[2] - self.mbb[0]
y_range = self.mbb[3] - self.mbb[1]
if rectangle_width and rectangle_height:
self.rectangle_width = float(rectangle_width)
self.rectangle_height = float(rectangle_height)
self.count_column = int(math.ceil(x_range / self.rectangle_width))
self.count_row = int(math.ceil(y_range / self.rectangle_height))
else:
self.count_column = int(count_column)
self.count_row = int(count_row)
self.rectangle_width = x_range / float(self.count_column)
self.rectangle_height = y_range / float(self.count_row)
self.num = self.count_column * self.count_row
[docs]
def point_location_sta(self):
dict_id_count = {
j + i * self.count_column: 0
for i in range(self.count_row)
for j in range(self.count_column)
}
x_min, y_min = self.mbb[0], self.mbb[1]
for point in self.points:
index_x = (point[0] - x_min) // self.rectangle_width
index_y = (point[1] - y_min) // self.rectangle_height
if index_x == self.count_column:
index_x -= 1
if index_y == self.count_row:
index_y -= 1
cell_id = int(index_y) * self.count_column + int(index_x)
dict_id_count[cell_id] += 1
return dict_id_count
def _rect_cell_bounds(self, ix, iy):
x0 = self.mbb[0] + ix * self.rectangle_width
y0 = self.mbb[1] + iy * self.rectangle_height
x1 = x0 + self.rectangle_width
y1 = y0 + self.rectangle_height
return x0, y0, x1, y1
def _rect_cell_polygon(self, ix, iy):
x0, y0, x1, y1 = self._rect_cell_bounds(ix, iy)
return box(x0, y0, x1, y1)
def _iter_cells(self):
for iy in range(self.count_row):
for ix in range(self.count_column):
cell_id = ix + iy * self.count_column
poly = self._rect_cell_polygon(ix, iy)
x0, y0, x1, y1 = self._rect_cell_bounds(ix, iy)
cx, cy = _cell_center_from_bounds(x0, y0, x1, y1)
yield cell_id, poly, (cx, cy)
[docs]
def plot(
self,
title="Quadrat Count",
show="counts",
chi2_contrib=None,
):
fig, ax = plt.subplots()
ax.set_title(title)
_scatter_points(ax, self.points)
_draw_window(ax, self.window)
dict_id_count = self.point_location_sta()
patches = []
ann_xy = []
cell_ids = []
for cell_id, poly, (cx, cy) in self._iter_cells():
patches.append(MplPolygon(np.asarray(poly.exterior.coords), closed=True))
ann_xy.append((cx, cy))
cell_ids.append(cell_id)
pc = PatchCollection(patches, linewidths=1.0, edgecolor="red", facecolor="none")
ax.add_collection(pc)
if show == "counts":
for (cx, cy), cell_id in zip(ann_xy, cell_ids):
ax.text(
cx, cy, str(dict_id_count.get(cell_id, 0)), ha="center", va="center"
)
elif show == "chi2":
if chi2_contrib is None:
raise ValueError('chi2_contrib must be provided when show="chi2".')
chi2_contrib = np.asarray(chi2_contrib, dtype=float)
if chi2_contrib.shape[0] != len(cell_ids):
raise ValueError(
"chi2_contrib length must match number of plotted cells "
f"({len(cell_ids)}). Got {chi2_contrib.shape[0]}."
)
pc.set_array(chi2_contrib)
pc.set_alpha(0.6)
pc.set_facecolor(None)
plt.colorbar(pc, ax=ax, label="Chi-square contribution")
for (cx, cy), v in zip(ann_xy, chi2_contrib):
ax.text(cx, cy, f"{v:.2f}", ha="center", va="center")
else:
raise ValueError('show must be "counts" or "chi2".')
ax.set_aspect("equal", adjustable="box")
return ax, cell_ids
# -----------------------------------------------------------------------------
# Hexagon grid
# -----------------------------------------------------------------------------
[docs]
class HexagonM:
"""
Hexagon grid structure for quadrat-based method.
"""
[docs]
def __init__(self, pp, lh, window=None):
self.points = _as_points_array(pp)
self.h_length = float(lh)
self.mbb = _compute_mbb(self.points)
self.window = _ensure_window(window, self.mbb)
range_x = self.mbb[2] - self.mbb[0]
range_y = self.mbb[3] - self.mbb[1]
self.count_column = 1
if self.h_length / 2.0 < range_x:
temp = math.ceil((range_x - self.h_length / 2.0) / (1.5 * self.h_length))
self.count_column += int(temp)
self.semi_height = self.h_length * math.cos(math.pi / 6.0)
self.count_row_even = 1
if self.semi_height < range_y:
temp = math.ceil((range_y - self.semi_height) / (2.0 * self.semi_height))
self.count_row_even += int(temp)
self.count_row_odd = int(math.ceil(range_y / (2.0 * self.semi_height)))
self.num = self.count_row_odd * (
(self.count_column // 2) + (self.count_column % 2)
) + self.count_row_even * (self.count_column // 2)
[docs]
def point_location_sta(self):
semi_cell_length = self.h_length / 2.0
dict_id_count = {}
for i in range(self.count_row_even):
for j in range(self.count_column):
if (
self.count_row_even != self.count_row_odd
and i == self.count_row_even - 1
and j % 2 == 1
):
continue
dict_id_count[j + i * self.count_column] = 0
x_min = self.mbb[0]
y_min = self.mbb[1]
for point in self.points:
intercept_degree_x = (point[0] - x_min) // semi_cell_length
possible_y_index_even = int(
(point[1] + self.semi_height - y_min) / (2.0 * self.semi_height)
)
possible_y_index_odd = int((point[1] - y_min) / (2.0 * self.semi_height))
if intercept_degree_x % 3 != 1:
center_index_x = int((intercept_degree_x + 1) // 3)
center_index_y = possible_y_index_odd
if center_index_x % 2 == 0:
center_index_y = possible_y_index_even
dict_id_count[center_index_x + center_index_y * self.count_column] += 1
else:
center_index_x = int(intercept_degree_x // 3)
center_x = center_index_x * semi_cell_length * 3.0 + x_min
center_index_y = possible_y_index_odd
center_y = (center_index_y * 2.0 + 1.0) * self.semi_height + y_min
if center_index_x % 2 == 0:
center_index_y = possible_y_index_even
center_y = center_index_y * 2.0 * self.semi_height + y_min
if point[1] > center_y:
x0, y0 = center_x + self.h_length, center_y
x1, y1 = center_x + semi_cell_length, center_y + self.semi_height
indicator = -(
point[1]
- (
(y0 - y1) / (x0 - x1) * point[0]
+ (x0 * y1 - x1 * y0) / (x0 - x1)
)
)
else:
x0, y0 = center_x + semi_cell_length, center_y - self.semi_height
x1, y1 = center_x + self.h_length, center_y
indicator = point[1] - (
(y0 - y1) / (x0 - x1) * point[0]
+ (x0 * y1 - x1 * y0) / (x0 - x1)
)
if indicator <= 0:
center_index_x += 1
center_index_y = possible_y_index_odd
if center_index_x % 2 == 0:
center_index_y = possible_y_index_even
dict_id_count[center_index_x + center_index_y * self.count_column] += 1
return dict_id_count
def _hex_vertices(self, cx, cy):
sh = self.semi_height
lh = self.h_length
return np.array(
[
[cx + lh, cy],
[cx + lh / 2.0, cy + sh],
[cx - lh / 2.0, cy + sh],
[cx - lh, cy],
[cx - lh / 2.0, cy - sh],
[cx + lh / 2.0, cy - sh],
[cx + lh, cy],
],
dtype=float,
)
def _iter_cells(self):
dict_id_count = self.point_location_sta()
x_min = self.mbb[0]
y_min = self.mbb[1]
for cell_id in dict_id_count.keys():
ix = cell_id % self.count_column
iy = cell_id // self.count_column
cx = ix * self.h_length / 2.0 * 3.0 + x_min
cy = iy * self.semi_height * 2.0 + y_min
if ix % 2 == 1:
cy = (iy * 2.0 + 1.0) * self.semi_height + y_min
verts = self._hex_vertices(cx, cy)
poly = Polygon(verts[:-1])
yield cell_id, poly, (cx, cy)
[docs]
def plot(
self,
title="Quadrat Count",
show="counts",
chi2_contrib=None,
):
fig, ax = plt.subplots()
ax.set_title(title)
_scatter_points(ax, self.points)
_draw_window(ax, self.window)
dict_id_count = self.point_location_sta()
patches = []
ann_xy = []
cell_ids = []
for cell_id, poly, (cx, cy) in self._iter_cells():
patches.append(MplPolygon(np.asarray(poly.exterior.coords), closed=True))
ann_xy.append((cx, cy))
cell_ids.append(cell_id)
pc = PatchCollection(patches, linewidths=1.0, edgecolor="red", facecolor="none")
ax.add_collection(pc)
if show == "counts":
for (cx, cy), cell_id in zip(ann_xy, cell_ids):
ax.text(
cx, cy, str(dict_id_count.get(cell_id, 0)), ha="center", va="center"
)
elif show == "chi2":
if chi2_contrib is None:
raise ValueError('chi2_contrib must be provided when show="chi2".')
chi2_contrib = np.asarray(chi2_contrib, dtype=float)
if chi2_contrib.shape[0] != len(cell_ids):
raise ValueError(
"chi2_contrib length must match number of plotted cells "
f"({len(cell_ids)}). Got {chi2_contrib.shape[0]}."
)
pc.set_array(chi2_contrib)
pc.set_alpha(0.6)
pc.set_facecolor(None)
plt.colorbar(pc, ax=ax, label="Chi-square contribution")
for (cx, cy), v in zip(ann_xy, chi2_contrib):
ax.text(cx, cy, f"{v:.2f}", ha="center", va="center")
else:
raise ValueError('show must be "counts" or "chi2".')
ax.set_aspect("equal", adjustable="box")
return ax, cell_ids
# -----------------------------------------------------------------------------
# Quadrat statistic
# -----------------------------------------------------------------------------
[docs]
class QStatistic:
"""Pearson chi-square quadrat test for complete spatial randomness (CSR).
This class partitions the study region into quadrats (rectangles or hexagons),
counts the number of events falling in each quadrat, and evaluates departure
from CSR using a Pearson chi-square statistic under an equal-intensity CSR
null model.
The primary outputs are the observed chi-square statistic and its analytical
p-value (from the chi-square distribution). Optionally, a simulation-based
(Monte Carlo) p-value can be computed by generating CSR realizations within
the study window and recomputing the chi-square statistic for each
realization.
Parameters
----------
pp : :class: `.PointPattern` or array_like
Event coordinates as an (n, 2) array-like of floats (x, y).
shape : {"rectangle", "hexagon"}, default="rectangle"
Quadrat tessellation type.
nx : int, default=3
Number of columns when `shape="rectangle"` and `rectangle_width` /
`rectangle_height` are not provided.
ny : int, default=3
Number of rows when `shape="rectangle"` and `rectangle_width` /
`rectangle_height` are not provided.
rectangle_width : float, default=0
Target rectangle width. Must be provided together with
`rectangle_height`. When both are provided and non-zero, `nx` and `ny`
are ignored and the grid dimensions are computed from the point-set MBB.
rectangle_height : float, default=0
Target rectangle height. Must be provided together with
`rectangle_width`. When both are provided and non-zero, `nx` and `ny`
are ignored and the grid dimensions are computed from the point-set MBB.
lh : float, default=10
Hexagon side length when `shape="hexagon"`.
realizations : int, default=0
Number of CSR simulations used to compute a Monte Carlo p-value.
If 0, no simulation-based inference is performed.
window : shapely geometry, optional
Study window (e.g., Polygon or MultiPolygon). If None, the window is
taken to be the axis-aligned MBB rectangle of `points`.
rng : None | int | numpy.random.Generator | numpy.random.RandomState, optional
Random number generator control for reproducible CSR simulations when
`realizations > 0`.
- None: create a new ``numpy.random.default_rng()``
- int: use as a seed for ``numpy.random.default_rng(seed)``
- Generator: used as-is
- RandomState: wrapped via ``numpy.random.default_rng(RandomState)``
Attributes
----------
points : numpy.ndarray
(n, 2) array of event coordinates.
mbb : numpy.ndarray
Bounding box of `points` as ``[xmin, ymin, xmax, ymax]``.
window : shapely geometry
Study window used for simulation and plotting.
shape : str
Quadrat tessellation type: "rectangle" or "hexagon".
rng : numpy.random.Generator
RNG used for CSR simulations (always stored as a Generator).
mr : RectangleM or HexagonM
Tessellation manager instance.
cell_ids : list of int
Cell identifiers included in the test statistic (all grid cells).
chi2 : float
Observed Pearson chi-square statistic.
df : int
Degrees of freedom for the analytical chi-square reference distribution,
equal to ``k - 1`` where ``k`` is the number of included cells.
chi2_pvalue : float
Analytical p-value from the chi-square distribution.
chi2_contrib : numpy.ndarray
Per-cell chi-square contributions aligned with `cell_ids`, computed as
``(O - E)^2 / E`` with ``E = mean(O)`` over included cells.
chi2_realizations : numpy.ndarray
Simulated chi-square statistics for CSR realizations. Present only when
`realizations > 0`.
chi2_r_pvalue : float
Monte Carlo p-value based on `chi2_realizations`, computed using the
standard +1 correction:
``( #{T_sim >= T_obs} + 1 ) / (realizations + 1)``.
Present only when `realizations > 0`.
Notes
-----
- The analytical test uses ``scipy.stats.chisquare`` with expected
counts equal across cells (i.e., ``E = mean(O)``). This
corresponds to a homogeneous CSR null with equal-area cells. For
irregular windows, a fully-correct analytical reference would
area-weight expectations.
- The simulation-based null generates CSR realizations within `window` with
intensity ``n / area(window)``. Reproducibility depends on passing `rng`
through to the underlying CSR generator (``poisson(..., rng=...)``).
See Also
--------
RectangleM : Rectangular tessellation over the point-set MBB.
HexagonM : Hexagonal tessellation over the point-set MBB.
"""
[docs]
def __init__(
self,
pp,
shape="rectangle",
nx=3,
ny=3,
rectangle_width=0,
rectangle_height=0,
lh=10,
realizations=0,
window=None,
rng=None,
):
self.points = _as_points_array(pp)
self.mbb = _compute_mbb(self.points)
self.window = _ensure_window(window, self.mbb)
self.shape = shape
self.rng = _coerce_rng(rng)
if shape == "rectangle":
self.mr = RectangleM(
self.points,
count_column=nx,
count_row=ny,
rectangle_width=rectangle_width,
rectangle_height=rectangle_height,
window=self.window,
)
elif shape == "hexagon":
self.mr = HexagonM(self.points, lh, window=self.window)
else:
raise ValueError('shape must be either "rectangle" or "hexagon".')
dict_id_count = self.mr.point_location_sta()
obs_counts = np.asarray(list(dict_id_count.values()), dtype=float)
self.cell_ids = list(dict_id_count.keys())
self.chi2, self.chi2_pvalue = scipy.stats.chisquare(obs_counts)
self.df = obs_counts.size - 1
expected = obs_counts.mean() if obs_counts.size else np.nan
if obs_counts.size and expected > 0:
self.chi2_contrib = (obs_counts - expected) ** 2 / expected
else:
self.chi2_contrib = np.full_like(obs_counts, np.nan, dtype=float)
# simulation-based inference under CSR (reproducible via rng)
if realizations and realizations > 0:
n = self.points.shape[0]
intensity = n / float(self.window.area)
reals = poisson(self.window, intensity, realizations, rng=self.rng)
chi2_realizations = []
for i in range(realizations):
ri = reals[i]
pts_i = getattr(ri, "points", ri)
pts_i = _as_points_array(pts_i)
if shape == "rectangle":
mr_temp = RectangleM(
pts_i,
count_column=self.mr.count_column,
count_row=self.mr.count_row,
window=self.window,
)
else:
mr_temp = HexagonM(pts_i, self.mr.h_length, window=self.window)
dtemp = mr_temp.point_location_sta()
sim_counts = np.asarray(list(dtemp.values()), dtype=float)
chi2_sim, _p = scipy.stats.chisquare(sim_counts)
chi2_realizations.append(chi2_sim)
self.chi2_realizations = np.asarray(chi2_realizations, dtype=float)
self.chi2_r_pvalue = (np.sum(self.chi2_realizations >= self.chi2) + 1.0) / (
realizations + 1.0
)
[docs]
def plot(self, title="Quadrat Count", show="counts"):
if show == "counts":
ax, _cell_ids = self.mr.plot(title=title, show="counts")
return ax
if show == "chi2":
# get plotted ids
_, plotted_ids = self.mr.plot(title=title, show="counts")
plt.close()
contrib_map = {cid: v for cid, v in zip(self.cell_ids, self.chi2_contrib)}
plotted_contrib = np.asarray(
[contrib_map.get(cid, np.nan) for cid in plotted_ids], dtype=float
)
ax, _ = self.mr.plot(title=title, show="chi2", chi2_contrib=plotted_contrib)
return ax
raise ValueError('show must be either "counts" or "chi2".')