Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/release/notes-dev.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,9 @@
- Update :attr:`squidpy.pl.var_by_distance` to show multiple variables on same plot.
[@LLehner](https://github.com/LLehner)
[#929](https://github.com/scverse/squidpy/pull/929)

## Bugfixes

- Use the correct normality variance for Geary's C in {func}`squidpy.gr.spatial_autocorr`,
fixing a miscalibrated analytic p-value (`pval_norm`) for `mode="geary"`.
[#1183](https://github.com/scverse/squidpy/issues/1183)
17 changes: 13 additions & 4 deletions src/squidpy/gr/_ppatterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,11 +493,20 @@ def _analytic_pval(score: NDArrayA, g: spmatrix | NDArrayA, params: dict[str, An
s0, s1, s2 = _g_moments(g)
n = g.shape[0]
s02 = s0 * s0
n2 = n * n
v_num = n2 * s1 - n * s2 + 3 * s02
v_den = (n - 1) * (n + 1) * s02

Vscore_norm = v_num / v_den - (1.0 / (n - 1)) ** 2
if params["mode"] == SpatialAutocorr.GEARY.s:
# Geary's C and Moran's I have different sampling variances under the
# normality assumption (Cliff & Ord 1981). Use the Geary's C variance
# (matching pysal/esda ``Geary``); reusing Moran's variance here gives a
# miscalibrated analytic p-value (see #1183).
Vscore_norm = ((2 * s1 + s2) * (n - 1) - 4 * s02) / (2 * (n + 1) * s02)
else:
# Moran's I normality variance (Cliff & Ord 1981; pysal/esda ``Moran``).
n2 = n * n
v_num = n2 * s1 - n * s2 + 3 * s02
v_den = (n - 1) * (n + 1) * s02
Vscore_norm = v_num / v_den - (1.0 / (n - 1)) ** 2

seScore_norm = Vscore_norm ** (1 / 2.0)

z_norm = (score - params["expected"]) / seScore_norm
Expand Down
32 changes: 32 additions & 0 deletions tests/graph/test_ppatterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,38 @@ def test_spatial_autocorr_reproducibility(dummy_adata: AnnData, n_jobs: int, mod
assert_frame_equal(df_1, df_2)


@pytest.mark.parametrize("mode", ["moran", "geary"])
def test_spatial_autocorr_var_norm_formula(dummy_adata: AnnData, mode: str):
"""Analytic ``var_norm`` must use the variance matching the chosen statistic.

Regression test for #1183: Geary's C and Moran's I have different sampling
variances under the normality assumption (Cliff & Ord 1981). Reusing Moran's
variance for Geary's C produced a miscalibrated analytic p-value.
"""
from sklearn.preprocessing import normalize

from squidpy.gr._ppatterns import _g_moments

uns_key = MORAN_K if mode == "moran" else GEARY_C
spatial_autocorr(dummy_adata, mode=mode, transformation=True, n_perms=None, seed=0)
var_norm = float(dummy_adata.uns[uns_key]["var_norm"].iloc[0])

# Reconstruct the exact (row-standardised) weight matrix the routine used.
g = dummy_adata.obsp["spatial_connectivities"].copy()
normalize(g, norm="l1", axis=1, copy=False)
s0, s1, s2 = _g_moments(g)
n = g.shape[0]
s02 = s0 * s0
moran_var = (n * n * s1 - n * s2 + 3 * s02) / ((n - 1) * (n + 1) * s02) - (1.0 / (n - 1)) ** 2
geary_var = ((2 * s1 + s2) * (n - 1) - 4 * s02) / (2 * (n + 1) * s02)

expected = moran_var if mode == "moran" else geary_var
np.testing.assert_allclose(var_norm, expected, rtol=1e-10)
if mode == "geary":
# the two formulas differ here, so the test would fail if Moran's were reused
assert not np.isclose(geary_var, moran_var, rtol=1e-3)


@pytest.mark.parametrize(
"attr,layer,genes",
[
Expand Down
Loading