diff --git a/docs/release/notes-dev.md b/docs/release/notes-dev.md index e5fda1ca7..dc15bfe08 100644 --- a/docs/release/notes-dev.md +++ b/docs/release/notes-dev.md @@ -6,3 +6,9 @@ - Update :attr:`squidpy.pl.var_by_distance` to show multiple variables on same plot. [@LLehner](https://github.com/LLehner) [#929](https://github.com/scverse/squidpy/pull/929) + +## Bugfixes + +- Use the correct normality variance for Geary's C in {func}`squidpy.gr.spatial_autocorr`, + fixing a miscalibrated analytic p-value (`pval_norm`) for `mode="geary"`. + [#1183](https://github.com/scverse/squidpy/issues/1183) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index c77085228..b5db41f60 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -493,11 +493,20 @@ def _analytic_pval(score: NDArrayA, g: spmatrix | NDArrayA, params: dict[str, An s0, s1, s2 = _g_moments(g) n = g.shape[0] s02 = s0 * s0 - n2 = n * n - v_num = n2 * s1 - n * s2 + 3 * s02 - v_den = (n - 1) * (n + 1) * s02 - Vscore_norm = v_num / v_den - (1.0 / (n - 1)) ** 2 + if params["mode"] == SpatialAutocorr.GEARY.s: + # Geary's C and Moran's I have different sampling variances under the + # normality assumption (Cliff & Ord 1981). Use the Geary's C variance + # (matching pysal/esda ``Geary``); reusing Moran's variance here gives a + # miscalibrated analytic p-value (see #1183). + Vscore_norm = ((2 * s1 + s2) * (n - 1) - 4 * s02) / (2 * (n + 1) * s02) + else: + # Moran's I normality variance (Cliff & Ord 1981; pysal/esda ``Moran``). + n2 = n * n + v_num = n2 * s1 - n * s2 + 3 * s02 + v_den = (n - 1) * (n + 1) * s02 + Vscore_norm = v_num / v_den - (1.0 / (n - 1)) ** 2 + seScore_norm = Vscore_norm ** (1 / 2.0) z_norm = (score - params["expected"]) / seScore_norm diff --git a/tests/graph/test_ppatterns.py b/tests/graph/test_ppatterns.py index 01c2e3033..158ae6f5a 100644 --- a/tests/graph/test_ppatterns.py +++ b/tests/graph/test_ppatterns.py @@ -90,6 +90,38 @@ def test_spatial_autocorr_reproducibility(dummy_adata: AnnData, n_jobs: int, mod assert_frame_equal(df_1, df_2) +@pytest.mark.parametrize("mode", ["moran", "geary"]) +def test_spatial_autocorr_var_norm_formula(dummy_adata: AnnData, mode: str): + """Analytic ``var_norm`` must use the variance matching the chosen statistic. + + Regression test for #1183: Geary's C and Moran's I have different sampling + variances under the normality assumption (Cliff & Ord 1981). Reusing Moran's + variance for Geary's C produced a miscalibrated analytic p-value. + """ + from sklearn.preprocessing import normalize + + from squidpy.gr._ppatterns import _g_moments + + uns_key = MORAN_K if mode == "moran" else GEARY_C + spatial_autocorr(dummy_adata, mode=mode, transformation=True, n_perms=None, seed=0) + var_norm = float(dummy_adata.uns[uns_key]["var_norm"].iloc[0]) + + # Reconstruct the exact (row-standardised) weight matrix the routine used. + g = dummy_adata.obsp["spatial_connectivities"].copy() + normalize(g, norm="l1", axis=1, copy=False) + s0, s1, s2 = _g_moments(g) + n = g.shape[0] + s02 = s0 * s0 + moran_var = (n * n * s1 - n * s2 + 3 * s02) / ((n - 1) * (n + 1) * s02) - (1.0 / (n - 1)) ** 2 + geary_var = ((2 * s1 + s2) * (n - 1) - 4 * s02) / (2 * (n + 1) * s02) + + expected = moran_var if mode == "moran" else geary_var + np.testing.assert_allclose(var_norm, expected, rtol=1e-10) + if mode == "geary": + # the two formulas differ here, so the test would fail if Moran's were reused + assert not np.isclose(geary_var, moran_var, rtol=1e-3) + + @pytest.mark.parametrize( "attr,layer,genes", [