Source code for sum_stat.lf_smf.independence

"""Statistical tests for magnitude–redshift independence in truncated samples.

Implements the Efron & Petrosian (1992) τ test and the Rauzy (2001) Tc/Tv
completeness tests as described in Johnston (2011).

Reference
---------
Efron & Petrosian (1992), ApJ 399, 345.
Rauzy (2001), MNRAS 324, 51.
Johnston, Teodoro & Hendry (2007), MNRAS 376, 1757.
Johnston (2011), arXiv:1106.2039v3, §10.1–10.2, Eq. 155–166.
"""

from __future__ import annotations

import numpy as np
from astropy.cosmology import FlatLambdaCDM
from scipy.stats import norm as _scipy_norm


[docs] def efron_petrosian_tau( abs_mag: np.ndarray, redshift: np.ndarray, mag_limit: float, cosmo: FlatLambdaCDM, weight: np.ndarray | None = None, ) -> dict[str, float]: """Efron–Petrosian τ statistic for testing LF/redshift independence. Tests the null hypothesis H₀ that absolute magnitude M is statistically independent of redshift z given the flux-limited selection truncation. Under H₀, τ ~ N(0, 1) (Johnston 2011 §10.1, Eq. 158–159):: τ = Σ_i (R_i − E_i) / sqrt(Σ_i V_i) For each galaxy i the associated set is (Johnston 2011 Eq. 156):: J_i = {j : z_j ≤ z_i AND M_j < M_faint(z_i)} i.e. galaxies at redshifts no greater than z_i whose magnitude is observable at z_i. R_i is the rank of M_i within {M_j : j ∈ J_i} ordered by brightness, E_i = (\|J_i\| + 1)/2 is the expected rank under H₀, and V_i = (\|J_i\|² − 1)/12 is its variance. A positive τ means brighter galaxies are found preferentially at higher redshift (positive luminosity evolution). Parameters ---------- abs_mag : ndarray, shape (N,) Absolute magnitudes. redshift : ndarray, shape (N,) Redshifts. mag_limit : float Survey apparent-magnitude limit [mag]. cosmo : FlatLambdaCDM Cosmology for distance modulus. weight : ndarray, optional Per-galaxy weights (currently unused in the rank computation but reserved for future weighted extensions). Returns ------- dict with keys: tau : float τ statistic; τ ~ N(0,1) under H₀. p_value : float Two-tailed p-value P(\|Z\| ≥ \|τ\|) under H₀. numerator : float Σ_i (R_i − E_i). denominator : float sqrt(Σ_i V_i). n_used : int Number of galaxies with \|J_i\| ≥ 2 contributing to τ. References ---------- Efron & Petrosian (1992), ApJ 399, 345. Johnston (2011), arXiv:1106.2039v3, §10.1, Eq. 155–159. """ abs_mag = np.asarray(abs_mag, dtype=np.float64) redshift = np.asarray(redshift, dtype=np.float64) mu = cosmo.distmod(redshift).value M_faint = mag_limit - mu # faintest observable abs mag at each z # Vectorised: J_mask[i, j] = True if j ∈ J_i # J_i = {j : z_j ≤ z_i AND M_j < M_faint(z_i)} # i.e., galaxies at z ≤ z_i whose magnitude is observable at z_i. # This is the standard EP (1992) associated-set definition for # flux-limited surveys (Johnston 2011 §10.1, Eq. 156). J_mask = (redshift[None, :] <= redshift[:, None]) & (abs_mag[None, :] < M_faint[:, None]) J_size = J_mask.sum(axis=1) # |J_i| # R_i = |{j ∈ J_i : M_j ≤ M_i}| (rank of M_i within J_i by brightness) R = (J_mask & (abs_mag[None, :] <= abs_mag[:, None])).sum(axis=1) valid = J_size >= 2 n_used = int(valid.sum()) if n_used == 0: return {"tau": 0.0, "p_value": 1.0, "numerator": 0.0, "denominator": 0.0, "n_used": 0} Jv = J_size[valid].astype(float) Rv = R[valid].astype(float) E = (Jv + 1.0) / 2.0 V = (Jv**2 - 1.0) / 12.0 numerator = float(np.sum(Rv - E)) denom = float(np.sqrt(np.sum(V))) tau = numerator / denom if denom > 0 else 0.0 p_value = float(2.0 * _scipy_norm.sf(abs(tau))) return { "tau": tau, "p_value": p_value, "numerator": numerator, "denominator": denom, "n_used": n_used, }
[docs] def rauzy_completeness( abs_mag: np.ndarray, redshift: np.ndarray, mag_limit: float, cosmo: FlatLambdaCDM, ) -> dict[str, float]: """Rauzy (2001) Tc and Tv completeness statistics. The Tc and Tv tests check whether the distribution of magnitudes within the associated sets J_i is consistent with a complete, flux-limited sample (Johnston 2011 §10.2, Eq. 160–166). The ζ_i variables are uniform on [0, 1] under the null hypothesis of completeness; deviations indicate incompleteness. Two statistics are returned: * **Tc** (completeness in count): based on the mean of ζ_i, which should equal 0.5 under H₀. * **Tv** (completeness in variance): based on the variance of ζ_i, which should equal 1/12 under H₀. Parameters ---------- abs_mag : ndarray, shape (N,) Absolute magnitudes. redshift : ndarray, shape (N,) Redshifts. mag_limit : float Survey apparent-magnitude limit [mag]. cosmo : FlatLambdaCDM Cosmology for distance modulus. Returns ------- dict with keys: Tc : float Completeness statistic based on ranks (N(0,1) under H₀). Tv : float Completeness statistic based on rank variance (N(0,1) under H₀). zeta_mean : float Sample mean of ζ_i (should be 0.5 under H₀). zeta_var : float Sample variance of ζ_i (should be 1/12 ≈ 0.0833 under H₀). n_used : int Number of galaxies contributing. References ---------- Rauzy (2001), MNRAS 324, 51. Johnston, Teodoro & Hendry (2007), MNRAS 376, 1757. Johnston (2011), arXiv:1106.2039v3, §10.2, Eq. 160–166. """ abs_mag = np.asarray(abs_mag, dtype=np.float64) redshift = np.asarray(redshift, dtype=np.float64) mu = cosmo.distmod(redshift).value M_faint = mag_limit - mu J_mask = (redshift[None, :] <= redshift[:, None]) & (abs_mag[None, :] < M_faint[:, None]) J_size = J_mask.sum(axis=1) R = (J_mask & (abs_mag[None, :] <= abs_mag[:, None])).sum(axis=1) valid = J_size >= 2 n_used = int(valid.sum()) if n_used == 0: return {"Tc": 0.0, "Tv": 0.0, "zeta_mean": np.nan, "zeta_var": np.nan, "n_used": 0} Jv = J_size[valid].astype(float) Rv = R[valid].astype(float) # ζ_i = R_i / (|J_i| + 1) ∈ (0,1) under completeness (Johnston 2011 Eq. 163) zeta = Rv / (Jv + 1.0) zeta_mean = float(np.mean(zeta)) zeta_var = float(np.var(zeta, ddof=1)) # Tc: standardised deviation of mean(ζ) from 0.5 sigma_mean = np.sqrt(1.0 / (12.0 * n_used)) Tc = (zeta_mean - 0.5) / sigma_mean if sigma_mean > 0 else 0.0 # Tv: standardised deviation of var(ζ) from 1/12 sigma_var = np.sqrt(1.0 / (80.0 * n_used)) Tv = (zeta_var - 1.0 / 12.0) / sigma_var if sigma_var > 0 else 0.0 return { "Tc": float(Tc), "Tv": float(Tv), "zeta_mean": zeta_mean, "zeta_var": zeta_var, "n_used": n_used, }