"""Statistical tests for magnitude–redshift independence in truncated samples.
Implements the Efron & Petrosian (1992) τ test and the Rauzy (2001) Tc/Tv
completeness tests as described in Johnston (2011).
Reference
---------
Efron & Petrosian (1992), ApJ 399, 345.
Rauzy (2001), MNRAS 324, 51.
Johnston, Teodoro & Hendry (2007), MNRAS 376, 1757.
Johnston (2011), arXiv:1106.2039v3, §10.1–10.2, Eq. 155–166.
"""
from __future__ import annotations
import numpy as np
from astropy.cosmology import FlatLambdaCDM
from scipy.stats import norm as _scipy_norm
[docs]
def efron_petrosian_tau(
abs_mag: np.ndarray,
redshift: np.ndarray,
mag_limit: float,
cosmo: FlatLambdaCDM,
weight: np.ndarray | None = None,
) -> dict[str, float]:
"""Efron–Petrosian τ statistic for testing LF/redshift independence.
Tests the null hypothesis H₀ that absolute magnitude M is statistically
independent of redshift z given the flux-limited selection truncation.
Under H₀, τ ~ N(0, 1) (Johnston 2011 §10.1, Eq. 158–159)::
τ = Σ_i (R_i − E_i) / sqrt(Σ_i V_i)
For each galaxy i the associated set is (Johnston 2011 Eq. 156)::
J_i = {j : z_j ≤ z_i AND M_j < M_faint(z_i)}
i.e. galaxies at redshifts no greater than z_i whose magnitude is
observable at z_i. R_i is the rank of M_i within {M_j : j ∈ J_i}
ordered by brightness, E_i = (\|J_i\| + 1)/2 is the expected rank under
H₀, and V_i = (\|J_i\|² − 1)/12 is its variance.
A positive τ means brighter galaxies are found preferentially at higher
redshift (positive luminosity evolution).
Parameters
----------
abs_mag : ndarray, shape (N,)
Absolute magnitudes.
redshift : ndarray, shape (N,)
Redshifts.
mag_limit : float
Survey apparent-magnitude limit [mag].
cosmo : FlatLambdaCDM
Cosmology for distance modulus.
weight : ndarray, optional
Per-galaxy weights (currently unused in the rank computation but
reserved for future weighted extensions).
Returns
-------
dict with keys:
tau : float
τ statistic; τ ~ N(0,1) under H₀.
p_value : float
Two-tailed p-value P(\|Z\| ≥ \|τ\|) under H₀.
numerator : float
Σ_i (R_i − E_i).
denominator : float
sqrt(Σ_i V_i).
n_used : int
Number of galaxies with \|J_i\| ≥ 2 contributing to τ.
References
----------
Efron & Petrosian (1992), ApJ 399, 345.
Johnston (2011), arXiv:1106.2039v3, §10.1, Eq. 155–159.
"""
abs_mag = np.asarray(abs_mag, dtype=np.float64)
redshift = np.asarray(redshift, dtype=np.float64)
mu = cosmo.distmod(redshift).value
M_faint = mag_limit - mu # faintest observable abs mag at each z
# Vectorised: J_mask[i, j] = True if j ∈ J_i
# J_i = {j : z_j ≤ z_i AND M_j < M_faint(z_i)}
# i.e., galaxies at z ≤ z_i whose magnitude is observable at z_i.
# This is the standard EP (1992) associated-set definition for
# flux-limited surveys (Johnston 2011 §10.1, Eq. 156).
J_mask = (redshift[None, :] <= redshift[:, None]) & (abs_mag[None, :] < M_faint[:, None])
J_size = J_mask.sum(axis=1) # |J_i|
# R_i = |{j ∈ J_i : M_j ≤ M_i}| (rank of M_i within J_i by brightness)
R = (J_mask & (abs_mag[None, :] <= abs_mag[:, None])).sum(axis=1)
valid = J_size >= 2
n_used = int(valid.sum())
if n_used == 0:
return {"tau": 0.0, "p_value": 1.0, "numerator": 0.0, "denominator": 0.0, "n_used": 0}
Jv = J_size[valid].astype(float)
Rv = R[valid].astype(float)
E = (Jv + 1.0) / 2.0
V = (Jv**2 - 1.0) / 12.0
numerator = float(np.sum(Rv - E))
denom = float(np.sqrt(np.sum(V)))
tau = numerator / denom if denom > 0 else 0.0
p_value = float(2.0 * _scipy_norm.sf(abs(tau)))
return {
"tau": tau,
"p_value": p_value,
"numerator": numerator,
"denominator": denom,
"n_used": n_used,
}
[docs]
def rauzy_completeness(
abs_mag: np.ndarray,
redshift: np.ndarray,
mag_limit: float,
cosmo: FlatLambdaCDM,
) -> dict[str, float]:
"""Rauzy (2001) Tc and Tv completeness statistics.
The Tc and Tv tests check whether the distribution of magnitudes within
the associated sets J_i is consistent with a complete, flux-limited sample
(Johnston 2011 §10.2, Eq. 160–166). The ζ_i variables are uniform on
[0, 1] under the null hypothesis of completeness; deviations indicate
incompleteness.
Two statistics are returned:
* **Tc** (completeness in count): based on the mean of ζ_i, which should
equal 0.5 under H₀.
* **Tv** (completeness in variance): based on the variance of ζ_i, which
should equal 1/12 under H₀.
Parameters
----------
abs_mag : ndarray, shape (N,)
Absolute magnitudes.
redshift : ndarray, shape (N,)
Redshifts.
mag_limit : float
Survey apparent-magnitude limit [mag].
cosmo : FlatLambdaCDM
Cosmology for distance modulus.
Returns
-------
dict with keys:
Tc : float
Completeness statistic based on ranks (N(0,1) under H₀).
Tv : float
Completeness statistic based on rank variance (N(0,1) under H₀).
zeta_mean : float
Sample mean of ζ_i (should be 0.5 under H₀).
zeta_var : float
Sample variance of ζ_i (should be 1/12 ≈ 0.0833 under H₀).
n_used : int
Number of galaxies contributing.
References
----------
Rauzy (2001), MNRAS 324, 51.
Johnston, Teodoro & Hendry (2007), MNRAS 376, 1757.
Johnston (2011), arXiv:1106.2039v3, §10.2, Eq. 160–166.
"""
abs_mag = np.asarray(abs_mag, dtype=np.float64)
redshift = np.asarray(redshift, dtype=np.float64)
mu = cosmo.distmod(redshift).value
M_faint = mag_limit - mu
J_mask = (redshift[None, :] <= redshift[:, None]) & (abs_mag[None, :] < M_faint[:, None])
J_size = J_mask.sum(axis=1)
R = (J_mask & (abs_mag[None, :] <= abs_mag[:, None])).sum(axis=1)
valid = J_size >= 2
n_used = int(valid.sum())
if n_used == 0:
return {"Tc": 0.0, "Tv": 0.0, "zeta_mean": np.nan, "zeta_var": np.nan, "n_used": 0}
Jv = J_size[valid].astype(float)
Rv = R[valid].astype(float)
# ζ_i = R_i / (|J_i| + 1) ∈ (0,1) under completeness (Johnston 2011 Eq. 163)
zeta = Rv / (Jv + 1.0)
zeta_mean = float(np.mean(zeta))
zeta_var = float(np.var(zeta, ddof=1))
# Tc: standardised deviation of mean(ζ) from 0.5
sigma_mean = np.sqrt(1.0 / (12.0 * n_used))
Tc = (zeta_mean - 0.5) / sigma_mean if sigma_mean > 0 else 0.0
# Tv: standardised deviation of var(ζ) from 1/12
sigma_var = np.sqrt(1.0 / (80.0 * n_used))
Tv = (zeta_var - 1.0 / 12.0) / sigma_var if sigma_var > 0 else 0.0
return {
"Tc": float(Tc),
"Tv": float(Tv),
"zeta_mean": zeta_mean,
"zeta_var": zeta_var,
"n_used": n_used,
}