Source code for sum_stat.lf_smf.independence

"""Statistical tests for magnitude–redshift independence in truncated samples.

Implements the Efron & Petrosian (1992) τ test and the Rauzy (2001) Tc/Tv
completeness tests as described in Johnston (2011).

Reference
---------
Efron & Petrosian (1992), ApJ 399, 345.
Rauzy (2001), MNRAS 324, 51.
Johnston, Teodoro & Hendry (2007), MNRAS 376, 1757.
Johnston (2011), arXiv:1106.2039v3, §10.1–10.2, Eq. 155–166.
"""

from __future__ import annotations

import numpy as np
from astropy.cosmology import FlatLambdaCDM
from scipy.stats import norm as _scipy_norm



[docs]
def efron_petrosian_tau(
    abs_mag: np.ndarray,
    redshift: np.ndarray,
    mag_limit: float,
    cosmo: FlatLambdaCDM,
    weight: np.ndarray | None = None,
) -> dict[str, float]:
    """Efron–Petrosian τ statistic for testing LF/redshift independence.

    Tests the null hypothesis H₀ that absolute magnitude M is statistically
    independent of redshift z given the flux-limited selection truncation.
    Under H₀, τ ~ N(0, 1) (Johnston 2011 §10.1, Eq. 158–159)::

        τ = Σ_i (R_i − E_i) / sqrt(Σ_i V_i)

    For each galaxy i the associated set is (Johnston 2011 Eq. 156)::

        J_i = {j : z_j ≤ z_i  AND  M_j < M_faint(z_i)}

    i.e. galaxies at redshifts no greater than z_i whose magnitude is
    observable at z_i.  R_i is the rank of M_i within {M_j : j ∈ J_i}
    ordered by brightness, E_i = (\|J_i\| + 1)/2 is the expected rank under
    H₀, and V_i = (\|J_i\|² − 1)/12 is its variance.

    A positive τ means brighter galaxies are found preferentially at higher
    redshift (positive luminosity evolution).

    Parameters
    ----------
    abs_mag : ndarray, shape (N,)
        Absolute magnitudes.
    redshift : ndarray, shape (N,)
        Redshifts.
    mag_limit : float
        Survey apparent-magnitude limit [mag].
    cosmo : FlatLambdaCDM
        Cosmology for distance modulus.
    weight : ndarray, optional
        Per-galaxy weights (currently unused in the rank computation but
        reserved for future weighted extensions).

    Returns
    -------
    dict with keys:

    tau : float
        τ statistic; τ ~ N(0,1) under H₀.
    p_value : float
        Two-tailed p-value P(\|Z\| ≥ \|τ\|) under H₀.
    numerator : float
        Σ_i (R_i − E_i).
    denominator : float
        sqrt(Σ_i V_i).
    n_used : int
        Number of galaxies with \|J_i\| ≥ 2 contributing to τ.

    References
    ----------
    Efron & Petrosian (1992), ApJ 399, 345.
    Johnston (2011), arXiv:1106.2039v3, §10.1, Eq. 155–159.
    """
    abs_mag = np.asarray(abs_mag, dtype=np.float64)
    redshift = np.asarray(redshift, dtype=np.float64)

    mu = cosmo.distmod(redshift).value
    M_faint = mag_limit - mu  # faintest observable abs mag at each z

    # Vectorised: J_mask[i, j] = True if j ∈ J_i
    # J_i = {j : z_j ≤ z_i  AND  M_j < M_faint(z_i)}
    # i.e., galaxies at z ≤ z_i whose magnitude is observable at z_i.
    # This is the standard EP (1992) associated-set definition for
    # flux-limited surveys (Johnston 2011 §10.1, Eq. 156).
    J_mask = (redshift[None, :] <= redshift[:, None]) & (abs_mag[None, :] < M_faint[:, None])
    J_size = J_mask.sum(axis=1)  # |J_i|

    # R_i = |{j ∈ J_i : M_j ≤ M_i}|  (rank of M_i within J_i by brightness)
    R = (J_mask & (abs_mag[None, :] <= abs_mag[:, None])).sum(axis=1)

    valid = J_size >= 2
    n_used = int(valid.sum())
    if n_used == 0:
        return {"tau": 0.0, "p_value": 1.0, "numerator": 0.0, "denominator": 0.0, "n_used": 0}

    Jv = J_size[valid].astype(float)
    Rv = R[valid].astype(float)
    E = (Jv + 1.0) / 2.0
    V = (Jv**2 - 1.0) / 12.0

    numerator = float(np.sum(Rv - E))
    denom = float(np.sqrt(np.sum(V)))
    tau = numerator / denom if denom > 0 else 0.0
    p_value = float(2.0 * _scipy_norm.sf(abs(tau)))

    return {
        "tau": tau,
        "p_value": p_value,
        "numerator": numerator,
        "denominator": denom,
        "n_used": n_used,
    }




[docs]
def rauzy_completeness(
    abs_mag: np.ndarray,
    redshift: np.ndarray,
    mag_limit: float,
    cosmo: FlatLambdaCDM,
) -> dict[str, float]:
    """Rauzy (2001) Tc and Tv completeness statistics.

    The Tc and Tv tests check whether the distribution of magnitudes within
    the associated sets J_i is consistent with a complete, flux-limited sample
    (Johnston 2011 §10.2, Eq. 160–166).  The ζ_i variables are uniform on
    [0, 1] under the null hypothesis of completeness; deviations indicate
    incompleteness.

    Two statistics are returned:

    * **Tc** (completeness in count): based on the mean of ζ_i, which should
      equal 0.5 under H₀.
    * **Tv** (completeness in variance): based on the variance of ζ_i, which
      should equal 1/12 under H₀.

    Parameters
    ----------
    abs_mag : ndarray, shape (N,)
        Absolute magnitudes.
    redshift : ndarray, shape (N,)
        Redshifts.
    mag_limit : float
        Survey apparent-magnitude limit [mag].
    cosmo : FlatLambdaCDM
        Cosmology for distance modulus.

    Returns
    -------
    dict with keys:

    Tc : float
        Completeness statistic based on ranks (N(0,1) under H₀).
    Tv : float
        Completeness statistic based on rank variance (N(0,1) under H₀).
    zeta_mean : float
        Sample mean of ζ_i (should be 0.5 under H₀).
    zeta_var : float
        Sample variance of ζ_i (should be 1/12 ≈ 0.0833 under H₀).
    n_used : int
        Number of galaxies contributing.

    References
    ----------
    Rauzy (2001), MNRAS 324, 51.
    Johnston, Teodoro & Hendry (2007), MNRAS 376, 1757.
    Johnston (2011), arXiv:1106.2039v3, §10.2, Eq. 160–166.
    """
    abs_mag = np.asarray(abs_mag, dtype=np.float64)
    redshift = np.asarray(redshift, dtype=np.float64)

    mu = cosmo.distmod(redshift).value
    M_faint = mag_limit - mu

    J_mask = (redshift[None, :] <= redshift[:, None]) & (abs_mag[None, :] < M_faint[:, None])
    J_size = J_mask.sum(axis=1)
    R = (J_mask & (abs_mag[None, :] <= abs_mag[:, None])).sum(axis=1)

    valid = J_size >= 2
    n_used = int(valid.sum())
    if n_used == 0:
        return {"Tc": 0.0, "Tv": 0.0, "zeta_mean": np.nan, "zeta_var": np.nan, "n_used": 0}

    Jv = J_size[valid].astype(float)
    Rv = R[valid].astype(float)

    # ζ_i = R_i / (|J_i| + 1) ∈ (0,1) under completeness (Johnston 2011 Eq. 163)
    zeta = Rv / (Jv + 1.0)
    zeta_mean = float(np.mean(zeta))
    zeta_var = float(np.var(zeta, ddof=1))

    # Tc: standardised deviation of mean(ζ) from 0.5
    sigma_mean = np.sqrt(1.0 / (12.0 * n_used))
    Tc = (zeta_mean - 0.5) / sigma_mean if sigma_mean > 0 else 0.0

    # Tv: standardised deviation of var(ζ) from 1/12
    sigma_var = np.sqrt(1.0 / (80.0 * n_used))
    Tv = (zeta_var - 1.0 / 12.0) / sigma_var if sigma_var > 0 else 0.0

    return {
        "Tc": float(Tc),
        "Tv": float(Tv),
        "zeta_mean": zeta_mean,
        "zeta_var": zeta_var,
        "n_used": n_used,
    }