Source code for sum_stat.twopcf.projected

"""Projected two-point correlation function w_p(r_p).

Provides:
- corrfunc-based measurement via :func:`wp` (Landy-Szalay, pair counting)
"""

from __future__ import annotations

import numpy as np
from astropy.cosmology import FlatLambdaCDM
from Corrfunc.mocks import DDrppi_mocks
from Corrfunc.utils import convert_rp_pi_counts_to_wp

from ..catalogue import GalaxyCatalogue



[docs]
def wp(
    gal: GalaxyCatalogue,
    rand: GalaxyCatalogue,
    rp_bins: np.ndarray,
    pi_max: float,
    cosmo: FlatLambdaCDM,
    n_threads: int = 4,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Projected correlation function w_p(r_p) via Landy-Szalay estimator.

    Uses Corrfunc ``DDrppi_mocks`` with pre-computed comoving distances to count
    pairs in (r_p, π) bins, then integrates over π up to ``pi_max``.

    Notes
    -----
    The previous treecorr-based implementation using ``FisherRperp`` with
    ``min_rpar=0`` produced all-negative wp due to a signed-rpar bug: treecorr
    uses ``r_par = r2 - r1`` (by catalog index, not distance), so ``min_rpar=0``
    discards the majority of auto-correlation (DD, RR) pairs while only halving
    cross-correlation (DR) pairs.  Corrfunc with ``is_comoving_dist=True`` is
    the recommended approach for wp on mock catalogues.

    Parameters
    ----------
    gal : GalaxyCatalogue
        Galaxy catalogue (ra, dec, redshift required).
    rand : GalaxyCatalogue
        Random catalogue (ra, dec, redshift required).
    rp_bins : ndarray, shape (n_bins+1,)
        Projected separation bin edges [Mpc].
    pi_max : float
        Maximum line-of-sight integration limit [Mpc].  Rounded to the
        nearest integer because corrfunc uses dπ = 1 Mpc bins.
    cosmo : FlatLambdaCDM
        Cosmology for redshift-to-comoving-distance conversion.
    n_threads : int
        Number of OpenMP threads for corrfunc.

    Returns
    -------
    rp_centres : ndarray, shape (n_bins,)
        Geometric mean projected separation per bin [Mpc].
    wp_vals : ndarray, shape (n_bins,)
        Projected correlation function [Mpc].
    var_wp : ndarray, shape (n_bins,)
        Poisson variance estimate from DD pair counts [Mpc^2].
        Bins with zero DD pairs are assigned ``np.inf``.

    Performance
    -----------
    ~3 s/call  (N_gal=5000, N_rand=50000, n_bins=20, pi_max=100, 4 threads, CPU x86-64)
    """
    rand = rand.subsample(5 * gal.n)
    rp_bins = np.asarray(rp_bins, dtype=np.float64)
    pi_max_int = int(round(pi_max))
    n_rp = len(rp_bins) - 1

    chi_gal = gal.comoving_distance(cosmo).astype(np.float64)
    chi_rand = rand.comoving_distance(cosmo).astype(np.float64)

    kw = dict(
        cosmology=2,
        nthreads=n_threads,
        pimax=pi_max_int,
        binfile=rp_bins,
        is_comoving_dist=True,
        weight_type="pair_product",
    )

    dd_res = DDrppi_mocks(
        True,
        RA1=np.asarray(gal.ra, dtype=np.float64),
        DEC1=np.asarray(gal.dec, dtype=np.float64),
        CZ1=chi_gal,
        weights1=np.asarray(gal.weight, dtype=np.float64),
        **kw,
    )
    dr_res = DDrppi_mocks(
        False,
        RA1=np.asarray(gal.ra, dtype=np.float64),
        DEC1=np.asarray(gal.dec, dtype=np.float64),
        CZ1=chi_gal,
        weights1=np.asarray(gal.weight, dtype=np.float64),
        RA2=np.asarray(rand.ra, dtype=np.float64),
        DEC2=np.asarray(rand.dec, dtype=np.float64),
        CZ2=chi_rand,
        weights2=np.asarray(rand.weight, dtype=np.float64),
        **kw,
    )
    rr_res = DDrppi_mocks(
        True,
        RA1=np.asarray(rand.ra, dtype=np.float64),
        DEC1=np.asarray(rand.dec, dtype=np.float64),
        CZ1=chi_rand,
        weights1=np.asarray(rand.weight, dtype=np.float64),
        **kw,
    )

    # Normalization must use the sum of weights, not raw count, because
    # Corrfunc returns weighted pair sums (Σ w_i w_j) under pair_product.
    # For uniform weights Σ w_i = N so this is backwards-compatible.
    w_gal = float(np.asarray(gal.weight, dtype=np.float64).sum())
    w_rand = float(np.asarray(rand.weight, dtype=np.float64).sum())

    wp_vals = np.asarray(
        convert_rp_pi_counts_to_wp(
            w_gal,
            w_gal,
            w_rand,
            w_rand,
            dd_res,
            dr_res,
            dr_res,
            rr_res,
            n_rp,
            pi_max_int,
            estimator="LS",
        ),
        dtype=float,
    )

    # Poisson variance: sum DD pair counts per rp bin (over all pi slices)
    dd_counts = np.array(
        [dd_res["npairs"][i * pi_max_int : (i + 1) * pi_max_int].sum() for i in range(n_rp)],
        dtype=float,
    )
    valid = (dd_counts > 0) & np.isfinite(wp_vals)
    var_wp = np.where(valid, wp_vals**2 / np.maximum(dd_counts, 1), np.inf)

    rp_centres = np.sqrt(rp_bins[:-1] * rp_bins[1:])
    return rp_centres, wp_vals, var_wp