Source code for sum_stat.twopcf.projected

"""Projected two-point correlation function w_p(r_p).

Provides:
- corrfunc-based measurement via :func:`wp` (Landy-Szalay, pair counting)
"""

from __future__ import annotations

import numpy as np
from astropy.cosmology import FlatLambdaCDM
from Corrfunc.mocks import DDrppi_mocks
from Corrfunc.utils import convert_rp_pi_counts_to_wp

from ..catalogue import GalaxyCatalogue


[docs] def wp( gal: GalaxyCatalogue, rand: GalaxyCatalogue, rp_bins: np.ndarray, pi_max: float, cosmo: FlatLambdaCDM, n_threads: int = 4, ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: """Projected correlation function w_p(r_p) via Landy-Szalay estimator. Uses Corrfunc ``DDrppi_mocks`` with pre-computed comoving distances to count pairs in (r_p, π) bins, then integrates over π up to ``pi_max``. Notes ----- The previous treecorr-based implementation using ``FisherRperp`` with ``min_rpar=0`` produced all-negative wp due to a signed-rpar bug: treecorr uses ``r_par = r2 - r1`` (by catalog index, not distance), so ``min_rpar=0`` discards the majority of auto-correlation (DD, RR) pairs while only halving cross-correlation (DR) pairs. Corrfunc with ``is_comoving_dist=True`` is the recommended approach for wp on mock catalogues. Parameters ---------- gal : GalaxyCatalogue Galaxy catalogue (ra, dec, redshift required). rand : GalaxyCatalogue Random catalogue (ra, dec, redshift required). rp_bins : ndarray, shape (n_bins+1,) Projected separation bin edges [Mpc]. pi_max : float Maximum line-of-sight integration limit [Mpc]. Rounded to the nearest integer because corrfunc uses dπ = 1 Mpc bins. cosmo : FlatLambdaCDM Cosmology for redshift-to-comoving-distance conversion. n_threads : int Number of OpenMP threads for corrfunc. Returns ------- rp_centres : ndarray, shape (n_bins,) Geometric mean projected separation per bin [Mpc]. wp_vals : ndarray, shape (n_bins,) Projected correlation function [Mpc]. var_wp : ndarray, shape (n_bins,) Poisson variance estimate from DD pair counts [Mpc^2]. Bins with zero DD pairs are assigned ``np.inf``. Performance ----------- ~3 s/call (N_gal=5000, N_rand=50000, n_bins=20, pi_max=100, 4 threads, CPU x86-64) """ rand = rand.subsample(5 * gal.n) rp_bins = np.asarray(rp_bins, dtype=np.float64) pi_max_int = int(round(pi_max)) n_rp = len(rp_bins) - 1 chi_gal = gal.comoving_distance(cosmo).astype(np.float64) chi_rand = rand.comoving_distance(cosmo).astype(np.float64) kw = dict( cosmology=2, nthreads=n_threads, pimax=pi_max_int, binfile=rp_bins, is_comoving_dist=True, weight_type="pair_product", ) dd_res = DDrppi_mocks( True, RA1=np.asarray(gal.ra, dtype=np.float64), DEC1=np.asarray(gal.dec, dtype=np.float64), CZ1=chi_gal, weights1=np.asarray(gal.weight, dtype=np.float64), **kw, ) dr_res = DDrppi_mocks( False, RA1=np.asarray(gal.ra, dtype=np.float64), DEC1=np.asarray(gal.dec, dtype=np.float64), CZ1=chi_gal, weights1=np.asarray(gal.weight, dtype=np.float64), RA2=np.asarray(rand.ra, dtype=np.float64), DEC2=np.asarray(rand.dec, dtype=np.float64), CZ2=chi_rand, weights2=np.asarray(rand.weight, dtype=np.float64), **kw, ) rr_res = DDrppi_mocks( True, RA1=np.asarray(rand.ra, dtype=np.float64), DEC1=np.asarray(rand.dec, dtype=np.float64), CZ1=chi_rand, weights1=np.asarray(rand.weight, dtype=np.float64), **kw, ) # Normalization must use the sum of weights, not raw count, because # Corrfunc returns weighted pair sums (Σ w_i w_j) under pair_product. # For uniform weights Σ w_i = N so this is backwards-compatible. w_gal = float(np.asarray(gal.weight, dtype=np.float64).sum()) w_rand = float(np.asarray(rand.weight, dtype=np.float64).sum()) wp_vals = np.asarray( convert_rp_pi_counts_to_wp( w_gal, w_gal, w_rand, w_rand, dd_res, dr_res, dr_res, rr_res, n_rp, pi_max_int, estimator="LS", ), dtype=float, ) # Poisson variance: sum DD pair counts per rp bin (over all pi slices) dd_counts = np.array( [dd_res["npairs"][i * pi_max_int : (i + 1) * pi_max_int].sum() for i in range(n_rp)], dtype=float, ) valid = (dd_counts > 0) & np.isfinite(wp_vals) var_wp = np.where(valid, wp_vals**2 / np.maximum(dd_counts, 1), np.inf) rp_centres = np.sqrt(rp_bins[:-1] * rp_bins[1:]) return rp_centres, wp_vals, var_wp