Source code for sum_stat.covariance.jackknife

"""Spatial jackknife covariance estimation using HEALPix patches."""

from __future__ import annotations

from collections.abc import Callable

import numpy as np
from sklearn.cluster import KMeans


[docs] def assign_jackknife_regions( ra: np.ndarray, dec: np.ndarray, n_regions: int = 100, seed: int = 0, ) -> np.ndarray: """Assign objects to spatial jackknife regions via KMeans clustering. Objects are projected onto the unit sphere (x, y, z) and clustered into ``n_regions`` groups of approximately equal angular area using KMeans. Parameters ---------- ra : ndarray, shape (N,) Right ascension [degrees]. dec : ndarray, shape (N,) Declination [degrees]. n_regions : int Number of jackknife regions. seed : int Random seed for KMeans initialisation. Returns ------- labels : ndarray, shape (N,) Integer region labels 0 … n_regions-1. """ ra_r = np.deg2rad(ra) dec_r = np.deg2rad(dec) x = np.cos(dec_r) * np.cos(ra_r) y = np.cos(dec_r) * np.sin(ra_r) z = np.sin(dec_r) coords = np.column_stack([x, y, z]) km = KMeans(n_clusters=n_regions, random_state=seed, n_init=3) labels = km.fit_predict(coords) return labels.astype(np.int32)
[docs] def jackknife_covariance( ra: np.ndarray, dec: np.ndarray, estimator: Callable[[np.ndarray], np.ndarray], n_regions: int = 100, seed: int = 0, ) -> tuple[np.ndarray, np.ndarray]: """Spatial jackknife covariance matrix. Divides the footprint into ``n_regions`` HEALPix-like patches, then evaluates ``estimator`` n_regions times, each time omitting one patch. Parameters ---------- ra : ndarray, shape (N,) Right ascension of the objects [degrees]. dec : ndarray, shape (N,) Declination of the objects [degrees]. estimator : Callable Function that takes a boolean mask array of shape (N,) indicating which objects to include, and returns an ndarray of shape (n_out,). n_regions : int Number of jackknife regions. seed : int Random seed for KMeans. Returns ------- mean_estimate : ndarray, shape (n_out,) Mean of the jackknife subsamples (≈ full-sample estimate). cov : ndarray, shape (n_out, n_out) Jackknife covariance: (N_jk - 1)/N_jk * Σ (x_k - x̄)(x_k - x̄)^T Performance ----------- ~depends on estimator cost; overhead ~1 ms/region for region assignment """ labels = assign_jackknife_regions(ra, dec, n_regions, seed) return jackknife_covariance_from_labels(ra, dec, labels, estimator, n_regions)
[docs] def jackknife_covariance_from_labels( ra: np.ndarray, dec: np.ndarray, labels: np.ndarray, estimator: Callable[[np.ndarray], np.ndarray], n_regions: int, ) -> tuple[np.ndarray, np.ndarray]: """Jackknife covariance from pre-assigned region labels. Parameters ---------- ra, dec : ndarray, shape (N,) Positions (only used to pass to estimator via mask). labels : ndarray, shape (N,) Pre-assigned region labels 0 … n_regions-1. estimator : Callable Takes a boolean include-mask of shape (N,), returns ndarray (n_out,). n_regions : int Number of jackknife regions. Returns ------- mean_estimate : ndarray, shape (n_out,) cov : ndarray, shape (n_out, n_out) """ subsamples = [] for k in range(n_regions): mask = labels != k # leave-one-out subsample = np.asarray(estimator(mask)) subsamples.append(subsample) return jackknife_covariance_from_subsamples(np.array(subsamples))
[docs] def jackknife_covariance_from_subsamples( subsamples: np.ndarray, ) -> tuple[np.ndarray, np.ndarray]: """Jackknife covariance from pre-computed leave-one-out estimates. Parameters ---------- subsamples : ndarray, shape (n_jack, n_bins) Leave-one-out estimate vectors. Returns ------- mean_estimate : ndarray, shape (n_bins,) Mean of the jackknife subsamples. cov : ndarray, shape (n_bins, n_bins) Jackknife covariance matrix: (N-1)/N * Σ_k (x_k - x̄)(x_k - x̄)^T """ subsamples = np.asarray(subsamples) n_jack = subsamples.shape[0] mean = np.mean(subsamples, axis=0) delta = subsamples - mean[np.newaxis, :] cov = (n_jack - 1.0) / n_jack * np.einsum("ki,kj->ij", delta, delta) return mean, cov