"""Spatial jackknife covariance estimation using HEALPix patches."""
from __future__ import annotations
from collections.abc import Callable
import numpy as np
from sklearn.cluster import KMeans
[docs]
def assign_jackknife_regions(
ra: np.ndarray,
dec: np.ndarray,
n_regions: int = 100,
seed: int = 0,
) -> np.ndarray:
"""Assign objects to spatial jackknife regions via KMeans clustering.
Objects are projected onto the unit sphere (x, y, z) and clustered
into ``n_regions`` groups of approximately equal angular area using
KMeans.
Parameters
----------
ra : ndarray, shape (N,)
Right ascension [degrees].
dec : ndarray, shape (N,)
Declination [degrees].
n_regions : int
Number of jackknife regions.
seed : int
Random seed for KMeans initialisation.
Returns
-------
labels : ndarray, shape (N,)
Integer region labels 0 … n_regions-1.
"""
ra_r = np.deg2rad(ra)
dec_r = np.deg2rad(dec)
x = np.cos(dec_r) * np.cos(ra_r)
y = np.cos(dec_r) * np.sin(ra_r)
z = np.sin(dec_r)
coords = np.column_stack([x, y, z])
km = KMeans(n_clusters=n_regions, random_state=seed, n_init=3)
labels = km.fit_predict(coords)
return labels.astype(np.int32)
[docs]
def jackknife_covariance(
ra: np.ndarray,
dec: np.ndarray,
estimator: Callable[[np.ndarray], np.ndarray],
n_regions: int = 100,
seed: int = 0,
) -> tuple[np.ndarray, np.ndarray]:
"""Spatial jackknife covariance matrix.
Divides the footprint into ``n_regions`` HEALPix-like patches, then
evaluates ``estimator`` n_regions times, each time omitting one patch.
Parameters
----------
ra : ndarray, shape (N,)
Right ascension of the objects [degrees].
dec : ndarray, shape (N,)
Declination of the objects [degrees].
estimator : Callable
Function that takes a boolean mask array of shape (N,) indicating
which objects to include, and returns an ndarray of shape (n_out,).
n_regions : int
Number of jackknife regions.
seed : int
Random seed for KMeans.
Returns
-------
mean_estimate : ndarray, shape (n_out,)
Mean of the jackknife subsamples (≈ full-sample estimate).
cov : ndarray, shape (n_out, n_out)
Jackknife covariance: (N_jk - 1)/N_jk * Σ (x_k - x̄)(x_k - x̄)^T
Performance
-----------
~depends on estimator cost; overhead ~1 ms/region for region assignment
"""
labels = assign_jackknife_regions(ra, dec, n_regions, seed)
return jackknife_covariance_from_labels(ra, dec, labels, estimator, n_regions)
[docs]
def jackknife_covariance_from_labels(
ra: np.ndarray,
dec: np.ndarray,
labels: np.ndarray,
estimator: Callable[[np.ndarray], np.ndarray],
n_regions: int,
) -> tuple[np.ndarray, np.ndarray]:
"""Jackknife covariance from pre-assigned region labels.
Parameters
----------
ra, dec : ndarray, shape (N,)
Positions (only used to pass to estimator via mask).
labels : ndarray, shape (N,)
Pre-assigned region labels 0 … n_regions-1.
estimator : Callable
Takes a boolean include-mask of shape (N,), returns ndarray (n_out,).
n_regions : int
Number of jackknife regions.
Returns
-------
mean_estimate : ndarray, shape (n_out,)
cov : ndarray, shape (n_out, n_out)
"""
subsamples = []
for k in range(n_regions):
mask = labels != k # leave-one-out
subsample = np.asarray(estimator(mask))
subsamples.append(subsample)
return jackknife_covariance_from_subsamples(np.array(subsamples))
[docs]
def jackknife_covariance_from_subsamples(
subsamples: np.ndarray,
) -> tuple[np.ndarray, np.ndarray]:
"""Jackknife covariance from pre-computed leave-one-out estimates.
Parameters
----------
subsamples : ndarray, shape (n_jack, n_bins)
Leave-one-out estimate vectors.
Returns
-------
mean_estimate : ndarray, shape (n_bins,)
Mean of the jackknife subsamples.
cov : ndarray, shape (n_bins, n_bins)
Jackknife covariance matrix: (N-1)/N * Σ_k (x_k - x̄)(x_k - x̄)^T
"""
subsamples = np.asarray(subsamples)
n_jack = subsamples.shape[0]
mean = np.mean(subsamples, axis=0)
delta = subsamples - mean[np.newaxis, :]
cov = (n_jack - 1.0) / n_jack * np.einsum("ki,kj->ij", delta, delta)
return mean, cov