Source code for sum_stat.covariance.bootstrap

"""Spatial block bootstrap covariance estimation."""

from __future__ import annotations

from collections.abc import Callable

import numpy as np

from .jackknife import assign_jackknife_regions


[docs] def bootstrap_covariance( ra: np.ndarray, dec: np.ndarray, estimator: Callable[[np.ndarray], np.ndarray], n_bootstrap: int = 500, n_patches: int = 50, seed: int = 0, ) -> tuple[np.ndarray, np.ndarray]: """Spatial block bootstrap covariance matrix. The sky footprint is divided into ``n_patches`` spatial blocks via KMeans clustering. Each bootstrap resample draws ``n_patches`` blocks with replacement and evaluates the estimator on the combined selection. Parameters ---------- ra : ndarray, shape (N,) Right ascension [degrees]. dec : ndarray, shape (N,) Declination [degrees]. estimator : Callable Takes a boolean include-mask of shape (N,), returns ndarray (n_out,). n_bootstrap : int Number of bootstrap realisations. n_patches : int Number of spatial blocks. seed : int Random seed. Returns ------- mean_estimate : ndarray, shape (n_out,) Mean over bootstrap realisations (≈ full-sample estimate). cov : ndarray, shape (n_out, n_out) Bootstrap covariance: 1/(B-1) * Σ_b (x_b - x̄)(x_b - x̄)^T Performance ----------- ~depends on estimator cost × n_bootstrap; overhead ~5 ms/bootstrap for N=5000 """ labels = assign_jackknife_regions(ra, dec, n_patches, seed) rng = np.random.default_rng(seed) samples = [] for _ in range(n_bootstrap): chosen_patches = rng.integers(0, n_patches, size=n_patches) mask = np.zeros(len(ra), dtype=bool) for p in chosen_patches: mask |= labels == p val = np.asarray(estimator(mask)) samples.append(val) samples = np.array(samples) # (n_bootstrap, n_out) mean = np.mean(samples, axis=0) delta = samples - mean[np.newaxis, :] cov = np.einsum("bi,bj->ij", delta, delta) / (n_bootstrap - 1.0) return mean, cov