Source code for yaw.randoms

"""
Implements random generators that can be sampled to create random catalogs for
correlation measurements.

The generators create uniform random coordinates (with optional constraints) and
can additionally draw weights or redshifts from a set of observed values.
"""

from __future__ import annotations

from abc import abstractmethod
from typing import TYPE_CHECKING

import numpy as np

from yaw.datachunk import DataChunk, DataChunkInfo, HandlesDataChunk, TypeDataChunk

HEALPY_ENABLED = False
"""Healpix-based randoms are enabled if healpy can be imported."""
try:
    import healpy

    HEALPY_ENABLED = True

except ImportError:
    pass

if TYPE_CHECKING:
    from numpy.typing import NDArray

__all__ = [
    "BoxRandoms",
    "HealPixRandoms",
]


class RandomsBase(HandlesDataChunk):
    """Meta-class for all random point generators."""

    @abstractmethod
    def __init__(
        self,
        *args,
        weights: NDArray | None = None,
        redshifts: NDArray | None = None,
        seed: int = 12345,
        **kwargs,
    ) -> None:
        self._chunk_info = DataChunkInfo(
            has_weights=weights is not None,
            has_redshifts=redshifts is not None,
        )
        self.reseed(seed)
        self.weights = weights
        self.redshifts = redshifts
        self.data_size = self.get_data_size()

    def get_data_size(self) -> int:
        """
        Get the number attached data samples to draw from.

        Checks the length of the :attr:`weights` and :attr:`redshifts` and
        returns their length. If neither are defined, returns -1.

        Returns:
            Number of observations or -1.

        Raises:
            ValueError:
                If the lengths of the arrays do not match.
        """
        if self.weights is None and self.redshifts is None:
            return -1
        elif self.weights is None:
            return len(self.redshifts)
        elif self.redshifts is None:
            return len(self.weights)

        if len(self.weights) != len(self.redshifts):
            raise ValueError(
                "number of 'weights' and 'redshifts' to draw from does not match"
            )
        return len(self.weights)

    def reseed(self, seed: int | None = None) -> None:
        if seed is not None:
            self.seed = int(seed)

        # compute seed the same way v2 used to do, which was how v1 used to do
        seeder = np.random.SeedSequence(self.seed)
        seed = seeder.spawn(1)[0]

        self.rng = np.random.default_rng(seed)

    @abstractmethod
    def _draw_coords(self, probe_size: int) -> tuple[NDArray, NDArray]:
        """
        Draw a number of random uniform coordinates in radian.

        Args:
            probe_size:
                Number of points to draw with repetition from the data samples.

        Returns:
            Tuple of arrays containing random right ascensions and declinations.
        """
        pass

    def _draw_attributes(self, probe_size: int) -> dict[str, NDArray]:
        """
        Draw a number of samples from attached data samples.

        Args:
            probe_size:
                Number of points to draw with repetition from the data samples.

        Returns:
            Dictionary with optional keys ``weights`` and ``redshifts`` with
            drawn samples.
        """
        if self.data_size == -1:
            return dict()

        data = dict()
        idx = self.rng.integers(0, self.data_size, size=probe_size)
        if self.has_weights:
            data["weights"] = self.weights[idx]
        if self.has_redshifts:
            data["redshifts"] = self.redshifts[idx]
        return data

    def __call__(self, probe_size: int) -> TypeDataChunk:
        """
        Draw a new sample of uniform random points.

        Args:
            probe_size:
                Number of points to generate.

        Returns:
            Dictionary of arrays, always contains keys ``ra`` and ``dec`` for
            coordinates of random points in radian. Optionally contains
            ``weights`` and/or ``redshifts`` if data as been provided to sample
            from.
        """
        ra, dec = self._draw_coords(probe_size)
        optionals = self._draw_attributes(probe_size)
        _, chunk = DataChunk.create(
            ra, dec, **optionals, degrees=False, chkfinite=False
        )
        return chunk

    def generate_dataframe(self, probe_size: int, *, degrees: bool = True):
        """
        Draw a new sample of uniform random points into a pandas DataFrame.

        Requires installing the optional dependency `pandas`.

        Args:
            probe_size:
                Number of points to generate.

        Keyword Args:
            degrees:
                Whether to return the coordinates in degrees (the default).

        Returns:
            A `pandas.DataFrame`, always contains keys ``ra`` and ``dec`` for
            coordinates of random points in degrees or radian. Optionally
            contains ``weights`` and/or ``redshifts`` if data as been provided
            to sample from.
        """
        try:
            import pandas as pd
        except ImportError as err:
            raise ImportError(
                "optional dependency 'pandas' required to generate DataFrames"
            ) from err

        df = pd.DataFrame.from_records(self(probe_size))
        if degrees:
            df["ra"] = np.rad2deg(df["ra"])
            df["dec"] = np.rad2deg(df["dec"])
        return df


[docs] class BoxRandoms(RandomsBase): """ Generates random points within a right ascension / declination window. Generators are used with the :meth:`~yaw.Catalog.from_random` method to create a catalog with uniformly distributed random coordiantes. Additional redshifts or weights (e.g. from an observed data sample) may be attached to randomly sample from their distribution. Call instance to generate random points. Args: ra_min: The lower limit of the right ascension in degrees. ra_max: The upper limit of the right ascension in degrees. dec_min: The lower limit of the declination in degrees. dec_max: The upper limit of the declination in degrees. weights: Optional array of weights to draw from. redshifts: Optional array of redshifts to draw from. seed: Integer number from which the random seed is generated. Attributes: weights: Optional array of weights to draw from. redshifts: Optional array of redshifts to draw from. """ def __init__( self, ra_min: float, ra_max: float, dec_min: float, dec_max: float, *, weights: NDArray | None = None, redshifts: NDArray | None = None, seed: int = 12345, ) -> None: super().__init__(weights=weights, redshifts=redshifts, seed=seed) self.x_min, self.y_min = self._sky2cylinder( np.deg2rad(ra_min), np.deg2rad(dec_min) ) self.x_max, self.y_max = self._sky2cylinder( np.deg2rad(ra_max), np.deg2rad(dec_max) ) def __repr__(self) -> str: string = repr(self._chunk_info) string = string.lstrip(str(type(self._chunk_info))) return f"{type(self).__name__}{string}" def _sky2cylinder(self, ra: NDArray, dec: NDArray) -> tuple[NDArray, NDArray]: x = ra y = np.sin(dec) return x, y def _cylinder2sky(self, x: NDArray, y: NDArray) -> tuple[NDArray, NDArray]: ra = x dec = np.arcsin(y) return ra, dec def _draw_coords(self, probe_size: int) -> tuple[NDArray, NDArray]: x = self.rng.uniform(self.x_min, self.x_max, probe_size) y = self.rng.uniform(self.y_min, self.y_max, probe_size) return self._cylinder2sky(x, y)
[docs] class HealPixRandoms(RandomsBase): """ Generates random points within a `HealPix` mask. The input mask can either be interpreted as a boolean mask or a probability map that indicates the probability at which random points should be generated in a given pixel. Requires installing ``healpy``. Call instance to generate random points. .. Caution:: To improve performance, this method does not create continuous random points, but randomly draws pixel center coordinates from the highest possible mask resolution supported by `HealPix`. This corresponds to a resolution of about :math:`2500` pixels/arcsec. Args: pix_values: Array with `HealPix` mask values. nested: Whether the input mask is in the `nested` format. is_mask: Whether the input mask should be interpreted as binary mask or as probability map (default). weights: Optional array of weights to draw from. redshifts: Optional array of redshifts to draw from. seed: Integer number from which the random seed is generated. Attributes: weights: Optional array of weights to draw from. redshifts: Optional array of redshifts to draw from. nside: The `HealPix` ``nside`` value of the input mask. Raises: ImportError: If ``healpy`` is not installed. """ def __init__( self, pix_values: NDArray, *, nested: bool = False, is_mask: bool = False, weights=None, redshifts=None, seed=12345, ): if not HEALPY_ENABLED: raise ImportError("could not import optional dependency 'healpy'") super().__init__(weights=weights, redshifts=redshifts, seed=seed) values = np.asarray(pix_values, dtype=np.float64) self.nside = healpy.npix2nside(len(values)) if np.any(values < 0.0): raise ValueError("pixel values must be positive for random generation") # check which of the pixels are masked if not nested: values = healpy.reorder(values, inp="RING", out="NESTED") self._ipix_unmasked = np.nonzero(values)[0] # compute the probability of drawing from unmasked pixels if is_mask: self._probability = None else: masked_values = values[self._ipix_unmasked] self._probability = masked_values / masked_values.sum() def _draw_coords(self, probe_size: int) -> tuple[NDArray, NDArray]: """ The general idea is to generate a list of pixels to draw coordinates from. Then go to that pixel, view it at the highest possible resolution and draw a random sub-pixel and use its center coordinate. """ MAX_ORDER = 29 MAX_NSIDE = 2**MAX_ORDER # sample random pixel IDs at this resolution # generate list of pixel IDs to draw from (factoring in pixel weight) ipix_draw = np.random.choice( self._ipix_unmasked, size=probe_size, p=self._probability, ) # transform pixel ID to resolution of MAX_ORDER (using nested convention) order = healpy.nside2order(self.nside) scale = 4 ** (MAX_ORDER - order) ipix_scaled = ipix_draw * scale # draw random pixel IDs at maximum resolution and get center coordinates ipix_rand = ipix_scaled + self.rng.integers(0, scale, size=probe_size) ra, dec = healpy.pix2ang( nside=MAX_NSIDE, ipix=ipix_rand, nest=True, lonlat=True ) return np.deg2rad(ra), np.deg2rad(dec)