Source code for yaw.core.abc

"""This module implements some abstract base classes that define the interfaces
for high level containers in other modules.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import asdict
from typing import TYPE_CHECKING, Any, Type, TypeVar

import h5py
import numpy as np
import pandas as pd
from numpy.typing import NDArray

if TYPE_CHECKING:  # pragma: no cover
    from pandas import IntervalIndex

    from yaw.core.containers import Indexer
    from yaw.core.utils import TypePathStr

__all__ = [
    "PatchedQuantity",
    "BinnedQuantity",
    "concatenate_bin_edges",
    "HDFSerializable",
    "DictRepresentation",
]


_Tpatched = TypeVar("_Tpatched", bound="PatchedQuantity")


class PatchedQuantity(ABC):
    """Interface for containers whose data is organised in spatial patches."""

    @property
    @abstractmethod
    def n_patches(self) -> int:
        """The number of spatial patches held by the container."""

    @property
    @abstractmethod
    def patches(self) -> Indexer:
        """Indexer over the spatial patches of the container.

        The returned :obj:`~yaw.core.containers.Indexer` can be iterated to
        visit the patches one by one, or indexed to select a subset of them.
        In both cases a new container instance holding the selected data is
        produced.

        .. Note::
            Indexing follows the rules of a one-dimensional numpy array.

        Returns:
            :obj:`yaw.core.containers.Indexer`
        """

    @abstractmethod
    def concatenate_patches(self: _Tpatched, *data: _Tpatched) -> _Tpatched:
        """Merge pair count containers that share the same redshift binning.

        The patch axes are extended, so that the merged data array is a block
        matrix built from the input arrays, i.e. every element describing a
        correlation between two different inputs is set to zero.

        .. Note::
            The redshift binning of all inputs must be identical. Cross- and
            autocorrelation containers cannot be merged with each other.

        Args:
            *data:
                Containers of the same type, appended along the patch
                dimension of this container.

        Returns:
            New instance of this container with combined data.
        """
_Tbinned = TypeVar("_Tbinned", bound="BinnedQuantity")


class BinnedQuantity(ABC):
    """Base class for an object that has data organised in redshift bins.

    Subclasses provide the binning through :meth:`get_binning`; the derived
    properties (``n_bins``, ``mids``, ``edges``, ``dz``, ``closed``) are all
    computed from that binning on every access.
    """

    @abstractmethod
    def get_binning(self) -> IntervalIndex:
        """Get the underlying, exact redshift bin intervals.

        Returns:
            :obj:`pandas.IntervalIndex`
        """

    def __repr__(self) -> str:
        name = self.__class__.__name__
        n_bins = self.n_bins
        binning = self.get_binning()
        # lowest and highest redshift edge, formatted with three decimals
        z = f"{binning[0].left:.3f}...{binning[-1].right:.3f}"
        return f"{name}({n_bins=}, {z=})"

    @property
    def n_bins(self) -> int:
        """Get the number of redshift bins."""
        return len(self.get_binning())

    @property
    def mids(self) -> NDArray[np.float64]:
        """Get the centers of the redshift bins as array."""
        # Vectorised midpoint computation instead of looping over Interval
        # objects. np.array copies, so the caller never receives a view into
        # data owned by the index.
        return np.array(self.get_binning().mid)

    @property
    def edges(self) -> NDArray[np.float64]:
        """Get the edges of the redshift bins as flat array."""
        binning = self.get_binning()
        # all left edges plus the right edge of the last bin
        return np.append(binning.left, binning.right[-1])

    @property
    def dz(self) -> NDArray[np.float64]:
        """Get the width of the redshift bins as array."""
        return np.diff(self.edges)

    @property
    def closed(self) -> str:
        """Specifies on which side the redshift bin intervals are closed, can
        be: ``left``, ``right``, ``both``, ``neither``."""
        return self.get_binning().closed

    @property
    @abstractmethod
    def bins(self) -> Indexer:
        """An :obj:`~yaw.core.containers.Indexer` attribute that supports
        iteration over the bins or selecting a subset of the bins.

        The indexer always returns new container instances with the indexed
        data subset or the current item when iterating.

        .. Warning::
            Indexing rules for a one-dimensional numpy array apply, however if
            the resulting binning is not contiguous or contains repeated bins,
            some operations on the returned container may fail.

        Returns:
            :obj:`yaw.core.containers.Indexer`
        """

    def is_compatible(self: _Tbinned, other: _Tbinned, require: bool = False) -> bool:
        """Check whether this instance is compatible with another instance.

        Two objects are compatible if ``other`` is an instance of the class of
        this object and both have identical redshift binning.

        Args:
            other (:obj:`BinnedQuantity`):
                Object instance to compare to.
            require (:obj:`bool`, optional)
                Raise a :exc:`ValueError` instead of returning ``False`` if
                the binning of both objects does not agree.

        Returns:
            :obj:`bool`

        Raises:
            TypeError:
                If ``other`` is not an instance of the class of this object.
                This is raised independently of ``require``.
            ValueError:
                If ``require`` is set and the number of bins or the bin
                intervals differ.
        """
        if not isinstance(other, self.__class__):
            raise TypeError(
                f"object of type {type(other)} is not compatible with "
                f"{self.__class__}"
            )
        # compare the lengths first, the element-wise comparison below needs
        # binnings of equal length
        if self.n_bins != other.n_bins:
            if require:
                raise ValueError("number of bins do not agree")
            return False
        if np.any(self.get_binning() != other.get_binning()):
            if require:
                raise ValueError("binning is not identical")
            return False
        return True

    @abstractmethod
    def concatenate_bins(self: _Tbinned, *data: _Tbinned) -> _Tbinned:
        """Concatenate pair count data containers with equal patches.

        The data is merged by appending the data along the redshift binning
        axis.

        .. Note::
            Necessary condition for merging is that the patch numbers are
            identical and that the merged binning is contiguous and
            non-overlapping.

            Cannot merge cross- with autocorrelation containers.

        Args:
            *data:
                Containers of same type that are appended along the redshift
                binning axis of this container.

        Returns:
            New instance of this container with combined data.
        """
def concatenate_bin_edges(*patched: BinnedQuantity) -> IntervalIndex:
    """Concatenate the binning of a set of data containers.

    The input containers are automatically sorted by the lowest edge of their
    redshift binning. The merged binning must be contiguous and
    non-overlapping, i.e. the final edge of the previous binning must be
    identical to the lowest edge of the next binning.

    .. Note::
        This function only verifies that the bin edges are contiguous. It does
        not compare the patch numbers of the containers. The ``closed``
        attribute of the result is taken from the container with the lowest
        redshift edge.

    Args:
        *patched:
            Containers providing ``edges`` and ``closed`` attributes.

    Returns:
        :obj:`pandas.IntervalIndex`

    Raises:
        ValueError:
            If no container is provided or if the bin edges of consecutive
            containers are not contiguous.
    """
    if not patched:
        raise ValueError("cannot merge, no data containers provided")

    ordered = sorted(patched, key=lambda p: p.edges[0])
    reference = ordered[0]
    edges = reference.edges
    for other in ordered[1:]:
        other_edges = other.edges  # evaluate the attribute only once
        if edges[-1] != other_edges[0]:
            raise ValueError("cannot merge, bins are not contiguous")
        # the first edge duplicates the last edge collected so far
        edges = np.concatenate([edges, other_edges[1:]])
    return pd.IntervalIndex.from_breaks(edges, closed=reference.closed)


_Thdf = TypeVar("_Thdf", bound="HDFSerializable")
class HDFSerializable(ABC):
    """Base class for an object that can be serialised into a HDF5 file.

    Subclasses implement :meth:`from_hdf` and :meth:`to_hdf`, which operate on
    an HDF5 group. :meth:`from_file` and :meth:`to_file` open a file and pass
    the opened file object to these methods.
    """

    @classmethod
    @abstractmethod
    def from_hdf(cls: Type[_Thdf], source: h5py.Group) -> _Thdf:
        """Create a class instance by deserialising data from a HDF5 group.

        Args:
            source (:obj:`h5py.Group`):
                Group in an opened HDF5 file that contains the serialised
                data.

        Returns:
            :obj:`HDFSerializable`
        """

    @abstractmethod
    def to_hdf(self, dest: h5py.Group) -> None:
        """Serialise the class instance into an existing HDF5 group.

        Args:
            dest (:obj:`h5py.Group`):
                Group in which the serialised data structures are created.
        """

    @classmethod
    def from_file(cls: Type[_Thdf], path: TypePathStr) -> _Thdf:
        """Create a class instance by deserialising data from a HDF5 file.

        Args:
            path (:obj:`pathlib.Path`, :obj:`str`):
                Path of the HDF5 file that contains the serialised data.

        Returns:
            :obj:`HDFSerializable`
        """
        # Open explicitly read-only instead of relying on the default mode of
        # the installed h5py version, so that loading can never create or
        # modify the file.
        with h5py.File(str(path), mode="r") as f:
            return cls.from_hdf(f)

    def to_file(self, path: TypePathStr) -> None:
        """Serialise the class instance to a new HDF5 file.

        Args:
            path (:obj:`pathlib.Path`, :obj:`str`):
                Path at which the HDF5 file is created. The file is opened
                with mode ``"w"``.
        """
        with h5py.File(str(path), mode="w") as f:
            self.to_hdf(f)
_Tdict = TypeVar("_Tdict", bound="DictRepresentation")


class DictRepresentation(ABC):
    """Base class for an object that can be serialised into a dictionary."""

    @classmethod
    def from_dict(
        cls: Type[_Tdict],
        the_dict: dict[str, Any],
        **kwargs: Any,  # passing additional constructor data
    ) -> _Tdict:
        """Create a class instance from a dictionary representation of the
        minimally required data.

        Args:
            the_dict (:obj:`dict`):
                Dictionary containing the data. Its items are passed as
                keyword arguments to the class constructor.
            **kwargs:
                Additional data needed to construct the class instance. This
                base implementation accepts but ignores them.
        """
        return cls(**the_dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialise the class instance to a dictionary containing a minimal
        set of required data.

        This implementation relies on :func:`dataclasses.asdict` and therefore
        requires the instance to be a dataclass instance.

        Returns:
            :obj:`dict`

        Raises:
            TypeError:
                If the instance is not a dataclass instance.
        """
        return asdict(self)