# Source code for yaw.core.abc

"""This module implements some abstract base classes that define the interfaces
for high level containers in other modules.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import asdict
from typing import TYPE_CHECKING, Any, Type, TypeVar

import h5py
import numpy as np
import pandas as pd
from numpy.typing import NDArray

if TYPE_CHECKING:  # pragma: no cover
    from pandas import IntervalIndex

    from yaw.core.containers import Indexer
    from yaw.core.utils import TypePathStr

# Public names of this module, i.e. what a star-import exposes.
__all__ = [
    "PatchedQuantity",
    "BinnedQuantity",
    "concatenate_bin_edges",
    "HDFSerializable",
    "DictRepresentation",
]


# Type variable bound to PatchedQuantity; used in method annotations to state
# that the same container type is accepted and returned.
_Tpatched = TypeVar("_Tpatched", bound="PatchedQuantity")



class PatchedQuantity(ABC):
    """Base class for an object that has data organised in spatial patches.

    Subclasses must implement :attr:`n_patches`, :attr:`patches` and
    :meth:`concatenate_patches`.
    """

    @property
    @abstractmethod
    def n_patches(self) -> int:
        """Get the number of spatial patches."""
        pass

    @property
    @abstractmethod
    def patches(self) -> Indexer:
        """An :obj:`~yaw.core.containers.Indexer` attribute that supports
        iteration over the spatial patches or selecting a subset of the patches.

        The indexer always returns new container instances with the indexed
        data subset or the current item when iterating.

        .. Note::
            Indexing rules for a one-dimensional numpy array apply.

        Returns:
            :obj:`yaw.core.containers.Indexer`
        """
        pass

    @abstractmethod
    def concatenate_patches(self: _Tpatched, *data: _Tpatched) -> _Tpatched:
        """Concatenate pair count data containers with equal redshift binning.

        The data is merged by extending the dimension of the patch axes. The
        resulting data array will be a block matrix of the input data arrays,
        i.e. all elements with correlations between different inputs set to
        zero.

        .. Note::
            Necessary condition for merging is that the redshift binning of
            all inputs is identical. Cannot merge cross- with autocorrelation
            containers.

        Args:
            *data:
                Containers of same type that are appended to the patch dimension
                of this container.

        Returns:
            New instance of this container with combined data.
        """
        pass




# Type variable bound to BinnedQuantity; used in method annotations to state
# that the same container type is accepted and returned.
_Tbinned = TypeVar("_Tbinned", bound="BinnedQuantity")



class BinnedQuantity(ABC):
    """Base class for an object that has data organised in redshift bins.

    Subclasses must implement :meth:`get_binning`, :attr:`bins` and
    :meth:`concatenate_bins`. All other attributes are derived from the
    intervals returned by :meth:`get_binning`.
    """

    @abstractmethod
    def get_binning(self) -> IntervalIndex:
        """Get the underlying, exact redshift bin intervals.

        Returns:
            :obj:`pandas.IntervalIndex`
        """
        pass

    def __repr__(self) -> str:
        name = self.__class__.__name__
        n_bins = self.n_bins
        binning = self.get_binning()
        # only the outermost edges of the binning are displayed
        z = f"{binning[0].left:.3f}...{binning[-1].right:.3f}"
        return f"{name}({n_bins=}, {z=})"

    @property
    def n_bins(self) -> int:
        """Get the number of redshift bins."""
        return len(self.get_binning())

    @property
    def mids(self) -> NDArray[np.float64]:
        """Get the centers of the redshift bins as array."""
        # vectorised midpoint computation; np.array() yields a fresh array
        return np.array(self.get_binning().mid)

    @property
    def edges(self) -> NDArray[np.float64]:
        """Get the edges of the redshift bins as flat array."""
        binning = self.get_binning()
        # all left edges plus the right edge of the final bin
        return np.append(binning.left, binning.right[-1])

    @property
    def dz(self) -> NDArray[np.float64]:
        """Get the width of the redshift bins as array."""
        return np.diff(self.edges)

    @property
    def closed(self) -> str:
        """Specifies on which side the redshift bin intervals are closed, can
        be: ``left``, ``right``, ``both``, ``neither``."""
        return self.get_binning().closed

    @property
    @abstractmethod
    def bins(self) -> Indexer:
        """An :obj:`~yaw.core.containers.Indexer` attribute that supports
        iteration over the bins or selecting a subset of the bins.

        The indexer always returns new container instances with the indexed
        data subset or the current item when iterating.

        .. Warning::
            Indexing rules for a one-dimensional numpy array apply, however if
            the resulting binning is not contiguous or contains repeated bins,
            some operations on the returned container may fail.

        Returns:
            :obj:`yaw.core.containers.Indexer`
        """
        pass

    def is_compatible(self: _Tbinned, other: _Tbinned, require: bool = False) -> bool:
        """Check whether this instance is compatible with another instance.

        Ensures that both objects are instances of the same class and that the
        redshift binning is identical.

        Args:
            other (:obj:`BinnedQuantity`):
                Object instance to compare to.
            require (:obj:`bool`, optional):
                Raise a ValueError if the number of bins or the binning
                differ, instead of returning ``False``.

        Returns:
            :obj:`bool`

        Raises:
            TypeError:
                If ``other`` is not an instance of the class of this object.
                This is raised independently of ``require``.
            ValueError:
                If ``require`` is set and the number of bins or the bin
                intervals do not agree.
        """
        if not isinstance(other, self.__class__):
            raise TypeError(
                f"object of type {type(other)} is not compatible with "
                f"{self.__class__}"
            )
        # compare the lengths first, the element-wise comparison below
        # is only meaningful for binnings of equal length
        if self.n_bins != other.n_bins:
            if require:
                raise ValueError("number of bins do not agree")
            return False
        if np.any(self.get_binning() != other.get_binning()):
            if require:
                raise ValueError("binning is not identical")
            return False
        return True

    @abstractmethod
    def concatenate_bins(self: _Tbinned, *data: _Tbinned) -> _Tbinned:
        """Concatenate pair count data containers with equal patches.

        The data is merged by appending the data along the redshift binning
        axis.

        .. Note::
            Necessary condition for merging is that the patch numbers are
            identical and that the merged binning is contiguous and
            non-overlapping. Cannot merge cross- with autocorrelation
            containers.

        Args:
            *data:
                Containers of same type that are appended along the redshift
                binning axis of this container.

        Returns:
            New instance of this container with combined data.
        """
        pass




def concatenate_bin_edges(*patched: BinnedQuantity) -> IntervalIndex:
    """Concatenate the binning of a set of data containers.

    The input containers are automatically sorted by the lowest edge of their
    redshift binning. Necessary condition for merging is that the resulting
    binning is contiguous and non-overlapping, i.e. the final edge of the
    previous binning must be identical to the lowest edge of the next binning.

    Args:
        *patched (:obj:`BinnedQuantity`):
            Containers providing the bin edges (``edges``) to merge. The
            ``closed`` attribute is taken from the container with the lowest
            first edge.

    Returns:
        :obj:`pandas.IntervalIndex`

    Raises:
        ValueError:
            If no containers are provided or if the binnings are not
            contiguous.
    """
    if not patched:
        raise ValueError("cannot merge, no inputs provided")
    # evaluate the (computed) edges only once per container and sort the
    # containers by their lowest edge
    ordered = sorted(
        ((container.edges, container) for container in patched),
        key=lambda pair: pair[0][0],
    )
    first_edges, reference = ordered[0]
    pieces = [first_edges]
    upper = first_edges[-1]
    for edges, _ in ordered[1:]:
        if edges[0] != upper:
            raise ValueError("cannot merge, bins are not contiguous")
        # the first edge is shared with the previous binning, skip it
        pieces.append(edges[1:])
        upper = edges[-1]
    return pd.IntervalIndex.from_breaks(
        np.concatenate(pieces), closed=reference.closed
    )


# Type variable bound to HDFSerializable; lets the classmethod constructors be
# annotated as returning an instance of the class they are called on.
_Thdf = TypeVar("_Thdf", bound="HDFSerializable")



class HDFSerializable(ABC):
    """Base class for an object that can be serialised into a HDF5 file.

    Subclasses implement :meth:`from_hdf` and :meth:`to_hdf`, which operate on
    an HDF5 group. :meth:`from_file` and :meth:`to_file` wrap these methods
    and take care of opening and closing the file.
    """

    @classmethod
    @abstractmethod
    def from_hdf(cls: Type[_Thdf], source: h5py.Group) -> _Thdf:
        """Create a class instance by deserialising data from a HDF5 group.

        Args:
            source (:obj:`h5py.Group`):
                Group in an opened HDF5 file that contains the serialised data.

        Returns:
            :obj:`HDFSerializable`
        """
        pass

    @abstractmethod
    def to_hdf(self, dest: h5py.Group) -> None:
        """Serialise the class instance into an existing HDF5 group.

        Args:
            dest (:obj:`h5py.Group`):
                Group in which the serialised data structures are created.
        """
        pass

    @classmethod
    def from_file(cls: Type[_Thdf], path: TypePathStr) -> _Thdf:
        """Create a class instance by deserialising data from a HDF5 file.

        The file is opened read-only and its root group is passed to
        :meth:`from_hdf`.

        Args:
            path (:obj:`pathlib.Path`, :obj:`str`):
                Path of the HDF5 file that contains the necessary data.

        Returns:
            :obj:`HDFSerializable`
        """
        # request read-only access explicitly instead of relying on the
        # default mode of the installed h5py version
        with h5py.File(str(path), mode="r") as f:
            return cls.from_hdf(f)

    def to_file(self, path: TypePathStr) -> None:
        """Serialise the class instance to a new HDF5 file.

        The file is opened with mode ``"w"`` and its root group is passed to
        :meth:`to_hdf`.

        Args:
            path (:obj:`pathlib.Path`, :obj:`str`):
                Path at which the HDF5 file is created.
        """
        with h5py.File(str(path), mode="w") as f:
            self.to_hdf(f)




# Type variable bound to DictRepresentation; lets from_dict() be annotated as
# returning an instance of the class it is called on.
_Tdict = TypeVar("_Tdict", bound="DictRepresentation")



class DictRepresentation(ABC):
    """Base class for an object that can be serialised into a dictionary.

    The default implementations construct the instance from keyword arguments
    and serialise it with :func:`dataclasses.asdict`.
    """

    @classmethod
    def from_dict(
        cls: Type[_Tdict],
        the_dict: dict[str, Any],
        **kwargs: Any,  # passing additional constructor data
    ) -> _Tdict:
        """Create a class instance from a dictionary representation of the
        minimally required data.

        Args:
            the_dict (:obj:`dict`):
                Dictionary containing the data, unpacked as keyword arguments
                for the class constructor.
            **kwargs:
                Additional data needed to construct the class instance. This
                default implementation accepts but does not use these values.

        Returns:
            New class instance.
        """
        return cls(**the_dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialise the class instance to a dictionary containing a minimal set
        of required data.

        The default implementation calls :func:`dataclasses.asdict` on the
        instance.

        Returns:
            :obj:`dict`
        """
        return asdict(self)