Source code for refnx.analysis.bounds

import math
from scipy.stats import rv_continuous, uniform
from scipy.stats._distn_infrastructure import rv_frozen
from scipy._lib._util import check_random_state
import numpy as np



[docs]
class Bounds:
    """
    A base class that describes the probability distribution for a parameter

    """

    def __init__(self):
        pass


[docs]
    def logp(self, value):
        """
        Calculate the log-prior probability of a value with the probability
        distribution.

        Parameters
        ----------
        val : float or array-like
            variates to calculate the log-probability for

        Returns
        -------
        arr : float or np.ndarray
            The log-probabilty corresponding to each of the variates

        """
        raise NotImplementedError



[docs]
    def valid(self, val):
        """
        Checks whether a parameter value is within the support of the
        distribution. If it isn't then it returns a value that is within the
        support.

        Parameters
        ----------
        val : array-like
            values to examine

        Returns
        -------
        valid : array-like
            valid values within the support

        """
        raise NotImplementedError



[docs]
    def rvs(self, size=1, random_state=None):
        """
        Generate random variates from the probability distribution.

        Parameters
        ----------
        size : int or tuple
            Specifies the number, or array shape, of random variates to return.
        random_state : {None, int, float or :class:`numpy.random.RandomState`}
            For reproducible sampling

        Returns
        -------
        arr : array-like
            Random variates from within the probability distribution.

        """
        raise NotImplementedError



[docs]
    def invcdf(self, q):
        """
        Calculate the inverse of the cumulative distribution function for the
        prior.

        This is also known as the percent point function, or ppf.

        Parameters
        ----------
        q : array-like
            lower tail probability

        Returns
        -------
        x : array-like
            quantile corresponding to the lower tail probability q.
        """
        raise NotImplementedError





[docs]
class PDF(Bounds):
    """
    A class that describes the probability distribution for a parameter.

    Parameters
    ----------
    rv : :class:`scipy.stats.rv_continuous` or Object
        A continuous probability distribution. If `rv` is not an
        `rv_continuous`, then it must implement the `logpdf` and `rvs`
        methods.

    Examples
    --------

    >>> import scipy.stats as stats
    >>> from refnx.analysis import Parameter, PDF
    >>> p = Parameter(0.5)
    >>> # use a normal distribution for prior, mean=5 and sd=1.
    >>> p.bounds = PDF(stats.norm(5, 1))
    >>> p.logp(), stats.norm.logpdf(0.5, 5, 1)
    (-11.043938533204672, -11.043938533204672)

    """

    def __init__(self, rv):
        super().__init__()
        # we'll accept any object so long as it has logpdf and rvs methods
        if hasattr(rv, "logpdf") and hasattr(rv, "rvs") and hasattr(rv, "ppf"):
            self.rv = rv
        else:
            raise ValueError(
                "You must initialise PDF with an object that has"
                " logpdf, rvs, and ppf methods"
            )

    def __repr__(self):
        return f"PDF({self.rv!r})"


[docs]
    def logp(self, val):
        """
        Calculate the log-prior probability of a value with the probability
        distribution.

        Parameters
        ----------
        val : float or array-like
            variate to calculate the log-probability for

        Returns
        -------
        arr : float or np.ndarray
            The log-probabilty corresponding to each of the variates

        """
        return self.rv.logpdf(val)



[docs]
    def valid(self, val):
        """
        Checks whether a parameter value is within the support of the
        distribution. If it isn't then it returns a *random* value that is
        within the support.

        Parameters
        ----------
        val : array-like
            values to examine

        Returns
        -------
        valid : array-like
            valid values within the support
        """
        _val = np.asarray(val, dtype=float)
        valid = np.where(
            np.isfinite(self.logp(_val)), _val, self.rv.rvs(size=_val.shape)
        )

        return valid



[docs]
    def rvs(self, size=1, random_state=None):
        """
        Generate random variates from the probability distribution.

        Parameters
        ----------
        size : int or tuple
            Specifies the number, or array shape, of random variates to return.
        random_state : None, int, float or np.random.RandomState
            For reproducible sampling

        Returns
        -------
        arr : array-like
            Random variates from within the probability distribution.

        """
        return self.rv.rvs(size=size, random_state=random_state)



[docs]
    def invcdf(self, q):
        """
        Calculate the inverse of the cumulative distribution function for the
        uniform prior.

        This is also known as the percent point function, or ppf.

        Parameters
        ----------
        q : array-like
            lower tail probability

        Returns
        -------
        x : array-like
            quantile corresponding to the lower tail probability q.
        """
        return self.rv.ppf(q)





[docs]
class Interval(Bounds):
    r"""
    Describes a uniform probability distribution. May be open, semi-open,
    or closed.

    Parameters
    ----------
    lb : float
        The lower bound
    ub : float
        The upper bound

    Examples
    --------

    >>> from refnx.analysis import Parameter, Interval
    >>> p = Parameter(1)
    >>> # closed interval
    >>> p.bounds = Interval(0, 10)
    >>> p.logp([5, -1])
    array([-2.30258509,        -inf])

    A semi-closed interval will still prevent the fitter from accessing
    impossible locations.

    >>> p.bounds = Interval(lb=-10)
    >>> p.logp([5, -1])
    array([0., 0.])

    """

    def __init__(self, lb=-np.inf, ub=np.inf):
        super().__init__()
        if lb is None:
            lb = -np.inf
        if ub is None:
            ub = np.inf

        self._lb = lb
        self._ub = ub
        self._logprob = 0
        self._closed_bounds = False
        # IMPLEMENTATION NOTE - don't try to add frozen stats.uniform
        # as an attribute to this class. The rv_frozen class takes
        # a long time to pickle/unpickle. gh483 fixes a perf regression
        # that did this.
        self._set_bounds(self._lb, self._ub)

    def _set_bounds(self, lb, ub):
        self._lb = float(min(lb, ub))
        self._ub = float(max(lb, ub))
        self._closed_bounds = False

        if math.isnan(self._lb) or math.isnan(self._ub):
            raise ValueError("Can't set Interval with NaN")

        if math.isfinite(self._lb) and math.isfinite(self._ub):
            self._closed_bounds = True
            if self._lb == self._ub:
                self._logprob = 0.0
            else:
                self._logprob = math.log(1 / (self._ub - self._lb))
        else:
            self._logprob = 0.0
            self._closed_bounds = False

    @property
    def rv(self):
        return uniform(self._lb, self._ub - self._lb)

    def __repr__(self):
        lb, ub = self.lb, self.ub
        if np.isneginf(self.lb):
            lb = "-np.inf"
        if np.isinf(self.ub):
            ub = "np.inf"
        return f"Interval(lb={lb}, ub={ub})"

    def __str__(self):
        return f"[{self.lb}, {self.ub}]"

    @property
    def lb(self):
        """
        Lower bound of uniform distribution
        """
        return self._lb

    @lb.setter
    def lb(self, val):
        if val is None:
            val = -np.inf
        self._set_bounds(val, self._ub)

    @property
    def ub(self):
        """
        Upper bound of uniform distribution
        """
        return self._ub

    @ub.setter
    def ub(self, val):
        if val is None:
            val = np.inf
        self._set_bounds(self._lb, val)


[docs]
    def logp(self, val):
        """
        Calculate the log-prior probability of a value with the Interval.

        Parameters
        ----------
        val : float or array-like
            variates to calculate the log-probability for

        Returns
        -------
        arr : float or np.ndarray
            The log-probabilty corresponding to each of the variates
        """
        # Special case when val is array-like. We'll assume that it may be
        # array-like if it has a length. This special casing is done because
        # a lot of time is spent on the `asarray`, `logical_and` and `where`.
        # Most of the time val is a single float, so this speedup is
        # appreciable.
        if hasattr(val, "__len__"):
            _val = np.asarray(val, dtype=float)
            valid = np.logical_and(self._lb <= _val, _val <= self._ub)
            prob = np.where(valid, self._logprob, -np.inf)
            return prob

        if self._lb <= val <= self._ub:
            return self._logprob

        return -np.inf



[docs]
    def valid(self, val):
        """
        Checks whether a parameter value is within the support of the
        Interval. If it isn't then it returns a value that is within the
        support.
        If the Interval is closed (i.e. lower and upper bounds are both
        specified) then invalid values will be corrected by random samples
        taken between the lower and upper bounds.
        If the interval is semi-open, only one of the bounds being
        specified, then invalid values will be corrected by the value being
        reflected by the same distance from the relevant limit.

        Parameters
        ----------
        val : array-like
            values to examine

        Returns
        -------
        valid : array-like
            values within the support

        Examples
        --------
        >>> b = Interval(0, 10)
        >>> b.valid(11.5)
        8.5
        """
        _val = np.asarray(val, dtype=float)
        if self._closed_bounds:
            valid = np.where(
                np.isfinite(self.logp(_val)), _val, self.rvs(size=_val.shape)
            )
        else:
            valid = np.where(_val < self._ub, _val, 2 * self._ub - _val)
            valid = np.where(valid > self._lb, valid, 2 * self._lb - valid)

        return valid



[docs]
    def rvs(self, size=1, random_state=None):
        if self._closed_bounds:
            rng = check_random_state(random_state)
            return rng.uniform(self._lb, self._ub, size=size)
        else:
            raise RuntimeError(
                "Can't ask for a random variate from a" " semi-closed interval"
            )



[docs]
    def invcdf(self, q):
        """
        Calculate the inverse of the cumulative distribution function for the
        uniform prior.

        This is also known as the percent point function, or ppf.

        Parameters
        ----------
        q : array-like
            lower tail probability

        Returns
        -------
        x : array-like
            quantile corresponding to the lower tail probability q.
        """
        if self._closed_bounds:
            # this is what a ppf transform for stats.uniform does
            scale = self._ub - self._lb
            loc = self._lb

            return scale * q + loc
        else:
            raise RuntimeError("Can only calculate invcdf with closed bounds")