From e8f75fd69764d6c8fecf4f12185b64cc750ba95a Mon Sep 17 00:00:00 2001
From: Tomoki Ohtsuki <tomoki.otsuki@bizreach.co.jp>
Date: Thu, 19 Nov 2020 18:07:26 +0900
Subject: [PATCH 1/6] Add numerically safer ordered probit distribution.

---
 pymc3/distributions/__init__.py   |   2 +
 pymc3/distributions/discrete.py   | 140 +++++++++++++++++++++++++++++-
 pymc3/distributions/dist_math.py  |  40 +++++++++
 pymc3/tests/test_distributions.py |  23 ++++-
 4 files changed, 203 insertions(+), 2 deletions(-)

diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py
index 2a671d3293..b609ae5588 100644
--- a/pymc3/distributions/__init__.py
+++ b/pymc3/distributions/__init__.py
@@ -65,6 +65,7 @@
 from .discrete import Geometric
 from .discrete import Categorical
 from .discrete import OrderedLogistic
+from .discrete import OrderedProbit
 
 from .distribution import DensityDist
 from .distribution import Distribution
@@ -143,6 +144,7 @@
     "Geometric",
     "Categorical",
     "OrderedLogistic",
+    "OrderedProbit",
     "DensityDist",
     "Distribution",
     "Continuous",
diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index fd2d399618..ff666ec925 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -17,7 +17,17 @@
 from scipy import stats
 import warnings
 
-from .dist_math import bound, factln, binomln, betaln, logpow, random_choice
+from .dist_math import (
+    bound,
+    factln,
+    binomln,
+    betaln,
+    logpow,
+    random_choice,
+    normal_lcdf,
+    normal_lccdf,
+    log_diff_normal_cdf,
+)
 from .distribution import Discrete, draw_values, generate_samples
 from .shape_utils import broadcast_distribution_samples
 from pymc3.math import tround, sigmoid, logaddexp, logit, log1pexp
@@ -1517,3 +1527,131 @@ def __init__(self, eta, cutpoints, *args, **kwargs):
         p = p_cum[..., 1:] - p_cum[..., :-1]
 
         super().__init__(p=p, *args, **kwargs)
+
+
+class OrderedProbit(Categorical):
+    R"""
+    Ordered Probit log-likelihood.
+
+    Useful for regression on ordinal data values whose values range
+    from 1 to K as a function of some predictor, :math:`\eta`. The
+    cutpoints, :math:`c`, separate which ranges of :math:`\eta` are
+    mapped to which of the K observed dependent variables.  The number
+    of cutpoints is K - 1.  It is recommended that the cutpoints are
+    constrained to be ordered.
+
+    In order to stabilize the computation, log-likelihood is computed
+    in log space using the scaled error function `erfcx`.
+
+    .. math::
+
+       f(k \mid \eta, c) = \left\{
+         \begin{array}{l}
+           1 - \text{normal_cdf}(0, \sigma, \eta - c_1)
+             \,, \text{if } k = 0 \\
+           \text{normal_cdf}(0, \sigma, \eta - c_{k - 1}) -
+           \text{normal_cdf}(0, \sigma, \eta - c_{k})
+             \,, \text{if } 0 < k < K \\
+           \text{normal_cdf}(0, \sigma, \eta - c_{K - 1})
+             \,, \text{if } k = K \\
+         \end{array}
+       \right.
+
+    Parameters
+    ----------
+    eta : float
+        The predictor.
+    c : array
+        The length K - 1 array of cutpoints which break :math:`\eta` into
+        ranges.  Do not explicitly set the first and last elements of
+        :math:`c` to negative and positive infinity.
+
+    sigma: float
+         The standard deviation of probit function.
+    Example
+    --------
+    .. code:: python
+
+        # Generate data for a simple 1 dimensional example problem
+        n1_c = 300; n2_c = 300; n3_c = 300
+        cluster1 = np.random.randn(n1_c) + -1
+        cluster2 = np.random.randn(n2_c) + 0
+        cluster3 = np.random.randn(n3_c) + 2
+
+        x = np.concatenate((cluster1, cluster2, cluster3))
+        y = np.concatenate((1*np.ones(n1_c),
+                            2*np.ones(n2_c),
+                            3*np.ones(n3_c))) - 1
+
+        # Ordered logistic regression
+        with pm.Model() as model:
+            cutpoints = pm.Normal("cutpoints", mu=[-1,1], sigma=10, shape=2,
+                                  transform=pm.distributions.transforms.ordered)
+            y_ = pm.OrderedProbit("y", cutpoints=cutpoints, eta=x, observed=y)
+            tr = pm.sample(1000)
+
+        # Plot the results
+        plt.hist(cluster1, 30, alpha=0.5);
+        plt.hist(cluster2, 30, alpha=0.5);
+        plt.hist(cluster3, 30, alpha=0.5);
+        plt.hist(tr["cutpoints"][:,0], 80, alpha=0.2, color='k');
+        plt.hist(tr["cutpoints"][:,1], 80, alpha=0.2, color='k');
+
+    """
+
+    def __init__(self, eta, cutpoints, *args, **kwargs):
+
+        self.eta = tt.as_tensor_variable(floatX(eta))
+        self.cutpoints = tt.as_tensor_variable(cutpoints)
+
+        probits = tt.shape_padright(self.eta) - self.cutpoints
+        _log_p = tt.concatenate(
+            [
+                tt.shape_padright(normal_lccdf(0, 1, probits[..., 0])),
+                log_diff_normal_cdf(0, 1, probits[..., :-1], probits[..., 1:]),
+                tt.shape_padright(normal_lcdf(0, 1, probits[..., -1])),
+            ],
+            axis=-1,
+        )
+        _log_p = tt.as_tensor_variable(floatX(_log_p))
+
+        self._log_p = _log_p
+        self.mode = tt.argmax(_log_p, axis=-1)
+        p = tt.exp(_log_p)
+
+        super().__init__(p=p, *args, **kwargs)
+
+    def logp(self, value):
+        r"""
+        Calculate log-probability of Ordered Probit distribution at specified value.
+
+        Parameters
+        ----------
+        value: numeric
+            Value(s) for which log-probability is calculated. If the log probabilities for multiple
+            values are desired the values must be provided in a numpy array or theano tensor
+
+        Returns
+        -------
+        TensorVariable
+        """
+        logp = self._log_p
+        k = self.k
+
+        # Clip values before using them for indexing
+        value_clip = tt.clip(value, 0, k - 1)
+
+        if logp.ndim > 1:
+            if logp.ndim > value_clip.ndim:
+                value_clip = tt.shape_padleft(value_clip, logp.ndim - value_clip.ndim)
+            elif logp.ndim < value_clip.ndim:
+                logp = tt.shape_padleft(logp, value_clip.ndim - logp.ndim)
+            pattern = (logp.ndim - 1,) + tuple(range(logp.ndim - 1))
+            a = take_along_axis(
+                logp.dimshuffle(pattern),
+                value_clip,
+            )
+        else:
+            a = logp[value_clip]
+
+        return bound(a, value >= 0, value <= (k - 1))
diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py
index 1af8e86c72..f71838c245 100644
--- a/pymc3/distributions/dist_math.py
+++ b/pymc3/distributions/dist_math.py
@@ -133,6 +133,46 @@ def normal_lccdf(mu, sigma, x):
     )
 
 
+def log_diff_normal_cdf(mu, sigma, x, y):
+    """
+    Compute :math:`log(\Phi(\frac{x - \mu}{\sigma}) - \Phi(\frac{y - \mu}{\sigma}))` safely in log space.
+
+    Parameters
+    ----------
+    mu: float
+        mean
+    sigma: float
+        std
+
+    x: float
+
+    y: float
+        must be strictly less than x.
+
+    Returns
+    -------
+    log (\Phi(x) - \Phi(y))
+
+    """
+    x = (x - mu) / sigma / tt.sqrt(2.0)
+    y = (y - mu) / sigma / tt.sqrt(2.0)
+
+    # To stabilize the computation, consider these three regions:
+    # 1) x > y > 0 => Use erf(x) = 1 - e^{-x^2} erfcx(x) and erf(y) =1 - e^{-y^2} erfcx(y)
+    # 2) 0 > x > 0 => Use erf(x) = e^{-x^2} erfcx(-x) and erf(y) = e^{-y^2} erfcx(-y)
+    # 3) x > 0 > y => Naive formula log( (erf(x) - erf(y)) / 2 ) works fine.
+    return tt.log(0.5) + tt.switch(
+        tt.gt(y, 0),
+        -tt.square(y) + tt.log(tt.erfcx(y) - tt.exp(tt.square(y) - tt.square(x)) * tt.erfcx(x)),
+        tt.switch(
+            tt.lt(x, 0),  # 0 > x > y
+            -tt.square(x)
+            + tt.log(tt.erfcx(-x) - tt.exp(tt.square(x) - tt.square(y)) * tt.erfcx(-y)),
+            tt.log(tt.erf(x) - tt.erf(y)),  # x >0 > y
+        ),
+    )
+
+
 def sigma2rho(sigma):
     """
     `sigma -> rho` theano converter
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 20349d104f..a0c5522724 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -66,6 +66,7 @@
     Gumbel,
     Logistic,
     OrderedLogistic,
+    OrderedProbit,
     LogitNormal,
     Interpolated,
     ZeroInflatedBinomial,
@@ -89,7 +90,7 @@
 from scipy import integrate
 import scipy.stats.distributions as sp
 import scipy.stats
-from scipy.special import logit
+from scipy.special import logit, erf, erfcx
 import theano
 import theano.tensor as tt
 from ..math import kronecker
@@ -429,6 +430,17 @@ def orderedlogistic_logpdf(value, eta, cutpoints):
     return np.where(np.all(ps >= 0), np.log(p), -np.inf)
 
 
+def invprobit(x):
+    return (erf(x / np.sqrt(2)) + 1) / 2
+
+
+def orderedprobit_logpdf(value, eta, cutpoints):
+    c = np.concatenate(([-np.inf], cutpoints, [np.inf]))
+    ps = np.array([invprobit(eta - cc) - invprobit(eta - cc1) for cc, cc1 in zip(c[:-1], c[1:])])
+    p = ps[value]
+    return np.where(np.all(ps >= 0), np.log(p), -np.inf)
+
+
 class Simplex:
     def __init__(self, n):
         self.vals = list(simplex_values(n))
@@ -1516,6 +1528,15 @@ def test_orderedlogistic(self, n):
             lambda value, eta, cutpoints: orderedlogistic_logpdf(value, eta, cutpoints),
         )
 
+    @pytest.mark.parametrize("n", [2, 3, 4])
+    def test_orderedprobit(self, n):
+        self.pymc3_matches_scipy(
+            OrderedProbit,
+            Domain(range(n), "int64"),
+            {"eta": R, "cutpoints": SortedVector(n - 1)},
+            lambda value, eta, cutpoints: orderedprobit_logpdf(value, eta, cutpoints),
+        )
+
     def test_densitydist(self):
         def logp(x):
             return -log(2 * 0.5) - abs(x - 0.5) / 0.5

From 53c2a0da03cc992a9370afa3049efeeed211a167 Mon Sep 17 00:00:00 2001
From: Tomoki Ohtsuki <tomoki.otsuki@bizreach.co.jp>
Date: Thu, 3 Dec 2020 13:42:22 +0900
Subject: [PATCH 2/6] Fix styling issue.

---
 pymc3/distributions/dist_math.py  | 4 ++--
 pymc3/tests/test_distributions.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py
index f71838c245..55df628be6 100644
--- a/pymc3/distributions/dist_math.py
+++ b/pymc3/distributions/dist_math.py
@@ -135,7 +135,7 @@ def normal_lccdf(mu, sigma, x):
 
 def log_diff_normal_cdf(mu, sigma, x, y):
     """
-    Compute :math:`log(\Phi(\frac{x - \mu}{\sigma}) - \Phi(\frac{y - \mu}{\sigma}))` safely in log space.
+    Compute :math:`\\log(\\Phi(\frac{x - \\mu}{\\sigma}) - \\Phi(\frac{y - \\mu}{\\sigma}))` safely in log space.
 
     Parameters
     ----------
@@ -151,7 +151,7 @@ def log_diff_normal_cdf(mu, sigma, x, y):
 
     Returns
     -------
-    log (\Phi(x) - \Phi(y))
+    log (\\Phi(x) - \\Phi(y))
 
     """
     x = (x - mu) / sigma / tt.sqrt(2.0)
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 44e1ad43cd..be54e63b51 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -90,7 +90,7 @@
 from scipy import integrate
 import scipy.stats.distributions as sp
 import scipy.stats
-from scipy.special import logit, erf, erfcx
+from scipy.special import logit, erf
 import theano
 import theano.tensor as tt
 from ..math import kronecker

From c3615bbf5bb8eb833208af5680a3216b5b2d4080 Mon Sep 17 00:00:00 2001
From: Tomoki Ohtsuki <tomoki.otsuki@bizreach.co.jp>
Date: Thu, 3 Dec 2020 15:32:00 +0900
Subject: [PATCH 3/6] Use smaller values for probit & cutpoint testing

---
 pymc3/tests/test_distributions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index be54e63b51..8df69656e9 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1543,7 +1543,7 @@ def test_orderedprobit(self, n):
         self.pymc3_matches_scipy(
             OrderedProbit,
             Domain(range(n), "int64"),
-            {"eta": R, "cutpoints": SortedVector(n - 1)},
+            {"eta": Runif, "cutpoints": UnitSortedVector(n - 1)},
             lambda value, eta, cutpoints: orderedprobit_logpdf(value, eta, cutpoints),
         )
 

From cef5bed06c7d3688a22938c539a1f04b7e1bd7fb Mon Sep 17 00:00:00 2001
From: Michael Osthege <m.osthege@fz-juelich.de>
Date: Tue, 8 Dec 2020 20:46:34 +0100
Subject: [PATCH 4/6] fix import sorting

---
 pymc3/distributions/discrete.py   | 2 +-
 pymc3/tests/test_distributions.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index d98e9617d8..9662ff2693 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -26,8 +26,8 @@
     factln,
     log_diff_normal_cdf,
     logpow,
-    normal_lcdf,
     normal_lccdf,
+    normal_lcdf,
     random_choice,
 )
 from pymc3.distributions.distribution import Discrete, draw_values, generate_samples
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 32281e687d..daffdbe8e1 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -26,7 +26,7 @@
 from numpy import array, exp, inf, log
 from numpy.testing import assert_allclose, assert_almost_equal, assert_equal
 from scipy import integrate
-from scipy.special import logit, erf
+from scipy.special import erf, logit
 
 import pymc3 as pm
 

From 65b5a4d6a5ba7da375241a016a350d62ec2b31c0 Mon Sep 17 00:00:00 2001
From: Tomoki Ohtsuki <tomoki.otsuki@bizreach.co.jp>
Date: Wed, 16 Dec 2020 10:46:59 +0900
Subject: [PATCH 5/6] Typo fix

---
 pymc3/distributions/discrete.py  | 2 +-
 pymc3/distributions/dist_math.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index 9662ff2693..34fb39a48f 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -1697,7 +1697,7 @@ class OrderedProbit(Categorical):
                             2*np.ones(n2_c),
                             3*np.ones(n3_c))) - 1
 
-        # Ordered logistic regression
+        # Ordered probit regression
         with pm.Model() as model:
             cutpoints = pm.Normal("cutpoints", mu=[-1,1], sigma=10, shape=2,
                                   transform=pm.distributions.transforms.ordered)
diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py
index 99bf11d2dc..e302000735 100644
--- a/pymc3/distributions/dist_math.py
+++ b/pymc3/distributions/dist_math.py
@@ -160,7 +160,7 @@ def log_diff_normal_cdf(mu, sigma, x, y):
 
     # To stabilize the computation, consider these three regions:
     # 1) x > y > 0 => Use erf(x) = 1 - e^{-x^2} erfcx(x) and erf(y) =1 - e^{-y^2} erfcx(y)
-    # 2) 0 > x > 0 => Use erf(x) = e^{-x^2} erfcx(-x) and erf(y) = e^{-y^2} erfcx(-y)
+    # 2) 0 > x > y => Use erf(x) = e^{-x^2} erfcx(-x) and erf(y) = e^{-y^2} erfcx(-y)
     # 3) x > 0 > y => Naive formula log( (erf(x) - erf(y)) / 2 ) works fine.
     return tt.log(0.5) + tt.switch(
         tt.gt(y, 0),

From 7b27830aa7971d8180497b52f7a73fe2b60146e0 Mon Sep 17 00:00:00 2001
From: Tomoki Ohtsuki <tomoki.otsuki@bizreach.co.jp>
Date: Wed, 16 Dec 2020 10:55:14 +0900
Subject: [PATCH 6/6] add a line to RELEASE-NOTES.md

---
 RELEASE-NOTES.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 8858828c06..0700dd84db 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -9,6 +9,8 @@ This is the first release to support Python3.9 and to drop Python3.6.
 - Make `sample_shape` same across all contexts in `draw_values` (see [#4305](https://github.com/pymc-devs/pymc3/pull/4305)).
 - Removed `theanof.set_theano_config` because it illegally touched Theano's privates (see [#4329](https://github.com/pymc-devs/pymc3/pull/4329)).
 
+### New Features
+- `OrderedProbit` distribution added (see [#4232](https://github.com/pymc-devs/pymc3/pull/4232)).
 
 ## PyMC3 3.10.0 (7 December 2020)