Increase unittest check_logcdf coverage and fix issues with some distribution methods (#4393)

* Change `check_logcdf` to test that values below or above domain are properly evaluated. Fix issues with `Uniform`, `HalfNormal`, `Gamma`, and `InverseGamma` distributions.
* Add multiple value test for logcdf. Add more informative comment for Gamma and InverseGamma hack. Update Release note.
* Update release note
* Update docstrings with valid value types
* Update RELEASE-NOTES.md
* Add more informative comments and remove TODO
* TypeError: format -> f-strings
* Ignore finite upper limit in Nat domains. Move new checks to `check_logcdf`.
* Use `tt.switch` in `DiscreteUniform` for hard boundary (addresses previously failing test in 32bit OS)
pymc-devs · Jan 2, 2021 · acb8da0 · acb8da0
1 parent a21fafa
commit acb8da0
Show file tree

Hide file tree

Showing 4 changed files with 133 additions and 71 deletions.
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
@@ -29,6 +29,7 @@ It also brings some dreadfully awaited fixes, so be sure to go through the chang
 - Fixed mathematical formulation in `MvStudentT` random method. (see [#4359](https://github.com/pymc-devs/pymc3/pull/4359))
 - Fix issue in `logp` method of `HyperGeometric`. It now returns `-inf` for invalid parameters (see [4367](https://github.com/pymc-devs/pymc3/pull/4367))
 - Fixed `MatrixNormal` random method to work with parameters as random variables. (see [#4368](https://github.com/pymc-devs/pymc3/pull/4368))
+- Update the `logcdf` method of several continuous distributions to return `-inf` for invalid parameters and values, and raise an informative error when multiple values cannot be evaluated in a single call. (see [#4393](https://github.com/pymc-devs/pymc3/pull/4393))
 
 ## PyMC3 3.10.0 (7 December 2020)
 

diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py
@@ -278,21 +278,24 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
         Returns
         -------
         TensorVariable
         """
+        lower = self.lower
+        upper = self.upper
+
         return tt.switch(
-            tt.or_(tt.lt(value, self.lower), tt.gt(value, self.upper)),
+            tt.lt(value, lower) | tt.lt(upper, lower),
             -np.inf,
             tt.switch(
-                tt.eq(value, self.upper),
+                tt.lt(value, upper),
+                tt.log(value - lower) - tt.log(upper - lower),
                 0,
-                tt.log(value - self.lower) - tt.log(self.upper - self.lower),
             ),
         )
 
@@ -344,7 +347,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -401,7 +404,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -542,7 +545,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -900,7 +903,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -910,10 +913,10 @@ def logcdf(self, value):
         """
         sigma = self.sigma
         z = zvalue(value, mu=0, sigma=sigma)
-        return tt.switch(
-            tt.lt(z, -1.0),
-            tt.log(tt.erfcx(-z / tt.sqrt(2.0))) - tt.sqr(z),
+        return bound(
             tt.log1p(-tt.erfc(z / tt.sqrt(2.0))),
+            0 <= value,
+            0 < sigma,
         )
 
 
@@ -1106,7 +1109,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -1297,20 +1300,30 @@ def logcdf(self, value):
         Parameters
         ----------
         value: numeric
-            Value(s) for which log CDF is calculated. If the log CDF for multiple
-            values are desired the values must be provided in a numpy array or theano tensor.
+            Value(s) for which log CDF is calculated.
 
         Returns
         -------
         TensorVariable
         """
-        value = floatX(tt.as_tensor(value))
-        a = floatX(tt.as_tensor(self.alpha))
-        b = floatX(tt.as_tensor(self.beta))
-        return tt.switch(
-            tt.le(value, 0),
-            -np.inf,
-            tt.switch(tt.ge(value, 1), 0, tt.log(incomplete_beta(a, b, value))),
+        # incomplete_beta function can only handle scalar values (see #4342)
+        if np.ndim(value):
+            raise TypeError(
+                f"Beta.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object."
+            )
+
+        a = self.alpha
+        b = self.beta
+
+        return bound(
+            tt.switch(
+                tt.lt(value, 1),
+                tt.log(incomplete_beta(a, b, value)),
+                0,
+            ),
+            0 <= value,
+            0 < a,
+            0 < b,
         )
 
     def _distr_parameters_for_repr(self):
@@ -1515,7 +1528,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -1630,7 +1643,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -1786,7 +1799,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -1949,20 +1962,32 @@ def logcdf(self, value):
         Parameters
         ----------
         value: numeric
-            Value(s) for which log CDF is calculated. If the log CDF for multiple
-            values are desired the values must be provided in a numpy array or theano tensor.
+            Value(s) for which log CDF is calculated.
 
         Returns
         -------
         TensorVariable
         """
+        # incomplete_beta function can only handle scalar values (see #4342)
+        if np.ndim(value):
+            raise TypeError(
+                f"StudentT.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object."
+            )
+
         nu = self.nu
         mu = self.mu
         sigma = self.sigma
+        lam = self.lam
         t = (value - mu) / sigma
         sqrt_t2_nu = tt.sqrt(t ** 2 + nu)
         x = (t + sqrt_t2_nu) / (2.0 * sqrt_t2_nu)
-        return tt.log(incomplete_beta(nu / 2.0, nu / 2.0, x))
+
+        return bound(
+            tt.log(incomplete_beta(nu / 2.0, nu / 2.0, x)),
+            0 < nu,
+            0 < sigma,
+            0 < lam,
+        )
 
 
 class Pareto(Continuous):
@@ -2084,7 +2109,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -2203,7 +2228,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -2311,7 +2336,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -2462,7 +2487,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -2472,7 +2497,17 @@ def logcdf(self, value):
         """
         alpha = self.alpha
         beta = self.beta
-        return bound(tt.log(tt.gammainc(alpha, beta * value)), value >= 0, alpha > 0, beta > 0)
+        # Avoid C-assertion when the gammainc function is called with invalid values (#4340)
+        safe_alpha = tt.switch(tt.lt(alpha, 0), 0, alpha)
+        safe_beta = tt.switch(tt.lt(beta, 0), 0, beta)
+        safe_value = tt.switch(tt.lt(value, 0), 0, value)
+
+        return bound(
+            tt.log(tt.gammainc(safe_alpha, safe_beta * safe_value)),
+            0 <= value,
+            0 < alpha,
+            0 < beta,
+        )
 
     def _distr_parameters_for_repr(self):
         return ["alpha", "beta"]
@@ -2626,7 +2661,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -2636,11 +2671,16 @@ def logcdf(self, value):
         """
         alpha = self.alpha
         beta = self.beta
+        # Avoid C-assertion when the gammaincc function is called with invalid values (#4340)
+        safe_alpha = tt.switch(tt.lt(alpha, 0), 0, alpha)
+        safe_beta = tt.switch(tt.lt(beta, 0), 0, beta)
+        safe_value = tt.switch(tt.lt(value, 0), 0, value)
+
         return bound(
-            tt.log(tt.gammaincc(alpha, beta / value)),
-            value >= 0,
-            alpha > 0,
-            beta > 0,
+            tt.log(tt.gammaincc(safe_alpha, safe_beta / safe_value)),
+            0 <= value,
+            0 < alpha,
+            0 < beta,
         )
 
 
@@ -2802,7 +2842,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -3102,7 +3142,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -3491,7 +3531,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -3620,7 +3660,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -3902,7 +3942,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.
 
@@ -4244,7 +4284,7 @@ def logcdf(self, value):
 
         Parameters
         ----------
-        value: numeric
+        value: numeric or np.ndarray or theano.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or theano tensor.