From 2e6c2c43e33d34b1fd5bc85da536729abdb47d31 Mon Sep 17 00:00:00 2001
From: Ricardo Vieira
Date: Tue, 9 Apr 2024 20:24:42 +0200
Subject: [PATCH] Reimplement several RandomVariables as SymbolicRandomVariables

This allows sampling from multiple backends without having to dispatch for
each one
---
 pymc/distributions/continuous.py   | 158 ++++++++++++++++-------
 pymc/distributions/discrete.py     |  35 ++++---
 pymc/distributions/multivariate.py |  56 ++++------
 3 files changed, 130 insertions(+), 119 deletions(-)

diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py
index 2074dbcd54d..44b0b2f71ec 100644
--- a/pymc/distributions/continuous.py
+++ b/pymc/distributions/continuous.py
@@ -52,10 +52,12 @@
     vonmises,
 )
 from pytensor.tensor.random.op import RandomVariable
+from pytensor.tensor.random.utils import normalize_size_param
 from pytensor.tensor.variable import TensorConstant
 
 from pymc.logprob.abstract import _logprob_helper
 from pymc.logprob.basic import icdf
+from pymc.pytensorf import normalize_rng_param
 
 try:
     from polyagamma import polyagamma_cdf, polyagamma_pdf, random_polyagamma
@@ -73,7 +75,6 @@ def polyagamma_cdf(*args, **kwargs):
 
 from scipy import stats
 from scipy.interpolate import InterpolatedUnivariateSpline
-from scipy.special import expit
 
 from pymc.distributions import transforms
 from pymc.distributions.dist_math import (
@@ -90,8 +91,8 @@ def polyagamma_cdf(*args, **kwargs):
     normal_lcdf,
     zvalue,
 )
-from pymc.distributions.distribution import DIST_PARAMETER_TYPES, Continuous
-from pymc.distributions.shape_utils import rv_size_is_none
+from pymc.distributions.distribution import DIST_PARAMETER_TYPES, Continuous, SymbolicRandomVariable
+from pymc.distributions.shape_utils import implicit_size_from_params, rv_size_is_none
 from pymc.distributions.transforms import _default_transform
 from pymc.math import invlogit, logdiffexp, logit
 
@@ -1236,20 +1237,28 @@ def icdf(value, alpha, beta):
     )
 
 
-class KumaraswamyRV(RandomVariable):
+class KumaraswamyRV(SymbolicRandomVariable):
     name = "kumaraswamy"
-    ndim_supp = 0
-    ndims_params = [0, 0]
-    dtype = "floatX"
+    signature = "[rng],[size],(),()->[rng],()"
     _print_name = ("Kumaraswamy", "\\operatorname{Kumaraswamy}")
 
     @classmethod
-    def rng_fn(cls, rng, a, b, size) -> np.ndarray:
-        u = rng.uniform(size=size)
-        return np.asarray((1 - (1 - u) ** (1 / b)) ** (1 / a))
+    def rv_op(cls, a, b, *, size=None, rng=None):
+        a = pt.as_tensor(a)
+        b = pt.as_tensor(b)
+        rng = normalize_rng_param(rng)
+        size = normalize_size_param(size)
 
+        if rv_size_is_none(size):
+            size = implicit_size_from_params(a, b, ndims_params=cls.ndims_params)
 
-kumaraswamy = KumaraswamyRV()
+        next_rng, u = uniform(size=size, rng=rng).owner.outputs
+        draws = (1 - (1 - u) ** (1 / b)) ** (1 / a)
+
+        return cls(
+            inputs=[rng, size, a, b],
+            outputs=[next_rng, draws],
+        )(rng, size, a, b)
 
 
 class Kumaraswamy(UnitContinuous):
@@ -1296,13 +1305,11 @@ class Kumaraswamy(UnitContinuous):
        b > 0.
""" - rv_op = kumaraswamy + rv_type = KumaraswamyRV + rv_op = KumaraswamyRV.rv_op @classmethod def dist(cls, a: DIST_PARAMETER_TYPES, b: DIST_PARAMETER_TYPES, *args, **kwargs): - a = pt.as_tensor_variable(a) - b = pt.as_tensor_variable(b) - return super().dist([a, b], *args, **kwargs) def support_point(rv, size, a, b): @@ -1533,24 +1540,32 @@ def icdf(value, mu, b): return check_icdf_parameters(res, b > 0, msg="b > 0") -class AsymmetricLaplaceRV(RandomVariable): +class AsymmetricLaplaceRV(SymbolicRandomVariable): name = "asymmetriclaplace" - ndim_supp = 0 - ndims_params = [0, 0, 0] - dtype = "floatX" + signature = "[rng],[size],(),(),()->[rng],()" _print_name = ("AsymmetricLaplace", "\\operatorname{AsymmetricLaplace}") @classmethod - def rng_fn(cls, rng, b, kappa, mu, size=None) -> np.ndarray: - u = rng.uniform(size=size) + def rv_op(cls, b, kappa, mu, *, size=None, rng=None): + b = pt.as_tensor(b) + kappa = pt.as_tensor(kappa) + mu = pt.as_tensor(mu) + rng = normalize_rng_param(rng) + size = normalize_size_param(size) + + if rv_size_is_none(size): + size = implicit_size_from_params(b, kappa, mu, ndims_params=cls.ndims_params) + + next_rng, u = uniform(size=size, rng=rng).owner.outputs switch = kappa**2 / (1 + kappa**2) - non_positive_x = mu + kappa * np.log(u * (1 / switch)) / b - positive_x = mu - np.log((1 - u) * (1 + kappa**2)) / (kappa * b) + non_positive_x = mu + kappa * pt.log(u * (1 / switch)) / b + positive_x = mu - pt.log((1 - u) * (1 + kappa**2)) / (kappa * b) draws = non_positive_x * (u <= switch) + positive_x * (u > switch) - return np.asarray(draws) - -asymmetriclaplace = AsymmetricLaplaceRV() + return cls( + inputs=[rng, size, b, kappa, mu], + outputs=[next_rng, draws], + )(rng, size, b, kappa, mu) class AsymmetricLaplace(Continuous): @@ -1599,15 +1614,12 @@ class AsymmetricLaplace(Continuous): of interest. """ - rv_op = asymmetriclaplace + rv_type = AsymmetricLaplaceRV + rv_op = AsymmetricLaplaceRV.rv_op @classmethod def dist(cls, kappa=None, mu=None, b=None, q=None, *args, **kwargs): kappa = cls.get_kappa(kappa, q) - b = pt.as_tensor_variable(b) - kappa = pt.as_tensor_variable(kappa) - mu = pt.as_tensor_variable(mu) - return super().dist([b, kappa, mu], *args, **kwargs) @classmethod @@ -2475,7 +2487,6 @@ def dist(cls, nu, **kwargs): return Gamma.dist(alpha=nu / 2, beta=1 / 2, **kwargs) -# TODO: Remove this once logp for multiplication is working! 
 class WeibullBetaRV(RandomVariable):
     name = "weibull"
     ndim_supp = 0
@@ -2597,19 +2608,22 @@ def icdf(value, alpha, beta):
     )
 
 
-class HalfStudentTRV(RandomVariable):
+class HalfStudentTRV(SymbolicRandomVariable):
     name = "halfstudentt"
-    ndim_supp = 0
-    ndims_params = [0, 0]
-    dtype = "floatX"
+    signature = "[rng],[size],(),()->[rng],()"
     _print_name = ("HalfStudentT", "\\operatorname{HalfStudentT}")
 
     @classmethod
-    def rng_fn(cls, rng, nu, sigma, size=None) -> np.ndarray:
-        return np.asarray(np.abs(stats.t.rvs(nu, scale=sigma, size=size, random_state=rng)))
+    def rv_op(cls, nu, sigma, *, size=None, rng=None):
+        nu = pt.as_tensor(nu)
+        sigma = pt.as_tensor(sigma)
+        rng = normalize_rng_param(rng)
+        size = normalize_size_param(size)
+
+        next_rng, t_draws = t(df=nu, scale=sigma, size=size, rng=rng).owner.outputs
+        draws = pt.abs(t_draws)
 
-
-halfstudentt = HalfStudentTRV()
+        return cls(inputs=[rng, size, nu, sigma], outputs=[next_rng, draws])(rng, size, nu, sigma)
 
 
 class HalfStudentT(PositiveContinuous):
@@ -2671,14 +2685,12 @@ class HalfStudentT(PositiveContinuous):
             x = pm.HalfStudentT('x', lam=4, nu=10)
     """
 
-    rv_op = halfstudentt
+    rv_type = HalfStudentTRV
+    rv_op = HalfStudentTRV.rv_op
 
     @classmethod
     def dist(cls, nu, sigma=None, lam=None, *args, **kwargs):
-        nu = pt.as_tensor_variable(nu)
         lam, sigma = get_tau_sigma(lam, sigma)
-        sigma = pt.as_tensor_variable(sigma)
-
         return super().dist([nu, sigma], *args, **kwargs)
 
     def support_point(rv, size, nu, sigma):
@@ -2710,19 +2722,29 @@ def logp(value, nu, sigma):
     )
 
 
-class ExGaussianRV(RandomVariable):
+class ExGaussianRV(SymbolicRandomVariable):
     name = "exgaussian"
-    ndim_supp = 0
-    ndims_params = [0, 0, 0]
-    dtype = "floatX"
+    signature = "[rng],[size],(),(),()->[rng],()"
     _print_name = ("ExGaussian", "\\operatorname{ExGaussian}")
 
     @classmethod
-    def rng_fn(cls, rng, mu, sigma, nu, size=None) -> np.ndarray:
-        return np.asarray(rng.normal(mu, sigma, size=size) + rng.exponential(scale=nu, size=size))
+    def rv_op(cls, mu, sigma, nu, *, size=None, rng=None):
+        mu = pt.as_tensor(mu)
+        sigma = pt.as_tensor(sigma)
+        nu = pt.as_tensor(nu)
+        rng = normalize_rng_param(rng)
+        size = normalize_size_param(size)
 
+        if rv_size_is_none(size):
+            size = implicit_size_from_params(mu, sigma, nu, ndims_params=cls.ndims_params)
 
-exgaussian = ExGaussianRV()
+        next_rng, normal_draws = normal(loc=mu, scale=sigma, size=size, rng=rng).owner.outputs
+        final_rng, exponential_draws = exponential(scale=nu, size=size, rng=next_rng).owner.outputs
+        draws = normal_draws + exponential_draws
+
+        return cls(inputs=[rng, size, mu, sigma, nu], outputs=[final_rng, draws])(
+            rng, size, mu, sigma, nu
+        )
 
 
 class ExGaussian(Continuous):
@@ -2792,14 +2814,11 @@ class ExGaussian(Continuous):
         Vol. 4, No. 1, pp 35-45.
""" - rv_op = exgaussian + rv_type = ExGaussianRV + rv_op = ExGaussianRV.rv_op @classmethod def dist(cls, mu=0.0, sigma=None, nu=None, *args, **kwargs): - mu = pt.as_tensor_variable(mu) - sigma = pt.as_tensor_variable(sigma) - nu = pt.as_tensor_variable(nu) - return super().dist([mu, sigma, nu], *args, **kwargs) def support_point(rv, size, mu, sigma, nu): @@ -3477,19 +3496,25 @@ def icdf(value, mu, s): ) -class LogitNormalRV(RandomVariable): +class LogitNormalRV(SymbolicRandomVariable): name = "logit_normal" - ndim_supp = 0 - ndims_params = [0, 0] - dtype = "floatX" + signature = "[rng],[size],(),()->[rng],()" _print_name = ("logitNormal", "\\operatorname{logitNormal}") @classmethod - def rng_fn(cls, rng, mu, sigma, size=None) -> np.ndarray: - return np.asarray(expit(stats.norm.rvs(loc=mu, scale=sigma, size=size, random_state=rng))) + def rv_op(cls, mu, sigma, *, size=None, rng=None): + mu = pt.as_tensor(mu) + sigma = pt.as_tensor(sigma) + rng = normalize_rng_param(rng) + size = normalize_size_param(size) + next_rng, normal_draws = normal(loc=mu, scale=sigma, size=size, rng=rng).owner.outputs + draws = pt.expit(normal_draws) -logit_normal = LogitNormalRV() + return cls( + inputs=[rng, size, mu, sigma], + outputs=[next_rng, draws], + )(rng, size, mu, sigma) class LogitNormal(UnitContinuous): @@ -3540,15 +3565,12 @@ class LogitNormal(UnitContinuous): Defaults to 1. """ - rv_op = logit_normal + rv_type = LogitNormalRV + rv_op = LogitNormalRV.rv_op @classmethod def dist(cls, mu=0, sigma=None, tau=None, **kwargs): - mu = pt.as_tensor_variable(mu) - tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) - sigma = pt.as_tensor_variable(sigma) - tau = pt.as_tensor_variable(tau) - + _, sigma = get_tau_sigma(tau=tau, sigma=sigma) return super().dist([mu, sigma], **kwargs) def support_point(rv, size, mu, sigma): diff --git a/pymc/distributions/discrete.py b/pymc/distributions/discrete.py index c18a88690a0..b260580a61f 100644 --- a/pymc/distributions/discrete.py +++ b/pymc/distributions/discrete.py @@ -18,7 +18,6 @@ from pytensor.tensor import TensorConstant from pytensor.tensor.random.basic import ( - RandomVariable, ScipyRandomVariable, bernoulli, betabinom, @@ -28,7 +27,9 @@ hypergeometric, nbinom, poisson, + uniform, ) +from pytensor.tensor.random.utils import normalize_size_param from scipy import stats import pymc as pm @@ -45,8 +46,8 @@ normal_lccdf, normal_lcdf, ) -from pymc.distributions.distribution import Discrete -from pymc.distributions.shape_utils import rv_size_is_none +from pymc.distributions.distribution import Discrete, SymbolicRandomVariable +from pymc.distributions.shape_utils import implicit_size_from_params, rv_size_is_none from pymc.logprob.basic import logcdf, logp from pymc.math import sigmoid @@ -65,6 +66,8 @@ "OrderedProbit", ] +from pymc.pytensorf import normalize_rng_param + class Binomial(Discrete): R""" @@ -387,20 +390,25 @@ def logcdf(value, p): ) -class DiscreteWeibullRV(RandomVariable): +class DiscreteWeibullRV(SymbolicRandomVariable): name = "discrete_weibull" - ndim_supp = 0 - ndims_params = [0, 0] - dtype = "int64" + signature = "[rng],[size],(),()->[rng],()" _print_name = ("dWeibull", "\\operatorname{dWeibull}") @classmethod - def rng_fn(cls, rng, q, beta, size): - p = rng.uniform(size=size) - return np.ceil(np.power(np.log(1 - p) / np.log(q), 1.0 / beta)) - 1 + def rv_op(cls, q, beta, *, size=None, rng=None): + q = pt.as_tensor(q) + beta = pt.as_tensor(beta) + rng = normalize_rng_param(rng) + size = normalize_size_param(size) + + if rv_size_is_none(size): + size = 
+
+        next_rng, p = uniform(size=size, rng=rng).owner.outputs
+        draws = pt.ceil(pt.power(pt.log(1 - p) / pt.log(q), 1.0 / beta)) - 1
 
-
-discrete_weibull = DiscreteWeibullRV()
+        return cls(inputs=[rng, size, q, beta], outputs=[next_rng, draws])(rng, size, q, beta)
 
 
 class DiscreteWeibull(Discrete):
@@ -452,12 +460,11 @@ def DiscreteWeibull(q, b, x):
 
     """
 
-    rv_op = discrete_weibull
+    rv_type = DiscreteWeibullRV
+    rv_op = DiscreteWeibullRV.rv_op
 
     @classmethod
     def dist(cls, q, beta, *args, **kwargs):
-        q = pt.as_tensor_variable(q)
-        beta = pt.as_tensor_variable(beta)
         return super().dist([q, beta], **kwargs)
 
     def support_point(rv, size, q, beta):
diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py
index f6c572523db..e36c24da39e 100644
--- a/pymc/distributions/multivariate.py
+++ b/pymc/distributions/multivariate.py
@@ -65,6 +65,7 @@
     broadcast_dist_samples_shape,
     change_dist_size,
     get_support_shape,
+    implicit_size_from_params,
     rv_size_is_none,
     to_tuple,
 )
@@ -593,48 +594,28 @@ def logp(value, n, p):
     )
 
 
-class DirichletMultinomialRV(RandomVariable):
+class DirichletMultinomialRV(SymbolicRandomVariable):
     name = "dirichlet_multinomial"
-    ndim_supp = 1
-    ndims_params = [0, 1]
-    dtype = "int64"
-    _print_name = ("DirichletMN", "\\operatorname{DirichletMN}")
-
-    def _supp_shape_from_params(self, dist_params, param_shapes=None):
-        return supp_shape_from_ref_param_shape(
-            ndim_supp=self.ndim_supp,
-            dist_params=dist_params,
-            param_shapes=param_shapes,
-            ref_param_idx=1,
-        )
+    signature = "[rng],[size],(),(p)->[rng],(p)"
+    _print_name = ("DirichletMultinomial", "\\operatorname{DirichletMultinomial}")
 
     @classmethod
-    def rng_fn(cls, rng, n, a, size):
-        if n.ndim > 0 or a.ndim > 1:
-            n, a = broadcast_params([n, a], cls.ndims_params)
-            size = tuple(size or ())
-
-            if size:
-                n = np.broadcast_to(n, size)
-                a = np.broadcast_to(a, (*size, a.shape[-1]))
-
-            res = np.empty(a.shape)
-            for idx in np.ndindex(a.shape[:-1]):
-                p = rng.dirichlet(a[idx])
-                res[idx] = rng.multinomial(n[idx], p)
-            return res
-        else:
-            # n is a scalar, a is a 1d array
-            p = rng.dirichlet(a, size=size)  # (size, a.shape)
-
-            res = np.empty(p.shape)
-            for idx in np.ndindex(p.shape[:-1]):
-                res[idx] = rng.multinomial(n, p[idx])
+    def rv_op(cls, n, a, *, size=None, rng=None):
+        n = pt.as_tensor(n, dtype=int)
+        a = pt.as_tensor(a)
+        rng = normalize_rng_param(rng)
+        size = normalize_size_param(size)
 
-        return res
+        if rv_size_is_none(size):
+            size = implicit_size_from_params(n, a, ndims_params=cls.ndims_params)
 
+        next_rng, p = dirichlet(a, size=size, rng=rng).owner.outputs
+        final_rng, rv = multinomial(n, p, size=size, rng=next_rng).owner.outputs
 
-dirichlet_multinomial = DirichletMultinomialRV()
+        return cls(
+            inputs=[rng, size, n, a],
+            outputs=[final_rng, rv],
+        )(rng, size, n, a)
 
 
 class DirichletMultinomial(Discrete):
@@ -666,7 +647,8 @@ class DirichletMultinomial(Discrete):
         the length of the last axis.
     """
 
-    rv_op = dirichlet_multinomial
+    rv_type = DirichletMultinomialRV
+    rv_op = DirichletMultinomialRV.rv_op
 
     @classmethod
     def dist(cls, n, a, *args, **kwargs):
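
---

Usage sketch (illustrative, not part of the patch): because the reimplemented
ops are plain PyTensor graphs built from base random variables and elementwise
operations, the same random variable can be compiled for an alternative
backend with no per-op dispatch. This assumes the patch is applied and the
Numba backend is installed; extra keyword arguments such as mode="NUMBA" are
forwarded by pm.draw to the PyTensor compiler.

    import pymc as pm

    x = pm.Kumaraswamy.dist(a=2.0, b=5.0)

    # Default C backend
    print(pm.draw(x, draws=3, random_seed=1))

    # Same symbolic graph compiled with Numba: KumaraswamyRV lowers to a
    # uniform RV plus elementwise transforms, so no Kumaraswamy-specific
    # rng_fn is needed for the new backend
    print(pm.draw(x, draws=3, mode="NUMBA", random_seed=1))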