Skip to content

Commit

Permalink
Deprecating SampleDist in favor of GaussianKDE (#284)
Browse files Browse the repository at this point in the history
Deprecating SampleDist in favor of GaussianKDE
  • Loading branch information
jonathf authored Oct 26, 2020
1 parent 5f40d1b commit 918b07a
Show file tree
Hide file tree
Showing 13 changed files with 131 additions and 205 deletions.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
.coverage
__pycache__
*.egg-info
*.pyc
.ipynb_checkpoints
.mypy_cache
.envrc
docs/.build
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
Master Branch
=============

Version 4.0.1 (2020-10-26)
==========================

Release!

ADDED:
* Gaussian Mixture Model: `GaussianMixture`.
* Tutorial for how to use `scikit-learn` mixture models to fit a model, and
`chaospy` to generate quasi-random samples and orthogonal polynomials.
CHANGED:
* `chaospy.Trunc` updated to take both `lower` and `upper` at the same time.
REMOVED:
* `chaospy.SampleDist` removed in favor of `chaospy.GaussianKDE`.

Version 4.0-beta3 (2020-10-22)
==============================
Expand Down
5 changes: 3 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,9 @@ Also a few shout-outs:
| `orthopy`_ | Thanks to `Nico Schlömer`_ for providing the implementation |
| `quadpy`_ | for several of the quadrature integration methods. |
+--------------+--------------------------------------------------------------+
| ``UQRF`` | Thanks to `Florian Künzner`_ for providing the |
| | implementation for `sample distribution`_. |
| ``UQRF`` | Thanks to `Florian Künzner`_ for providing the initial |
| | implementation of kernel density estimation and |
| | quantity-of-interest distribution. |
+--------------+--------------------------------------------------------------+

.. _OpenTURNS: http://openturns.github.io/openturns/latest
Expand Down
48 changes: 9 additions & 39 deletions chaospy/descriptives/quantity_of_interest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
from functools import reduce
from operator import mul
import numpy

from .. import distributions
from ..external import SampleDist
import chaospy


def QoI_Dist(poly, dist, sample=10000, **kws):
Expand All @@ -28,7 +26,7 @@ def QoI_Dist(poly, dist, sample=10000, **kws):
Number of samples used in estimation to construct the KDE.
Returns:
(numpy.ndarray):
(Distribution):
The constructed quantity of interest (QoI) distributions, where
``qoi_dists.shape==poly.shape``.
Expand All @@ -37,44 +35,16 @@ def QoI_Dist(poly, dist, sample=10000, **kws):
>>> x = chaospy.variable(1)
>>> poly = chaospy.polynomial([x])
>>> qoi_dist = chaospy.QoI_Dist(poly, dist)
>>> values = qoi_dist[0].pdf([-0.75, 0., 0.75])
>>> values = qoi_dist.pdf([-0.75, 0., 0.75])
>>> values.round(8)
array([0.29143037, 0.39931708, 0.29536329])
array([0.29143989, 0.39939823, 0.29531414])
"""
shape = poly.shape
poly = poly.flatten()
dim = len(dist)

#sample from the inumpyut dist
samples = dist.sample(sample, **kws)

qoi_dists = []
for i in range(0, len(poly)):
#sample the polynomial solution
if dim == 1:
dataset = poly[i](samples)
else:
dataset = poly[i](*samples)

lo = dataset.min()
up = dataset.max()

#creates qoi_dist
qoi_dist = SampleDist(dataset, lo, up)
qoi_dists.append(qoi_dist)

# reshape the qoi_dists to match the shape of the input poly
if shape:
def reshape(lst, shape):
if len(shape) == 1:
return lst
n = reduce(mul, shape[1:])
return [reshape(lst[i*n:(i+1)*n], shape[1:]) for i in range(len(lst)//n)]
qoi_dists = reshape(qoi_dists, shape)
else:
qoi_dists = qoi_dists[0]

if not shape:
qoi_dists = qoi_dists.item()

return qoi_dists
#sample from the input dist
samples = numpy.atleast_2d(dist.sample(sample, **kws))
qoi_dist = chaospy.GaussianKDE(poly(*samples))
return qoi_dist
2 changes: 1 addition & 1 deletion chaospy/distributions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
from .operators import *
from .constructor import construct
from .approximation import *
from .kernel import GaussianKDE, GaussianMixture
from .kernel import *

from . import (
baseclass, sampler, approximation,
Expand Down
33 changes: 32 additions & 1 deletion chaospy/distributions/kernel/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,34 @@
"""Kernel density estimation."""
"""
In some cases a constructed distribution is first and foremost data
driven. In such scenarios it makes sense to make use of
`kernel density estimation`_ (KDE). In ``chaospy`` KDE can be accessed through
the :func:`GaussianKDE` constructor.
Basic usage of the :func:`GaussianKDE` constructor involves just passing the
data as an input argument::
>>> data = [3, 4, 5, 5]
>>> distribution = chaospy.GaussianKDE(data)
This distribution can be used like any other distribution::
>>> distribution.cdf([3, 3.5, 4, 4.5, 5]).round(4)
array([0.1393, 0.2542, 0.3889, 0.5512, 0.7359])
>>> distribution.mom(1).round(4)
4.25
>>> distribution.sample(4).round(4)
array([4.7784, 2.8769, 5.8109, 4.2995])
In addition, multivariate distributions are supported::
>>> data = [[1, 2, 2, 3], [5, 5, 4, 3]]
>>> distribution = chaospy.GaussianKDE(data)
>>> distribution.sample(4).round(4)
array([[2.081 , 3.0304, 3.0882, 0.4872],
[3.2878, 2.5473, 2.2699, 5.3412]])
.. _kernel density estimation: \
https://en.wikipedia.org/wiki/Kernel_density_estimation
"""
from .gaussian import GaussianKDE
from .mixture import GaussianMixture
4 changes: 4 additions & 0 deletions chaospy/distributions/kernel/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,12 @@ def _mom(self, k_loc, cache):

def _lower(self, idx, dim, cache):
    """
    Lower bounds of the estimated distribution along dimension ``idx``.

    Args:
        idx (int):
            Index of the dimension to compute the bound for.
        dim:
            Unused; part of the distribution-backend signature.
        cache:
            Unused; part of the distribution-backend signature.

    Returns:
        Heuristic lower support bound for dimension ``idx``.
    """
    # Explicitly discard the unused backend-signature arguments.
    del dim
    del cache
    # Heuristic bound: 10 kernel standard deviations below the smallest
    # sample in this dimension. NOTE(review): assumes h_mat[:, idx, idx]
    # holds the per-sample bandwidth variance for dimension idx — the
    # class attributes are defined outside this view; confirm.
    return (self.samples[idx]-10*numpy.sqrt(self.h_mat[:, idx, idx]).T).min(-1)

def _upper(self, idx, dim, cache):
    """
    Upper bounds of the estimated distribution along dimension ``idx``.

    Args:
        idx (int):
            Index of the dimension to compute the bound for.
        dim:
            Unused; part of the distribution-backend signature.
        cache:
            Unused; part of the distribution-backend signature.

    Returns:
        Heuristic upper support bound for dimension ``idx``.
    """
    # Explicitly discard the unused backend-signature arguments.
    del dim
    del cache
    # Heuristic bound: 10 kernel standard deviations above the largest
    # sample in this dimension, mirroring `_lower`. NOTE(review): assumes
    # h_mat[:, idx, idx] is the per-sample bandwidth variance — confirm
    # against the class definition.
    return (self.samples[idx]+10*numpy.sqrt(self.h_mat[:, idx, idx]).T).max(-1)
1 change: 0 additions & 1 deletion chaospy/external/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,3 @@
"""
from .openturns_ import openturns_dist, OpenTURNSDist
from .scipy_stats import ScipyStatsDist
from .samples import sample_dist, SampleDist
157 changes: 0 additions & 157 deletions chaospy/external/samples.py

This file was deleted.

2 changes: 1 addition & 1 deletion docs/tutorials/advanced/gaussian_mixture_model.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@
"`chaospy` supports Gaussian mixture model representation, but does not provide an automatic method for constructing them from data.\n",
"However, this is something for example `scikit-learn` supports.\n",
"It is possible to use `scikit-learn` to fit a model, and use the generated parameters in the `chaospy` implementation.\n",
"For example, let us consider the (Iris example from scikit-learn's documentation)[https://scikit-learn.org/stable/auto_examples/mixture/plot_gmm_covariances.html] and its \"full\" implementation and the 2-dimensional representation:"
"For example, let us consider the [Iris example from scikit-learn's documentation](https://scikit-learn.org/stable/auto_examples/mixture/plot_gmm_covariances.html) (\"full\" implementation in 2-dimensional representation):"
]
},
{
Expand Down
Loading

0 comments on commit 918b07a

Please sign in to comment.