diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..4757a9d5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +.coverage +__pycache__ +*.egg-info +*.pyc +.ipynb_checkpoints +.mypy_cache +.envrc +docs/.build diff --git a/CHANGELOG.rst b/CHANGELOG.rst index bc3bf756..01c95423 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,12 +1,19 @@ Master Branch ============= +Version 4.0.1 (2020-10-26) +========================== + +Release! + ADDED: * Gaussian Mixture Model: `GaussianMixture`. * Tutorial for how to use `scikit-learn` mixture models to fit a model, and `chaospy` to generate quasi-random samples and orthogonal polynomials. CHANGED: * `chaospy.Trunc` updated to take both `lower` and `upper` at the same time. +REMOVED: + * `chaospy.SampleDist` removed in favor of `chaospy.GaussianKDE`. Version 4.0-beta3 (2020-10-22) ============================== diff --git a/README.rst b/README.rst index acb30b85..131b3fdd 100644 --- a/README.rst +++ b/README.rst @@ -141,8 +141,9 @@ Also a few shout-outs: | `orthopy`_ | Thanks to `Nico Schlömer`_ for providing the implementation | | `quadpy`_ | for several of the quadrature integration methods. | +--------------+--------------------------------------------------------------+ -| ``UQRF`` | Thanks to `Florian Künzner`_ for providing the | -| | implementation for `sample distribution`_. | +| ``UQRF`` | Thanks to `Florian Künzner`_ for providing the initial | +| | implementation of kernel density estimation and | +| | quantity-of-interest distribution. | +--------------+--------------------------------------------------------------+ .. 
_OpenTURNS: http://openturns.github.io/openturns/latest diff --git a/chaospy/descriptives/quantity_of_interest.py b/chaospy/descriptives/quantity_of_interest.py index d92d875d..e75098f0 100644 --- a/chaospy/descriptives/quantity_of_interest.py +++ b/chaospy/descriptives/quantity_of_interest.py @@ -3,9 +3,7 @@ from functools import reduce from operator import mul import numpy - -from .. import distributions -from ..external import SampleDist +import chaospy def QoI_Dist(poly, dist, sample=10000, **kws): @@ -28,7 +26,7 @@ def QoI_Dist(poly, dist, sample=10000, **kws): Number of samples used in estimation to construct the KDE. Returns: - (numpy.ndarray): + (Distribution): The constructed quantity of interest (QoI) distributions, where ``qoi_dists.shape==poly.shape``. @@ -37,44 +35,16 @@ def QoI_Dist(poly, dist, sample=10000, **kws): >>> x = chaospy.variable(1) >>> poly = chaospy.polynomial([x]) >>> qoi_dist = chaospy.QoI_Dist(poly, dist) - >>> values = qoi_dist[0].pdf([-0.75, 0., 0.75]) + >>> values = qoi_dist.pdf([-0.75, 0., 0.75]) >>> values.round(8) - array([0.29143037, 0.39931708, 0.29536329]) + array([0.29143989, 0.39939823, 0.29531414]) + """ shape = poly.shape poly = poly.flatten() dim = len(dist) - #sample from the inumpyut dist - samples = dist.sample(sample, **kws) - - qoi_dists = [] - for i in range(0, len(poly)): - #sample the polynomial solution - if dim == 1: - dataset = poly[i](samples) - else: - dataset = poly[i](*samples) - - lo = dataset.min() - up = dataset.max() - - #creates qoi_dist - qoi_dist = SampleDist(dataset, lo, up) - qoi_dists.append(qoi_dist) - - # reshape the qoi_dists to match the shape of the input poly - if shape: - def reshape(lst, shape): - if len(shape) == 1: - return lst - n = reduce(mul, shape[1:]) - return [reshape(lst[i*n:(i+1)*n], shape[1:]) for i in range(len(lst)//n)] - qoi_dists = reshape(qoi_dists, shape) - else: - qoi_dists = qoi_dists[0] - - if not shape: - qoi_dists = qoi_dists.item() - - return qoi_dists + #sample from 
the input dist + samples = numpy.atleast_2d(dist.sample(sample, **kws)) + qoi_dist = chaospy.GaussianKDE(poly(*samples)) + return qoi_dist diff --git a/chaospy/distributions/__init__.py b/chaospy/distributions/__init__.py index b87e2798..78b8c3a1 100644 --- a/chaospy/distributions/__init__.py +++ b/chaospy/distributions/__init__.py @@ -73,7 +73,7 @@ from .operators import * from .constructor import construct from .approximation import * -from .kernel import GaussianKDE, GaussianMixture +from .kernel import * from . import ( baseclass, sampler, approximation, diff --git a/chaospy/distributions/kernel/__init__.py b/chaospy/distributions/kernel/__init__.py index 41061f16..983a633c 100644 --- a/chaospy/distributions/kernel/__init__.py +++ b/chaospy/distributions/kernel/__init__.py @@ -1,3 +1,34 @@ -"""Kernel density estimation.""" +""" +In some cases a constructed distribution is first and foremost data +driven. In such scenarios it makes sense to make use of +`kernel density estimation`_ (KDE). In ``chaospy`` KDE can be accessed through +the :func:`GaussianKDE` constructor. + +Basic usage of the :func:`GaussianKDE` constructor involves just passing the +data as input argument:: + + >>> data = [3, 4, 5, 5] + >>> distribution = chaospy.GaussianKDE(data) + +This distribution can be used like any other distribution:: + + >>> distribution.cdf([3, 3.5, 4, 4.5, 5]).round(4) + array([0.1393, 0.2542, 0.3889, 0.5512, 0.7359]) + >>> distribution.mom(1).round(4) + 4.25 + >>> distribution.sample(4).round(4) + array([4.7784, 2.8769, 5.8109, 4.2995]) + +In addition, multivariate distributions are supported:: + + >>> data = [[1, 2, 2, 3], [5, 5, 4, 3]] + >>> distribution = chaospy.GaussianKDE(data) + >>> distribution.sample(4).round(4) + array([[2.081 , 3.0304, 3.0882, 0.4872], + [3.2878, 2.5473, 2.2699, 5.3412]]) + +.. 
_kernel density estimation: \ +https://en.wikipedia.org/wiki/Kernel_density_estimation +""" from .gaussian import GaussianKDE from .mixture import GaussianMixture diff --git a/chaospy/distributions/kernel/gaussian.py b/chaospy/distributions/kernel/gaussian.py index 979f9c27..ec829679 100644 --- a/chaospy/distributions/kernel/gaussian.py +++ b/chaospy/distributions/kernel/gaussian.py @@ -83,8 +83,12 @@ def _mom(self, k_loc, cache): def _lower(self, idx, dim, cache): """Lower bounds.""" + del dim + del cache return (self.samples[idx]-10*numpy.sqrt(self.h_mat[:, idx, idx]).T).min(-1) def _upper(self, idx, dim, cache): """Upper bounds.""" + del dim + del cache return (self.samples[idx]+10*numpy.sqrt(self.h_mat[:, idx, idx]).T).max(-1) diff --git a/chaospy/external/__init__.py b/chaospy/external/__init__.py index b5917e4b..5cfc09d4 100644 --- a/chaospy/external/__init__.py +++ b/chaospy/external/__init__.py @@ -7,4 +7,3 @@ """ from .openturns_ import openturns_dist, OpenTURNSDist from .scipy_stats import ScipyStatsDist -from .samples import sample_dist, SampleDist diff --git a/chaospy/external/samples.py b/chaospy/external/samples.py deleted file mode 100644 index 7375cc3a..00000000 --- a/chaospy/external/samples.py +++ /dev/null @@ -1,157 +0,0 @@ -""" -In some cases a constructed distribution that are first and foremost data -driven. In such scenarios it make sense to make use of -`kernel density estimation`_ (KDE). In ``chaospy`` KDE can be accessed through -the :func:`SampleDist` constructor. - -Basic usage of the :func:`SampleDist` constructor involves just passing the -data as input argument:: - - >>> data = [3, 4, 5, 5] - >>> distribution = chaospy.SampleDist(data) - -This distribution can be used as any other distributions:: - - >>> distribution.cdf([3, 3.5, 4, 4.5, 5]).round(4) - array([0. , 0.1932, 0.4279, 0.7043, 1. 
]) - >>> distribution.mom(1).round(4) - 4.25 - >>> distribution.sample(4).round(4) - array([4.4131, 3.3111, 4.9139, 4.1042]) - -It also supports lower and upper bounds defining where the range is expected to -appear, which gives a slightly different distribution:: - - >>> distribution = chaospy.SampleDist(data, lo=2, up=6) - >>> distribution.cdf([3, 3.5, 4, 4.5, 5]).round(4) - array([0.1344, 0.2543, 0.4001, 0.5716, 0.7552]) - -In addition multivariate distributions supported:: - - >>> data = [[1, 2, 2, 3], [5, 5, 4, 3]] - >>> distribution = chaospy.SampleDist(data) - >>> distribution.sample(4).round(4) - array([[1.5286, 2.0468, 2.1125, 1.8947], - [4.402 , 4.1522, 4.4384, 4.5737]]) - -.. _kernel density estimation: \ -https://en.wikipedia.org/wiki/Kernel_density_estimation -""" -import numpy -from scipy.stats import gaussian_kde -import chaospy - -from chaospy.distributions import SimpleDistribution - - -class sample_dist(SimpleDistribution): - """A distribution that is based on a kernel density estimator (KDE).""" - - def __init__(self, samples, lo, up): - samples = numpy.asarray(samples) - self.samples = samples - self.kernel = gaussian_kde(samples, bw_method="scott") - self.flo = self.kernel.integrate_box_1d(0, lo) - self.fup = self.kernel.integrate_box_1d(0, up) - self.unbound = numpy.all(lo == samples.min()) - self.unbound &= numpy.all(up == samples.max()) - super(sample_dist, self).__init__( - parameters=dict(lo=lo, up=up), - repr_args=[repr(samples), lo, up], - ) - - def _cdf(self, xloc, lo, up): - cdf_vals = numpy.array([self.kernel.integrate_box_1d(0, x) - for x in xloc]) - return (cdf_vals-self.flo)/(self.fup-self.flo) - - def _pdf(self, x, lo, up): - return self.kernel(x) - - def _lower(self, lo, up): - return lo - - def _upper(self, lo, up): - return up - - def _mom(self, k, lo, up): - if self.unbound: - return numpy.prod(numpy.mean(self.samples.T**k, -1)) - raise chaospy.StochasticallyDependentError("component lack support") - - -def SampleDist(samples, 
lo=None, up=None, threshold=1e-5): - """ - Distribution based on samples. - - Estimates a distribution from the given samples by constructing a kernel - density estimator (KDE). - - Args: - samples (numpy.ndarray): - Sample values to construction of the KDE. Either shape - ``(N,)`` or ``(D, N)``, where ``N`` are the number of - samples, and ``D`` is the number of dimension in the - distribution. - lo (float): - Location of lower bound. - up (float): - Location of upper bound. - threshold (float): - Threshold for how low the correlation between two - columns should be before defining them as - stochastically independent. - - Example: - >>> distribution = chaospy.SampleDist([0, 1, 1, 1, 2]) - >>> distribution - sample_dist(array([0, 1, 1, 1, 2]), 0, 2) - >>> q = numpy.linspace(0, 1, 5) - >>> distribution.inv(q).round(4) - array([0. , 0.6016, 1. , 1.3984, 2. ]) - >>> distribution.fwd(distribution.inv(q)).round(4) - array([0. , 0.25, 0.5 , 0.75, 1. ]) - >>> distribution.pdf(distribution.inv(q)).round(4) - array([0.2254, 0.4272, 0.5135, 0.4272, 0.2254]) - >>> distribution.sample(4).round(4) - array([0.3662, 0.6073, 0.9156, 1.0883]) - >>> distribution.mom(1).round(4) - 1.0 - - """ - samples = numpy.atleast_2d(samples) - assert samples.ndim == 2, "samples have too many dimensions provided" - - if lo is None: - lo = samples.min(axis=-1) - else: - lo = numpy.broadcast_to(lo, len(samples)) - if up is None: - up = samples.max(axis=-1) - else: - up = numpy.broadcast_to(up, len(samples)) - - # construct vector of marginals - distributions = [] - for samples_, lo_, up_ in zip(samples, lo, up): - #construct the kernel density estimator - try: - dist = sample_dist(samples_, lo_, up_) - #raised by gaussian_kde if dataset is singular matrix - except numpy.linalg.LinAlgError: - dist = chaospy.Uniform(lower=-numpy.inf, upper=numpy.inf) - distributions.append(dist) - - if len(samples) == 1: - distributions = distributions[0] - - else: - distributions = chaospy.J(*distributions) - - # 
Attach dependencies to data. - correlation = numpy.corrcoef(samples) - correlation[numpy.abs(correlation) <= threshold] = 0 - if numpy.any(correlation != numpy.diag(numpy.diag(correlation))): - distributions = chaospy.Nataf(distributions, correlation) - - return distributions diff --git a/docs/tutorials/advanced/gaussian_mixture_model.ipynb b/docs/tutorials/advanced/gaussian_mixture_model.ipynb index d4e674f9..7c785019 100644 --- a/docs/tutorials/advanced/gaussian_mixture_model.ipynb +++ b/docs/tutorials/advanced/gaussian_mixture_model.ipynb @@ -77,7 +77,7 @@ "`chaospy` supports Gaussian mixture model representation, but does not provide an automatic method for constructing them from data.\n", "However, this is something for example `scikit-learn` supports.\n", "It is possible to use `scikit-learn` to fit a model, and use the generated parameters in the `chaospy` implementation.\n", - "For example, let us consider the (Iris example from scikit-learn's documentation)[https://scikit-learn.org/stable/auto_examples/mixture/plot_gmm_covariances.html] and its \"full\" implementation and the 2-dimensional representation:" + "For example, let us consider the [Iris example from scikit-learn's documentation](https://scikit-learn.org/stable/auto_examples/mixture/plot_gmm_covariances.html) (\"full\" implementation in 2-dimensional representation):" ] }, { diff --git a/docs/tutorials/advanced/kernel_density_estimation.ipynb b/docs/tutorials/advanced/kernel_density_estimation.ipynb index 5f21faa8..965be551 100644 --- a/docs/tutorials/advanced/kernel_density_estimation.ipynb +++ b/docs/tutorials/advanced/kernel_density_estimation.ipynb @@ -59,7 +59,7 @@ ], "source": [ "import chaospy\n", - "distribution = chaospy.GaussianKDE(samples)\n", + "distribution = chaospy.GaussianKDE(samples, h_mat=0.05)\n", "distribution" ] }, @@ -70,7 +70,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] diff --git a/pyproject.toml b/pyproject.toml index 1e65886c..5d5bdad7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.masonry.api" [tool.poetry] name = "chaospy" -version = "4.0-beta3" +version = "4.0.1" description = "Numerical tool for perfroming uncertainty quantification" license = "MIT" authors = ["Jonathan Feinberg"] diff --git a/tests/distributions/kernel/test_gaussian_kde.py b/tests/distributions/kernel/test_gaussian_kde.py new file mode 100644 index 00000000..e76e6b58 --- /dev/null +++ b/tests/distributions/kernel/test_gaussian_kde.py @@ -0,0 +1,63 @@ +"""Tests for Gaussian kernel density estimation.""" +import numpy +import chaospy + + +def test_gaussian_kde_1d_integration(): + """Make sure that 1D distribution integration is correct.""" + dist = chaospy.GaussianKDE([0, 1, 2]) + t = numpy.mgrid[-2.6:4.6:2e5j] + scale = numpy.ptp(t) + assert numpy.isclose(numpy.mean(dist.pdf(t)*scale), 1.) + assert numpy.isclose(numpy.mean(t*dist.pdf(t)*scale), 1.) 
+ + +def test_gaussian_kde_2d_integration(): + """Make sure that 2D distribution integration is correct.""" + dist = chaospy.GaussianKDE([[0, 2], [2, 0]]) + samples = dist.sample(1e4) + assert numpy.allclose(numpy.mean(samples, axis=-1), 1, rtol=1e-1) + + +def test_gaussian_kde_rotation(): + """Make sure rotation does not affect mapping.""" + dist = chaospy.GaussianKDE([[0, 0, 2], [0, 2, 0], [2, 0, 0]], rotation=[0, 1, 2]) + grid = numpy.mgrid[0.01:0.99:2j, 0.01:0.99:2j, 0.01:0.99:2j].reshape(3, 8) + inverse = dist.inv(grid) + assert numpy.allclose(dist.fwd(inverse), grid) + assert numpy.allclose(dist.pdf(inverse), + [2.550e-05, 2.553e-05, 2.553e-05, 2.552e-05, + 2.525e-05, 2.522e-05, 2.522e-05, 2.519e-05]) + assert numpy.allclose( + inverse, + [[-1.38424, -1.38424, -1.38424, -1.38424, 3.19971, 3.19971, 3.19971, 3.19971], + [-1.31003, -1.31003, 3.31003, 3.31003, -1.48391, -1.48391, 1.48429, 1.48429], + [ 0.51571, 3.48391, -1.48391, 1.48413, -1.48391, 1.48429, -1.48391, 1.48429]], + rtol=1e-5, + ) + dist = chaospy.GaussianKDE([[0, 0, 2], [0, 2, 0], [2, 0, 0]], rotation=[2, 1, 0]) + inverse = dist.inv(grid) + assert numpy.allclose(dist.fwd(inverse), grid) + assert numpy.allclose(dist.pdf(inverse), + [2.550e-05, 2.525e-05, 2.553e-05, 2.522e-05, + 2.553e-05, 2.522e-05, 2.552e-05, 2.519e-05]) + assert numpy.allclose( + inverse, + [[ 0.51571, -1.48391, -1.48391, -1.48391, 3.48391, 1.48429, 1.48413, 1.48429], + [-1.31003, -1.48391, 3.31003, 1.48429, -1.31003, -1.48391, 3.31003, 1.48429], + [-1.38424, 3.19971, -1.38424, 3.19971, -1.38424, 3.19971, -1.38424, 3.19971]], + rtol=1e-5, + ) + dist = chaospy.GaussianKDE([[0, 0, 2], [0, 2, 0], [2, 0, 0]], rotation=[0, 2, 1]) + inverse = dist.inv(grid) + assert numpy.allclose(dist.fwd(inverse), grid) + assert numpy.allclose(dist.pdf(inverse), + [2.550e-05, 2.553e-05, 2.553e-05, 2.552e-05, + 2.525e-05, 2.522e-05, 2.522e-05, 2.519e-05]) + assert numpy.allclose( + inverse, + [[-1.38424, -1.38424, -1.38424, -1.38424, 3.19971, 3.19971, 
3.19971, 3.19971], + [ 0.51571, -1.48391, 3.48391, 1.48413, -1.48391, -1.48391, 1.48429, 1.48429], + [-1.31003, 3.31003, -1.31003, 3.31003, -1.48391, 1.48429, -1.48391, 1.48429]], + rtol=1e-5, + )