Refactor likelihood module (#33)

* turn likelihood module into a package * update test likelihood * update more tests * move global functions from base to init * fix pep8 issues * rename test_models to analytic * update for change in pycbc * point to new location of calibration model in pycbc * bump pycbc requirements * more import fixes
gwastro · Jul 8, 2018 · 67ee007 · 67ee007
1 parent 40a4de2
commit 67ee007
Show file tree

Hide file tree

Showing 10 changed files with 747 additions and 677 deletions.
diff --git a/bin/gwin b/bin/gwin
@@ -26,7 +26,7 @@ import shutil
 import numpy
 
 import pycbc
-from pycbc import (calibration, distributions, transforms, fft, gate,
+from pycbc import (distributions, transforms, fft,
                    opt, psd, scheme, strain, weave)
 from pycbc.waveform import generator
 
@@ -114,7 +114,7 @@ psd.insert_psd_option_group_multi_ifo(parser)
 scheme.insert_processing_option_group(parser)
 strain.insert_strain_option_group_multi_ifo(parser)
 weave.insert_weave_option_group(parser)
-gate.add_gate_option_group(parser)
+strain.add_gate_option_group(parser)
 
 # parse command line
 opts = parser.parse_args()
@@ -201,12 +201,12 @@ with ctx:
     # get ifo-specific instances of calibration model
     if cp.has_section('calibration'):
         logging.info("Initializing calibration model")
-        recalib = {ifo : calibration.read_model_from_config(cp, ifo) for
+        recalib = {ifo : strain.read_model_from_config(cp, ifo) for
                    ifo in opts.instruments}
         likelihood_args['recalib'] = recalib
 
     # get gates for templates
-    gates = gate.gates_from_cli(opts)
+    gates = strain.gates_from_cli(opts)
     if gates:
         likelihood_args['gates'] = gates
 

diff --git a/gwin/likelihood/__init__.py b/gwin/likelihood/__init__.py
@@ -0,0 +1,71 @@
+# Copyright (C) 2018  Collin Capano
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+
+"""
+This package provides classes and functions for evaluating Bayesian statistics
+assuming various noise models.
+"""
+
+
+from .analytic import (TestEggbox, TestNormal, TestRosenbrock, TestVolcano)
+from .gaussian_noise import (GaussianLikelihood,
+                             MarginalizedPhaseGaussianLikelihood)
+
+
+# Used to manage a likelihood instance across multiple cores or MPI
+_global_instance = None
+
+
+def _call_global_likelihood(*args, **kwds):
+    """Private function for global likelihood (needed for parallelization)."""
+    return _global_instance(*args, **kwds)  # pylint:disable=not-callable
+
+
+def read_from_config(cp, section="likelihood", **kwargs):
+    """Initializes a ``LikelihoodEvaluator`` from the given config file.
+
+    The section must have a ``name`` argument. The name argument corresponds to
+    the name of the class to initialize.
+
+    Parameters
+    ----------
+    cp : WorkflowConfigParser
+        Config file parser to read.
+    section : str, optional
+        The section to read. Default is "likelihood".
+    \**kwargs :
+        All other keyword arguments are passed to the ``from_config`` method
+        of the class specified by the name argument.
+
+    Returns
+    -------
+    cls
+        The initialized ``LikelihoodEvaluator``.
+    """
+    # use the name to get the distribution
+    name = cp.get(section, "name")
+    return likelihood_evaluators[name].from_config(
+        cp, section=section, **kwargs)
+
+
+likelihood_evaluators = {_cls.name: _cls for _cls in (
+    TestEggbox,
+    TestNormal,
+    TestRosenbrock,
+    TestVolcano,
+    GaussianLikelihood,
+    MarginalizedPhaseGaussianLikelihood,
+)}
diff --git a/gwin/likelihood/analytic.py b/gwin/likelihood/analytic.py
@@ -0,0 +1,184 @@
+# Copyright (C) 2018  Collin Capano
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+"""
+This modules provides likelihood classes that have analytic solutions for the
+log likelihood.
+"""
+
+import numpy
+from scipy import stats
+
+from .base import BaseLikelihoodEvaluator
+
+
+class TestNormal(BaseLikelihoodEvaluator):
+    r"""The test distribution is an multi-variate normal distribution.
+
+    The number of dimensions is set by the number of ``variable_args`` that are
+    passed. For details on the distribution used, see
+    ``scipy.stats.multivariate_normal``.
+
+    Parameters
+    ----------
+    variable_args : (tuple of) string(s)
+        A tuple of parameter names that will be varied.
+    mean : array-like, optional
+        The mean values of the parameters. If None provide, will use 0 for all
+        parameters.
+    cov : array-like, optional
+        The covariance matrix of the parameters. If None provided, will use
+        unit variance for all parameters, with cross-terms set to 0.
+    **kwargs :
+        All other keyword arguments are passed to ``BaseLikelihoodEvaluator``.
+
+    """
+    name = "test_normal"
+
+    def __init__(self, variable_args, mean=None, cov=None, **kwargs):
+        # set up base likelihood parameters
+        super(TestNormal, self).__init__(variable_args, **kwargs)
+        # set the lognl to 0 since there is no data
+        self.set_lognl(0.)
+        # store the pdf
+        if mean is None:
+            mean = [0.]*len(variable_args)
+        if cov is None:
+            cov = [1.]*len(variable_args)
+        self._dist = stats.multivariate_normal(mean=mean, cov=cov)
+        # check that the dimension is correct
+        if self._dist.dim != len(variable_args):
+            raise ValueError("dimension mis-match between variable_args and "
+                             "mean and/or cov")
+
+    def loglikelihood(self, **params):
+        """Returns the log pdf of the multivariate normal.
+        """
+        return self._dist.logpdf([params[p] for p in self.variable_args])
+
+
+class TestEggbox(BaseLikelihoodEvaluator):
+    r"""The test distribution is an 'eggbox' function:
+
+    .. math::
+
+        \log \mathcal{L}(\Theta) = \left[
+            2+\prod_{i=1}^{n}\cos\left(\frac{\theta_{i}}{2}\right)\right]^{5}
+
+    The number of dimensions is set by the number of ``variable_args`` that are
+    passed.
+
+    Parameters
+    ----------
+    variable_args : (tuple of) string(s)
+        A tuple of parameter names that will be varied.
+    **kwargs :
+        All other keyword arguments are passed to ``BaseLikelihoodEvaluator``.
+
+    """
+    name = "test_eggbox"
+
+    def __init__(self, variable_args, **kwargs):
+        # set up base likelihood parameters
+        super(TestEggbox, self).__init__(variable_args, **kwargs)
+
+        # set the lognl to 0 since there is no data
+        self.set_lognl(0.)
+
+    def loglikelihood(self, **params):
+        """Returns the log pdf of the eggbox function.
+        """
+        return (2 + numpy.prod(numpy.cos([
+            params[p]/2. for p in self.variable_args]))) ** 5
+
+
+class TestRosenbrock(BaseLikelihoodEvaluator):
+    r"""The test distribution is the Rosenbrock function:
+
+    .. math::
+
+        \log \mathcal{L}(\Theta) = -\sum_{i=1}^{n-1}[
+            (1-\theta_{i})^{2}+100(\theta_{i+1} - \theta_{i}^{2})^{2}]
+
+    The number of dimensions is set by the number of ``variable_args`` that are
+    passed.
+
+    Parameters
+    ----------
+    variable_args : (tuple of) string(s)
+        A tuple of parameter names that will be varied.
+    **kwargs :
+        All other keyword arguments are passed to ``BaseLikelihoodEvaluator``.
+
+    """
+    name = "test_rosenbrock"
+
+    def __init__(self, variable_args, **kwargs):
+        # set up base likelihood parameters
+        super(TestRosenbrock, self).__init__(variable_args, **kwargs)
+
+        # set the lognl to 0 since there is no data
+        self.set_lognl(0.)
+
+    def loglikelihood(self, **params):
+        """Returns the log pdf of the Rosenbrock function.
+        """
+        logl = 0
+        p = [params[p] for p in self.variable_args]
+        for i in range(len(p) - 1):
+            logl -= ((1 - p[i])**2 + 100 * (p[i+1] - p[i]**2)**2)
+        return logl
+
+
+class TestVolcano(BaseLikelihoodEvaluator):
+    r"""The test distribution is a two-dimensional 'volcano' function:
+
+    .. math::
+        \Theta =
+            \sqrt{\theta_{1}^{2} + \theta_{2}^{2}} \log \mathcal{L}(\Theta) =
+            25\left(e^{\frac{-\Theta}{35}} +
+                    \frac{1}{2\sqrt{2\pi}} e^{-\frac{(\Theta-5)^{2}}{8}}\right)
+
+    Parameters
+    ----------
+    variable_args : (tuple of) string(s)
+        A tuple of parameter names that will be varied. Must have length 2.
+    **kwargs :
+        All other keyword arguments are passed to ``BaseLikelihoodEvaluator``.
+
+    """
+    name = "test_volcano"
+
+    def __init__(self, variable_args, **kwargs):
+        # set up base likelihood parameters
+        super(TestVolcano, self).__init__(variable_args, **kwargs)
+
+        # make sure there are exactly two variable args
+        if len(self.variable_args) != 2:
+            raise ValueError("TestVolcano distribution requires exactly "
+                             "two variable args")
+
+        # set the lognl to 0 since there is no data
+        self.set_lognl(0.)
+
+    def loglikelihood(self, **params):
+        """Returns the log pdf of the 2D volcano function.
+        """
+        p = [params[p] for p in self.variable_args]
+        r = numpy.sqrt(p[0]**2 + p[1]**2)
+        mu, sigma = 5.0, 2.0
+        return 25 * (
+            numpy.exp(-r/35) + 1 / (sigma * numpy.sqrt(2 * numpy.pi)) *
+            numpy.exp(-0.5 * ((r - mu) / sigma) ** 2))