From b95af1fdb0e01fc2a31f306d6514533a8681525f Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 18 Oct 2017 13:32:51 +0200 Subject: [PATCH 01/56] catching other error types that may occur when gradients are not available --- pymc3/tuning/starting.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py index 892e357243..275233281d 100644 --- a/pymc3/tuning/starting.py +++ b/pymc3/tuning/starting.py @@ -11,6 +11,7 @@ from ..vartypes import discrete_types, typefilter from ..model import modelcontext, Point from ..theanof import inputvars +import theano.gradient as tg from ..blocking import DictToArrayBijection, ArrayOrdering from ..util import update_start_vals, get_default_varnames @@ -75,7 +76,7 @@ def find_MAP(start=None, vars=None, method="L-BFGS-B", try: dlogp_func = bij.mapf(model.fastdlogp_nojac(vars)) compute_gradient = True - except AttributeError: + except (AttributeError, NotImplementedError, tg.NullTypeGradError): compute_gradient = False if disc_vars or not compute_gradient: From fd16ba537353a0a77a2beffefabfdc555086b691 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 21 Nov 2017 16:53:34 +0100 Subject: [PATCH 02/56] added a benchmark example for correlated dimensions --- pymc3/examples/samplers_mvnormal.py | 53 +++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 pymc3/examples/samplers_mvnormal.py diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py new file mode 100644 index 0000000000..1150229902 --- /dev/null +++ b/pymc3/examples/samplers_mvnormal.py @@ -0,0 +1,53 @@ +""" +Comparing different samplers on the banana-shaped distribution + +""" + + +import numpy as np +import pandas as pd +import pymc3 as pm +import theano.tensor as tt +import matplotlib.pyplot as plt + + + + +def run(steppers, p): + steppers = set(steppers) + traces = {} + effn = {} + + with pm.Model() as model: + mu = np.array([0.,0.]) + cov = np.array([[1.,p],[p,1.]]) + z = pm.MvNormal('z', mu=mu, cov=cov, shape=(2,)) + + for step_cls in steppers: + name = step_cls.__name__ + print('Step method: {}'.format(name)) + mt = pm.sample( + draws=10000, + step=step_cls(), + start={'z': [0, 0]} + ) + traces[name] = mt + en = pm.diagnostics.effective_n(mt) + effn[name] = np.mean(en['z']) / len(mt) / mt.nchains + return traces, effn + + +if __name__ == '__main__': + methods = [pm.Metropolis, pm.NUTS] + names = [c.__name__ for c in methods] + df = pd.DataFrame(columns=['p'] + names) + df['p'] = [.0,.9] + df = df.set_index('p') + rates = [] + for p in df.index: + trace, rate = run(methods, p) + for name in names: + df.set_value(p, name, rate[name]) + + print('Effective sample size [0...1]') + print(df) From 8ec0b80237923f32a62fb47c40936e12866e7ffb Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 21 Nov 2017 16:55:10 +0100 Subject: [PATCH 03/56] marking Metropolis as COMPATIBLE for all types, added line breaks (code style) --- pymc3/step_methods/metropolis.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index e0d049dba7..cd1c74be35 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -168,9 +168,7 @@ def astep(self, q0): @staticmethod def competence(var, has_grad): - if var.dtype in pm.discrete_types: - return Competence.COMPATIBLE - return Competence.INCOMPATIBLE + return Competence.COMPATIBLE def tune(scale, acc_rate): @@ -348,6 +346,7 
@@ def competence(var): return Competence.IDEAL return Competence.INCOMPATIBLE + class CategoricalGibbsMetropolis(ArrayStep): """A Metropolis-within-Gibbs step method optimized for categorical variables. This step method works for Bernoulli variables as well, but it is not @@ -465,16 +464,19 @@ def competence(var): return Competence.COMPATIBLE return Competence.INCOMPATIBLE + def sample_except(limit, excluded): candidate = nr.choice(limit - 1) if candidate >= excluded: candidate += 1 return candidate + def softmax(x): e_x = np.exp(x - np.max(x)) return e_x / np.sum(e_x, axis = 0) + def delta_logp(logp, vars, shared): [logp0], inarray0 = pm.join_nonshared_inputs([logp], vars, shared) From 1d04f53b3babcd5f4bac134cb176cd9581df4ac2 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 21 Nov 2017 18:06:16 +0100 Subject: [PATCH 04/56] specifying single job to force the sample_many function --- pymc3/examples/samplers_mvnormal.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index 1150229902..747ea5ba76 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -28,6 +28,8 @@ def run(steppers, p): print('Step method: {}'.format(name)) mt = pm.sample( draws=10000, + njobs=1, + chains=6, step=step_cls(), start={'z': [0, 0]} ) From 2bd6a5a76aa2a7e677fd14e42a917d57763621e9 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 21 Nov 2017 18:21:02 +0100 Subject: [PATCH 05/56] modified sampling procedure to iterate chains in parallel (instead of sequentially) --- pymc3/sampling.py | 92 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 16db40b951..9ab1661d99 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1,4 +1,5 @@ from collections import defaultdict, Iterable +from copy import copy import pickle from joblib import Parallel, delayed @@ -382,6 +383,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, parallel = njobs > 1 and chains > 1 if parallel: + pm._log.info('Multiprocess sampling ({} jobs, {} chains)'.format(njobs, chains)) try: trace = _mp_sample(**sample_args) except pickle.PickleError: @@ -396,6 +398,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, else: raise if not parallel: + pm._log.info('Multichain sampling ({} jobs, {} chains)'.format(njobs, chains)) trace = _sample_many(**sample_args) discard = tune if discard_tuned_samples else 0 @@ -429,7 +432,7 @@ def _check_start_shape(model, start): raise ValueError("Bad shape for start argument:{}".format(e)) -def _sample_many(draws, chain, chains, start, random_seed, **kwargs): +def _sample_many_sequentially(draws, chain, chains, start, random_seed, **kwargs): traces = [] for i in range(chains): trace = _sample(draws=draws, chain=chain + i, start=start[i], @@ -448,6 +451,21 @@ def _sample_many(draws, chain, chains, start, random_seed, **kwargs): return MultiTrace(traces) +def _sample_many(draws, chain, chains, start, random_seed, step, tune, model, progressbar=None, **kwargs): + # create the generator that iterates all chains in parallel + chains = [chain + c for c in range(chains)] + sampling = _iter_chains(draws, chains, step, start, tune=tune, model=model, random_seed=random_seed) + + if progressbar: + sampling = tqdm(sampling, total=draws) + + latest_traces = None + for it,traces in enumerate(sampling): + latest_traces = traces + # TODO: add support for
liveplot during population-sampling + return MultiTrace(traces) + + def _sample(chain, progressbar, random_seed, start, draws=None, step=None, trace=None, tune=None, model=None, live_plot=False, live_plot_kwargs=None, **kwargs): @@ -580,6 +598,78 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, step.report._finalize(strace) +def _iter_chains(draws, chains, step, start, tune=None, + model=None, random_seed=None): + model = modelcontext(model) + draws = int(draws) + if random_seed is not None: + np.random.seed(random_seed) + if draws < 1: + raise ValueError('Argument `draws` should be above 0.') + + # need indepenently tuned samplers for each chain + try: + steppers = [CompoundStep(copy(step)) for c in chains] + except TypeError: + steppers = [copy(step) for c in chains] + + # points tracks the current position of each chain in the parameter space + # it is updated as the chains are advanced + points = [Point(start[c], model=model) for c in chains] + + # prepare a BaseTrace for each chain + traces = [_choose_backend(None, c, model=model) for c in chains] + for chain,strace,step in zip(chains, traces, steppers): + # initialize the trace size + if len(strace) > 0: + update_start_vals(start[chain], strace.point(-1), model) + else: + update_start_vals(start[chain], model.test_point, model) + # initialize tracking of sampler stats + if step.generates_stats and strace.supports_sampler_stats: + strace.setup(draws, chain, step.stats_dtypes) + else: + strace.setup(draws, chain) + + try: + # iterate draws of all chains + for i in range(draws): + # step each of the chains + for c,strace in enumerate(traces): + if i == tune: + steppers[c] = stop_tuning(steppers[c]) + + # advance this chain (TODO: using the points of all chains) + step_result = steppers[c].step(points[c]) + + if steppers[c].generates_stats: + points[c], states = step_result + if strace.supports_sampler_stats: + strace.record(points[c], states) + else: + strace.record(points[c]) + else: + points[c] = step_result + strace.record(points[c]) + # yield the state of all chains in parallel + yield traces + except KeyboardInterrupt: + for chain,strace in enumerate(traces): + strace.close() + if hasattr(step, 'report'): + step.report._finalize(strace) + raise + except BaseException: + for chain,strace in enumerate(traces): + strace.close() + raise + else: + for chain,strace in enumerate(traces): + strace.close() + if hasattr(step, 'report'): + step.report._finalize(strace) + + def _choose_backend(trace, chain, shortcuts=None, **kwds): if isinstance(trace, BaseTrace): return trace From df37cbbb13b121c8730e34a5539518b70ed89db8 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 21 Nov 2017 18:35:21 +0100 Subject: [PATCH 06/56] updated description --- pymc3/examples/samplers_mvnormal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index 747ea5ba76..e5dcb8cf33 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -1,5 +1,5 @@ """ -Comparing different samplers on the banana-shaped distribution +Comparing different samplers on a correlated bivariate normal distribution. 
""" From ba7d083fd2869c93b3138435d42a9ec601bf8b90 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 22 Nov 2017 16:20:27 +0100 Subject: [PATCH 07/56] indexing samplers by chain number instead of chain id --- pymc3/sampling.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 9ab1661d99..240992e06e 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -600,6 +600,8 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, def _iter_chains(draws, chains, step, start, tune=None, model=None, random_seed=None): + # chains contains the chain numbers, but for indexing we need indices... + cs = list(range(len(chains))) model = modelcontext(model) draws = int(draws) if random_seed is not None: @@ -608,32 +610,36 @@ def _iter_chains(draws, chains, step, start, tune=None, raise ValueError('Argument `draws` should be above 0.') # need indepenently tuned samplers for each chain - try: - steppers = [CompoundStep(copy(step)) for c in chains] - except TypeError: - steppers = [copy(step) for c in chains] + is_compound = isinstance(step, list) + if is_compound: + steppers = [CompoundStep(copy(step)) for c in cs] + else: + steppers = [copy(step) for c in cs] # points tracks the current position of each chain in the parameter space # it is updated as the chains are advanced - points = [Point(start[c], model=model) for c in chains] + points = [Point(start[c], model=model) for c in cs] # prepare a BaseTrace for each chain traces = [_choose_backend(None, c, model=model) for c in chains] - for chain,strace,step in zip(chains, traces, steppers): + for c,strace in enumerate(traces): # initialize the trace size if len(strace) > 0: - update_start_vals(start[chain], strace.point(-1), model) + update_start_vals(start[c], strace.point(-1), model) else: - update_start_vals(start[chain], model.test_point, model) + update_start_vals(start[c], model.test_point, model) # initialize tracking of sampler stats if step.generates_stats and strace.supports_sampler_stats: - strace.setup(draws, chain, step.stats_dtypes) + strace.setup(draws, c, steppers[c].stats_dtypes) else: - strace.setup(draws, chain) + strace.setup(draws, c) try: # iterate draws of all chains for i in range(draws): + # snapshot all points (the original will be updated) + last_points = copy(points) + # step each of the chains for c,strace in enumerate(traces): if i == tune: @@ -654,17 +660,17 @@ def _iter_chains(draws, chains, step, start, tune=None, # yield the state of all chains in parallel yield traces except KeyboardInterrupt: - for chain,strace in enumerate(traces): + for c,strace in enumerate(traces): strace.close() if hasattr(step, 'report'): step.report._finalize(strace) raise except BaseException: - for chain,strace in enumerate(traces): + for c,strace in enumerate(traces): strace.close() raise else: - for chain,strace in enumerate(traces): + for c,strace in enumerate(traces): strace.close() if hasattr(step, 'report'): step.report._finalize(strace) From a4894bfe86c786b6b609c678cd84d621379857c5 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 22 Nov 2017 16:20:57 +0100 Subject: [PATCH 08/56] print transposes result table --- pymc3/examples/samplers_mvnormal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index e5dcb8cf33..b6546f4004 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ 
-52,4 +52,4 @@ def run(steppers, p): df.set_value(p, name, rate[name]) print('Effective sample size [0...1]') - print(df) + print(df.T) From cd14d6a6f17e2532c5a77781938975a7801f8b09 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 12:54:47 +0100 Subject: [PATCH 09/56] created PopulationArrayStepShared base class that allows the individual sampler to be aware of other chains --- pymc3/step_methods/arraystep.py | 46 +++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index 564d37e2fc..42bc35343b 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -165,6 +165,52 @@ def step(self, point): return bij.rmap(apoint) +class PopulationArrayStepShared(BlockedStep): + """Faster version of ArrayStep that requires the substep method that does not wrap + the functions the step method uses. + + Works by setting shared variables before using the step. This eliminates the mapping + and unmapping overhead as well as moving fewer variables around. + """ + + def __init__(self, vars, shared, blocked=True): + """ + Parameters + ---------- + vars : list of sampling variables + shared : dict of theano variable -> shared variable + blocked : Boolean (default True) + """ + self.vars = vars + self.ordering = ArrayOrdering(vars) + self.bij = None + self.shared = {str(var): shared for var, shared in shared.items()} + self.blocked = blocked + self.population = None + self.this_chain = None + self.other_chains = None + + def link_population(self, population, chain_index): + self.population = population + self.this_chain = chain_index + self.other_chains = [c for c in range(len(population)) if c != chain_index] + return + + def step(self, point): + for var, share in self.shared.items(): + share.set_value(point[var]) + + if self.bij is None: + self.bij = DictToArrayBijection(self.ordering, point) + + if self.generates_stats: + apoint, stats = self.astep(self.bij.map(point)) + return self.bij.rmap(apoint), stats + else: + apoint = self.astep(self.bij.map(point)) + return self.bij.rmap(apoint) + + class GradientSharedStep(BlockedStep): def __init__(self, vars, model=None, blocked=True, dtype=None, **theano_kwargs): From dbc42bcc4bbbd9a0056dc6ddb315f094bee73737 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 12:56:44 +0100 Subject: [PATCH 10/56] modified sampling loop to account for the PopulationArrayStepShared sampler types --- pymc3/sampling.py | 45 ++++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 240992e06e..ad162cfecc 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -11,6 +11,7 @@ from .backends.base import BaseTrace, MultiTrace from .backends.ndarray import NDArray from .model import modelcontext, Point +from .step_methods import arraystep from .step_methods import (NUTS, HamiltonianMC, SGFS, Metropolis, BinaryMetropolis, BinaryGibbsMetropolis, CategoricalGibbsMetropolis, Slice, CompoundStep) @@ -601,24 +602,33 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, def _iter_chains(draws, chains, step, start, tune=None, model=None, random_seed=None): # chains contains the chain numbers, but for indexing we need indices... 
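# ---- Editor's note: illustrative sketch, not part of the patch series. ----
# PATCH 09 above introduces PopulationArrayStepShared, whose link_population() stores a
# *reference* to one shared list of chain states plus this chain's index. Because the
# sampling loop (PATCH 10) replaces elements of that list in place after every draw, each
# stepper always sees the current position of every other chain. A self-contained toy
# analogue (class and variable names below are hypothetical, not the PyMC3 API):
class ToyPopulationStepper:
    def link_population(self, population, chain_index):
        self.population = population              # shared by reference, never copied
        self.this_chain = chain_index
        self.other_chains = [c for c in range(len(population)) if c != chain_index]

points = [{'x': 0.0}, {'x': 1.0}, {'x': 2.0}]      # one current point per chain
steppers = [ToyPopulationStepper() for _ in points]
for c, s in enumerate(steppers):
    s.link_population(points, c)

points[1] = {'x': -5.0}                            # the loop advances chain 1 in place...
print(steppers[0].population[1])                   # ...and chain 0's stepper sees {'x': -5.0}
# ---- end editor's note ----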
- cs = list(range(len(chains))) + nchains = len(chains) model = modelcontext(model) draws = int(draws) + if not isinstance(step, (list, tuple)): + step = [step] + is_compound = len(step) > 1 if random_seed is not None: np.random.seed(random_seed) if draws < 1: raise ValueError('Argument `draws` should be above 0.') - # need indepenently tuned samplers for each chain - is_compound = isinstance(step, list) - if is_compound: - steppers = [CompoundStep(copy(step)) for c in cs] - else: - steppers = [copy(step) for c in cs] - # points tracks the current position of each chain in the parameter space # it is updated as the chains are advanced - points = [Point(start[c], model=model) for c in cs] + points = [Point(start[c], model=model) for c in range(nchains)] + updates = [None] * nchains + + # Set up the steppers + steppers = [None] * nchains + for c in range(nchains): + # need indepenently tuned samplers for each chain + smethods = copy(step) + # link Population samplers to the shared population state + for sm in smethods: + if isinstance(sm, arraystep.PopulationArrayStepShared): + sm.link_population(points, c) + steppers[c] = CompoundStep(smethods) if is_compound else smethods[0] + # prepare a BaseTrace for each chain traces = [_choose_backend(None, c, model=model) for c in chains] @@ -629,7 +639,7 @@ def _iter_chains(draws, chains, step, start, tune=None, else: update_start_vals(start[c], model.test_point, model) # initialize tracking of sampler stats - if step.generates_stats and strace.supports_sampler_stats: + if steppers[c].generates_stats and strace.supports_sampler_stats: strace.setup(draws, c, steppers[c].stats_dtypes) else: strace.setup(draws, c) @@ -637,25 +647,22 @@ def _iter_chains(draws, chains, step, start, tune=None, try: # iterate draws of all chains for i in range(draws): - # snapshot all points (the original will be updated) - last_points = copy(points) - # step each of the chains - for c,strace in enumerate(traces): + for c in range(nchains): if i == tune: steppers[c] = stop_tuning(steppers[c]) + updates[c] = steppers[c].step(points[c]) - # advance this chain (TODO: using the points of all chains) - step_result = steppers[c].step(points[c]) - + # apply the update to the points and record to the traces + for c,strace in enumerate(traces): if steppers[c].generates_stats: - points[c], states = step_result + points[c], states = updates[c] if strace.supports_sampler_stats: strace.record(points[c], states) else: strace.record(points[c]) else: - points[c] = step_result + points[c] = updates[c] strace.record(points[c]) # yield the state of all chains in parallel yield traces From 0ca38043e2646515baa7b274d38aa3e17266f169 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 12:57:10 +0100 Subject: [PATCH 11/56] added the DEMetropolis sampler --- pymc3/examples/samplers_mvnormal.py | 6 +- pymc3/step_methods/__init__.py | 1 + pymc3/step_methods/metropolis.py | 109 +++++++++++++++++++++++++++- 3 files changed, 114 insertions(+), 2 deletions(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index b6546f4004..32c27a4c44 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -40,7 +40,11 @@ def run(steppers, p): if __name__ == '__main__': - methods = [pm.Metropolis, pm.NUTS] + methods = [ + pm.Metropolis, + pm.NUTS, + pm.DEMetropolis + ] names = [c.__name__ for c in methods] df = pd.DataFrame(columns=['p'] + names) df['p'] = [.0,.9] diff --git a/pymc3/step_methods/__init__.py 
b/pymc3/step_methods/__init__.py index 67212508d3..d18e36a153 100644 --- a/pymc3/step_methods/__init__.py +++ b/pymc3/step_methods/__init__.py @@ -3,6 +3,7 @@ from .hmc import HamiltonianMC, NUTS from .metropolis import Metropolis +from .metropolis import DEMetropolis from .metropolis import BinaryMetropolis from .metropolis import BinaryGibbsMetropolis from .metropolis import CategoricalGibbsMetropolis diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index cd1c74be35..2a15a76694 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -4,7 +4,8 @@ import scipy.linalg from ..distributions import draw_values -from .arraystep import ArrayStepShared, ArrayStep, metrop_select, Competence +from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence +from ..blocking import DictToArrayBijection import pymc3 as pm from pymc3.theanof import floatX @@ -465,6 +466,112 @@ def competence(var): return Competence.INCOMPATIBLE +class DEMetropolis(PopulationArrayStepShared): + """ + Differential Evolution Metropolis sampling step + + Parameters + ---------- + vars : list + List of variables for sampler + S : standard deviation or covariance matrix + Some measure of variance to parameterize proposal distribution + proposal_dist : function + Function that returns zero-mean deviates when parameterized with + S (and n). Defaults to normal. + scaling : scalar or array + Initial scale factor for proposal. Defaults to 1. + tune : bool + Flag for tuning. Defaults to True. + tune_interval : int + The frequency of tuning. Defaults to 100 iterations. + model : PyMC Model + Optional model for sampling step. Defaults to None (taken from context). + mode : string or `Mode` instance. 
+ compilation mode passed to Theano functions + """ + name = 'DEmetropolis' + + default_blocked = False + generates_stats = True + stats_dtypes = [{ + 'accept': np.float64, + 'tune': np.bool, + }] + + def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=1., + tune=True, tune_interval=100, model=None, mode=None, **kwargs): + + model = pm.modelcontext(model) + + if vars is None: + vars = model.vars + vars = pm.inputvars(vars) + + if S is None: + S = np.ones(sum(v.dsize for v in vars)) + + if proposal_dist is not None: + self.proposal_dist = proposal_dist(S) + elif S.ndim == 1: + self.proposal_dist = NormalProposal(S) + elif S.ndim == 2: + self.proposal_dist = MultivariateNormalProposal(S) + else: + raise ValueError("Invalid rank for variance: %s" % S.ndim) + + self.scaling = np.atleast_1d(scaling).astype('d') + self.lamb = lamb if lamb is not None else 2.38 / np.sqrt(2 * S.ndim) + self.tune = tune + self.tune_interval = tune_interval + self.steps_until_tune = tune_interval + self.accepted = 0 + + self.mode = mode + + shared = pm.make_shared_replacements(vars, model) + self.delta_logp = delta_logp(model.logpt, vars, shared) + super(DEMetropolis, self).__init__(vars, shared) + + def astep(self, q0): + if not self.steps_until_tune and self.tune: + # Tune scaling parameter + self.scaling = tune( + self.scaling, self.accepted / float(self.tune_interval)) + # Reset counter + self.steps_until_tune = self.tune_interval + self.accepted = 0 + + epsilon = self.proposal_dist() * self.scaling + + # differential evolution proposal + # select two other chains + ir1, ir2 = np.random.choice(self.other_chains, 2, replace=False) + r1 = self.bij.map(self.population[ir1]) + r2 = self.bij.map(self.population[ir2]) + # propose a jump + q = floatX(q0 + self.lamb * (r1 - r2) + epsilon) + + accept = self.delta_logp(q, q0) + q_new, accepted = metrop_select(accept, q, q0) + self.accepted += accepted + + self.steps_until_tune -= 1 + + stats = { + 'tune': self.tune, + 'accept': np.exp(accept), + } + + return q_new, [stats] + + @staticmethod + def competence(var, has_grad): + if var.dtype in pm.discrete_types: + return Competence.INCOMPATIBLE + return Competence.COMPATIBLE + + def sample_except(limit, excluded): candidate = nr.choice(limit - 1) if candidate >= excluded: From 2206b456bd137352d31b8ca3dd4665e4213c5b1c Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 13:24:31 +0100 Subject: [PATCH 12/56] raisig an error when the population is too small --- pymc3/step_methods/arraystep.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index 42bc35343b..8156e1d081 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -194,6 +194,9 @@ def link_population(self, population, chain_index): self.population = population self.this_chain = chain_index self.other_chains = [c for c in range(len(population)) if c != chain_index] + if not len(self.other_chains) > 1: + raise ValueError('Population ist just {} + {}. This is too small. 
You should ' \ + 'increase the number of chains.'.format(self.this_chain, self.other_chains)) return def step(self, point): From 6e63f4960e5275222a72531649c84c489127bd86 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 14:52:28 +0100 Subject: [PATCH 13/56] verbose debug logging --- pymc3/sampling.py | 3 +++ pymc3/step_methods/arraystep.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index ad162cfecc..caa35b2b7d 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -620,11 +620,14 @@ def _iter_chains(draws, chains, step, start, tune=None, # Set up the steppers steppers = [None] * nchains + pm._log.info('Sampler setup') for c in range(nchains): + pm._log.info('Chain {}'.format(c)) # need indepenently tuned samplers for each chain smethods = copy(step) # link Population samplers to the shared population state for sm in smethods: + pm._log.info('{}: {}'.format(sm.__class__.__name__, sm.vars)) if isinstance(sm, arraystep.PopulationArrayStepShared): sm.link_population(points, c) steppers[c] = CompoundStep(smethods) if is_compound else smethods[0] diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index 8156e1d081..be640806f9 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -5,6 +5,7 @@ import numpy as np from numpy.random import uniform from enum import IntEnum, unique +import pymc3 as pm __all__ = [ 'ArrayStep', 'ArrayStepShared', 'metrop_select', 'Competence'] @@ -195,8 +196,9 @@ def link_population(self, population, chain_index): self.this_chain = chain_index self.other_chains = [c for c in range(len(population)) if c != chain_index] if not len(self.other_chains) > 1: - raise ValueError('Population ist just {} + {}. This is too small. You should ' \ + raise ValueError('Population is just {} + {}. This is too small. You should ' \ 'increase the number of chains.'.format(self.this_chain, self.other_chains)) + pm._log.info('Sampler {} in population {}'.format(self.this_chain, self.other_chains)) return def step(self, point): From 9395de3c0ca590f029aa37893bf4fe138761fcb7 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 15:40:06 +0100 Subject: [PATCH 14/56] removed debug print --- pymc3/step_methods/arraystep.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index be640806f9..590785cd01 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -5,7 +5,6 @@ import numpy as np from numpy.random import uniform from enum import IntEnum, unique -import pymc3 as pm __all__ = [ 'ArrayStep', 'ArrayStepShared', 'metrop_select', 'Competence'] @@ -198,7 +197,6 @@ def link_population(self, population, chain_index): if not len(self.other_chains) > 1: raise ValueError('Population is just {} + {}. This is too small. 
You should ' \ 'increase the number of chains.'.format(self.this_chain, self.other_chains)) - pm._log.info('Sampler {} in population {}'.format(self.this_chain, self.other_chains)) return def step(self, point): From 6105dec3cfc569549032ab3dbc03df92c83c5004 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 15:41:51 +0100 Subject: [PATCH 15/56] forcing CompoundStep type --- pymc3/sampling.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index caa35b2b7d..1715526926 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -339,6 +339,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, if model.ndim == 0: raise ValueError('The model does not contain any free variables.') + if step is None and init is not None and pm.model.all_continuous(model.vars): try: # By default, try to use NUTS @@ -359,6 +360,15 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, else: step = assign_step_methods(model, step, step_kwargs=step_kwargs) + if not isinstance(step, CompoundStep): + if not isinstance(step, (list, tuple)): + step = [step] + step = CompoundStep(step) + + # print the sampler assignment + for sm in step.methods: + pm._log.info('{}: {}'.format(sm.__class__.__name__, sm.vars)) + if start is None: start = [None] * chains if isinstance(start, dict): @@ -601,13 +611,12 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, def _iter_chains(draws, chains, step, start, tune=None, model=None, random_seed=None): + if not isinstance(step, CompoundStep): + raise ValueError('step-kwarg must be CompoundStep, got {} instead.'.format(step.__class__)) # chains contains the chain numbers, but for indexing we need indices... 
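# ---- Editor's note: illustrative sketch, not part of the patch series. ----
# The DEMetropolis.astep() added in PATCH 11 builds its proposal from the current positions
# of two randomly chosen *other* chains plus a small jitter epsilon:
#     q = q0 + lambda * (r1 - r2) + epsilon
# with lambda defaulting to 2.38 / sqrt(2 * d) for d sampled dimensions (see the lambda fix
# in PATCH 21). A self-contained NumPy version of just that proposal step; the function and
# variable names here are illustrative only:
import numpy as np

def de_proposal(population, this_chain, lamb=None, scaling=0.001):
    """Differential-evolution proposal for one chain, given the whole population."""
    population = np.asarray(population, dtype=float)   # shape: (nchains, ndim)
    ndim = population.shape[1]
    if lamb is None:
        lamb = 2.38 / np.sqrt(2 * ndim)
    other_chains = [c for c in range(len(population)) if c != this_chain]
    ir1, ir2 = np.random.choice(other_chains, 2, replace=False)
    epsilon = np.random.uniform(-scaling, scaling, size=ndim)
    return population[this_chain] + lamb * (population[ir1] - population[ir2]) + epsilon

population = np.random.randn(6, 2)                     # 6 chains in a 2-parameter space
print(de_proposal(population, this_chain=0))           # proposed point for chain 0
# ---- end editor's note ----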
nchains = len(chains) model = modelcontext(model) draws = int(draws) - if not isinstance(step, (list, tuple)): - step = [step] - is_compound = len(step) > 1 if random_seed is not None: np.random.seed(random_seed) if draws < 1: @@ -620,17 +629,14 @@ def _iter_chains(draws, chains, step, start, tune=None, # Set up the steppers steppers = [None] * nchains - pm._log.info('Sampler setup') for c in range(nchains): - pm._log.info('Chain {}'.format(c)) # need indepenently tuned samplers for each chain - smethods = copy(step) + chainstep = copy(step) # link Population samplers to the shared population state - for sm in smethods: - pm._log.info('{}: {}'.format(sm.__class__.__name__, sm.vars)) + for sm in chainstep.methods: if isinstance(sm, arraystep.PopulationArrayStepShared): sm.link_population(points, c) - steppers[c] = CompoundStep(smethods) if is_compound else smethods[0] + steppers[c] = chainstep # prepare a BaseTrace for each chain From 0494bc0264b3eaa0b5054e2055a275a0a5297fd6 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 15:42:18 +0100 Subject: [PATCH 16/56] formatting --- pymc3/sampling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 1715526926..273dd48c8e 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -339,7 +339,6 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, if model.ndim == 0: raise ValueError('The model does not contain any free variables.') - if step is None and init is not None and pm.model.all_continuous(model.vars): try: # By default, try to use NUTS From e25ce1962652bc51059bf9d6b68af9ed5e6d05ed Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 15:42:59 +0100 Subject: [PATCH 17/56] setting DEMetropolis as a blocked step method --- pymc3/step_methods/metropolis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 2a15a76694..de66631168 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -490,9 +490,9 @@ class DEMetropolis(PopulationArrayStepShared): mode : string or `Mode` instance. 
compilation mode passed to Theano functions """ - name = 'DEmetropolis' + name = 'DEMetropolis' - default_blocked = False + default_blocked = True generates_stats = True stats_dtypes = [{ 'accept': np.float64, From 2841442270fe83e6ad8d88ab41dae8a9b99cd5d0 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 15:45:28 +0100 Subject: [PATCH 18/56] measuring the runtime, example with both 2D z-variable and two 1D x,y variables --- pymc3/examples/samplers_mvnormal.py | 59 ++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index 32c27a4c44..50e79d1174 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -5,38 +5,55 @@ import numpy as np +import time import pandas as pd import pymc3 as pm import theano.tensor as tt import matplotlib.pyplot as plt - +USE_XY = True def run(steppers, p): steppers = set(steppers) traces = {} effn = {} + runtimes = {} with pm.Model() as model: - mu = np.array([0.,0.]) - cov = np.array([[1.,p],[p,1.]]) - z = pm.MvNormal('z', mu=mu, cov=cov, shape=(2,)) + if USE_XY: + x = pm.Flat('x') + y = pm.Flat('y') + mu = np.array([0.,0.]) + cov = np.array([[1.,p],[p,1.]]) + z = pm.MvNormal.dist(mu=mu, cov=cov, shape=(2,)).logp(tt.stack([x,y])) + p = pm.Potential('logp_xy', z) + start = {'x': 0, 'y': 0} + else: + mu = np.array([0.,0.]) + cov = np.array([[1.,p],[p,1.]]) + z = pm.MvNormal('z', mu=mu, cov=cov, shape=(2,)) + start={'z': [0, 0]} for step_cls in steppers: name = step_cls.__name__ - print('Step method: {}'.format(name)) + print('\r\nStep method: {}'.format(name)) + t_start = time.time() mt = pm.sample( draws=10000, njobs=1, chains=6, step=step_cls(), - start={'z': [0, 0]} + start=start ) + runtimes[name] = time.time() - t_start traces[name] = mt en = pm.diagnostics.effective_n(mt) - effn[name] = np.mean(en['z']) / len(mt) / mt.nchains - return traces, effn + if USE_XY: + effn[name] = np.mean(en['x']) / len(mt) / mt.nchains + else: + effn[name] = np.mean(en['z']) / len(mt) / mt.nchains + return traces, effn, runtimes if __name__ == '__main__': @@ -46,14 +63,22 @@ def run(steppers, p): pm.DEMetropolis ] names = [c.__name__ for c in methods] - df = pd.DataFrame(columns=['p'] + names) - df['p'] = [.0,.9] - df = df.set_index('p') - rates = [] - for p in df.index: - trace, rate = run(methods, p) + df_effectiven = pd.DataFrame(columns=['p'] + names) + df_effectiven['p'] = [.0,.9] + df_effectiven = df_effectiven.set_index('p') + + df_runtime = pd.DataFrame(columns=['p'] + names) + df_runtime['p'] = [.0,.9] + df_runtime = df_runtime.set_index('p') + + for p in df_effectiven.index: + trace, rate, runtime = run(methods, p) for name in names: - df.set_value(p, name, rate[name]) + df_effectiven.set_value(p, name, rate[name]) + df_runtime.set_value(p, name, runtime[name]) + + print('\r\nEffective sample size [0...1]') + print(df_effectiven.T) - print('Effective sample size [0...1]') - print(df.T) + print('\r\nRuntime [s]') + print(df_runtime.T) From fd6a1efa36fad415befa6deedd9e1a1d5d6d76b7 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 24 Nov 2017 11:46:18 +0100 Subject: [PATCH 19/56] changed the initialization order such that variable transforms are applied --- pymc3/sampling.py | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 273dd48c8e..fdd9374421 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py 
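# ---- Editor's note: hedged usage sketch, not part of the patch series. ----
# The benchmark script patched above (PATCH 18) drives the new step method through the
# ordinary pm.sample() call; with several chains and njobs=1 the single-process,
# population-aware sampling loop added in this series handles all chains together. A
# minimal example of the same pattern. It assumes a PyMC3 build that includes the
# DEMetropolis step from this series; the model and the draw/chain counts are illustrative.
import pymc3 as pm

with pm.Model():
    x = pm.Normal('x', mu=0, sd=1)
    y = pm.Normal('y', mu=0, sd=1)
    trace = pm.sample(draws=5000, chains=6, njobs=1, step=pm.DEMetropolis())

print(pm.diagnostics.effective_n(trace))   # effective sample sizes per variable
# ---- end editor's note ----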
@@ -621,12 +621,28 @@ def _iter_chains(draws, chains, step, start, tune=None, if draws < 1: raise ValueError('Argument `draws` should be above 0.') - # points tracks the current position of each chain in the parameter space + # IMPORTANT: The initialization of traces, samplers and points must happen in the right order: + # 1. traces are initialized and update_start_vals configures variable transforms + # 2. population of points is created + # 3. steppers are initialized and linked to the points object + # 4. traces are configured to track the sampler stats + + + # 1. prepare a BaseTrace for each chain + traces = [_choose_backend(None, c, model=model) for c in chains] + for c,strace in enumerate(traces): + # initialize the trace size + if len(strace) > 0: + update_start_vals(start[c], strace.point(-1), model) + else: + update_start_vals(start[c], model.test_point, model) + + # 2. create a population (points) that tracks each chain # it is updated as the chains are advanced points = [Point(start[c], model=model) for c in range(nchains)] updates = [None] * nchains - # Set up the steppers + # 3. Set up the steppers steppers = [None] * nchains for c in range(nchains): # need indepenently tuned samplers for each chain @@ -637,20 +653,12 @@ def _iter_chains(draws, chains, step, start, tune=None, sm.link_population(points, c) steppers[c] = chainstep - - # prepare a BaseTrace for each chain - traces = [_choose_backend(None, c, model=model) for c in chains] - for c,strace in enumerate(traces): - # initialize the trace size - if len(strace) > 0: - update_start_vals(start[c], strace.point(-1), model) - else: - update_start_vals(start[c], model.test_point, model) - # initialize tracking of sampler stats - if steppers[c].generates_stats and strace.supports_sampler_stats: - strace.setup(draws, c, steppers[c].stats_dtypes) + # 4. 
configure tracking of sampler stats + for c in range(nchains): + if steppers[c].generates_stats and traces[c].supports_sampler_stats: + traces[c].setup(draws, c, steppers[c].stats_dtypes) else: - strace.setup(draws, c) + traces[c].setup(draws, c) try: # iterate draws of all chains From cf4ee2f11750dbd3085c94e30c7e83b59232735e Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 24 Nov 2017 15:25:44 +0100 Subject: [PATCH 20/56] fixed a bug caused by start=None --- pymc3/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index fdd9374421..a3fe370f64 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -369,7 +369,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, pm._log.info('{}: {}'.format(sm.__class__.__name__, sm.vars)) if start is None: - start = [None] * chains + start = {} if isinstance(start, dict): start = [start] * chains From 6733ae155f141c65c8f43a56e94e5918f21b026c Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Mon, 27 Nov 2017 16:02:49 +0100 Subject: [PATCH 21/56] fixes a bug in computing lambda --- pymc3/step_methods/metropolis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index de66631168..9beed65b89 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -521,7 +521,7 @@ def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=1., raise ValueError("Invalid rank for variance: %s" % S.ndim) self.scaling = np.atleast_1d(scaling).astype('d') - self.lamb = lamb if lamb is not None else 2.38 / np.sqrt(2 * S.ndim) + self.lamb = lamb if lamb is not None else 2.38 / np.sqrt(2 * S.size) self.tune = tune self.tune_interval = tune_interval self.steps_until_tune = tune_interval From 368f4aa9dcd98f48949c6ab02ed06f43064931a9 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Mon, 27 Nov 2017 17:20:00 +0100 Subject: [PATCH 22/56] using a Uniform proposal with low initial scale --- pymc3/step_methods/metropolis.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 9beed65b89..f0f82daa1c 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -26,6 +26,11 @@ def __call__(self): return nr.normal(scale=self.s) +class UniformProposal(Proposal): + def __call__(self): + return nr.uniform(low=-self.s, high=self.s, size=len(self.s)) + + class CauchyProposal(Proposal): def __call__(self): return nr.standard_cauchy(size=np.size(self.s)) * self.s @@ -499,7 +504,7 @@ class DEMetropolis(PopulationArrayStepShared): 'tune': np.bool, }] - def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=1., + def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=0.001, tune=True, tune_interval=100, model=None, mode=None, **kwargs): model = pm.modelcontext(model) @@ -513,12 +518,8 @@ def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=1., if proposal_dist is not None: self.proposal_dist = proposal_dist(S) - elif S.ndim == 1: - self.proposal_dist = NormalProposal(S) - elif S.ndim == 2: - self.proposal_dist = MultivariateNormalProposal(S) else: - raise ValueError("Invalid rank for variance: %s" % S.ndim) + self.proposal_dist = UniformProposal(S) self.scaling = np.atleast_1d(scaling).astype('d') self.lamb = lamb if lamb is not None else 2.38 / np.sqrt(2 * S.size) From 
abcb12ff6f9be62387ee53660dd9b7652be0ab34 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Mon, 27 Nov 2017 18:40:51 +0100 Subject: [PATCH 23/56] renamed local variable --- pymc3/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index a3fe370f64..2a80038bf5 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -629,7 +629,7 @@ def _iter_chains(draws, chains, step, start, tune=None, # 1. prepare a BaseTrace for each chain - traces = [_choose_backend(None, c, model=model) for c in chains] + traces = [_choose_backend(None, chain, model=model) for chain in chains] for c,strace in enumerate(traces): # initialize the trace size if len(strace) > 0: From acf13115902735811607ad0938d7b47ce1a8941b Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 13:56:14 +0100 Subject: [PATCH 24/56] logging the crossover and scaling --- pymc3/step_methods/metropolis.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index f0f82daa1c..a49fb0b59e 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -502,6 +502,9 @@ class DEMetropolis(PopulationArrayStepShared): stats_dtypes = [{ 'accept': np.float64, 'tune': np.bool, + 'ir1': np.int, + 'ir2': np.int, + 'scaling': np.float64, }] def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=0.001, @@ -562,6 +565,9 @@ def astep(self, q0): stats = { 'tune': self.tune, 'accept': np.exp(accept), + 'ir1': ir1, + 'ir2': ir2, + 'scaling': self.scaling } return q_new, [stats] From 3b6a2d92cef276d21cfbd9bcd73464b2dce5924f Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 14:36:52 +0100 Subject: [PATCH 25/56] fixed a bug that caused step methods to not be copied --- pymc3/sampling.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 2a80038bf5..385bc8d97e 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -646,7 +646,11 @@ def _iter_chains(draws, chains, step, start, tune=None, steppers = [None] * nchains for c in range(nchains): # need indepenently tuned samplers for each chain - chainstep = copy(step) + # it is important to copy the actual steppers (but not the delta_logp) + if isinstance(step, CompoundStep): + chainstep = CompoundStep([copy(m) for m in step.methods]) + else: + chainstep = copy(step) # link Population samplers to the shared population state for sm in chainstep.methods: if isinstance(sm, arraystep.PopulationArrayStepShared): From 27e826382a197d606821ca76e2cb5f4044c81825 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 16:51:16 +0100 Subject: [PATCH 26/56] smarter multiprocessing --- pymc3/sampling.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 385bc8d97e..aaefa81268 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -391,9 +391,13 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, sample_args.update(kwargs) - parallel = njobs > 1 and chains > 1 + has_population_samplers = np.any([ + isinstance(m, arraystep.PopulationArrayStepShared) + for m in step.methods + ]) + parallel = njobs > 1 and chains > 1 and not has_population_samplers if parallel: - pm._log.info('Multiprocess sampling ({} jobs, {} chains)'.format(njobs, chains)) + pm._log.info('Multiprocess sampling ({} chains in {} jobs)'.format(chains, njobs)) try: trace = 
_mp_sample(**sample_args) except pickle.PickleError: @@ -408,7 +412,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, else: raise if not parallel: - pm._log.info('Multichain sampling ({} jobs, {} chains)'.format(njobs, chains)) + pm._log.info('Multichain sampling ({} chains in 1 job)'.format(chains)) trace = _sample_many(**sample_args) discard = tune if discard_tuned_samples else 0 From d83c5f4d997b9e68aa7adbbe899fe4816137b735 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 16:53:41 +0100 Subject: [PATCH 27/56] automatic multiprocessing decision, reporting relative sampling rates --- pymc3/examples/samplers_mvnormal.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index 50e79d1174..626ce70557 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -41,11 +41,11 @@ def run(steppers, p): t_start = time.time() mt = pm.sample( draws=10000, - njobs=1, chains=6, step=step_cls(), start=start ) + print('{} samples across {} chains'.format(len(mt), mt.nchains)) runtimes[name] = time.time() - t_start traces[name] = mt en = pm.diagnostics.effective_n(mt) @@ -63,22 +63,32 @@ def run(steppers, p): pm.DEMetropolis ] names = [c.__name__ for c in methods] - df_effectiven = pd.DataFrame(columns=['p'] + names) - df_effectiven['p'] = [.0,.9] - df_effectiven = df_effectiven.set_index('p') - df_runtime = pd.DataFrame(columns=['p'] + names) - df_runtime['p'] = [.0,.9] - df_runtime = df_runtime.set_index('p') + df_base = pd.DataFrame(columns=['p'] + names) + df_base['p'] = [.0,.9] + df_base = df_base.set_index('p') + + df_effectiven = df_base.copy() + df_runtime = df_base.copy() + df_performance = df_base.copy() for p in df_effectiven.index: trace, rate, runtime = run(methods, p) for name in names: df_effectiven.set_value(p, name, rate[name]) df_runtime.set_value(p, name, runtime[name]) + df_performance.set_value(p, name, rate[name] / runtime[name]) print('\r\nEffective sample size [0...1]') - print(df_effectiven.T) + print(df_effectiven.T.to_string(float_format='{:.3f}'.format)) print('\r\nRuntime [s]') - print(df_runtime.T) + print(df_runtime.T.to_string(float_format='{:.1f}'.format)) + + if 'NUTS' in names: + print('\r\nNormalized effective sampling rate [0...1]') + df_performance = df_performance.T / df_performance.loc[0]['NUTS'] + else: + print('\r\nNormalized effective sampling rate [1/s]') + df_performance = df_performance.T + print(df_performance.to_string(float_format='{:.0f}'.format)) From fc4e1d0e5200b5df98ed807959fb618cc1621d25 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 16:55:04 +0100 Subject: [PATCH 28/56] print format --- pymc3/examples/samplers_mvnormal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index 626ce70557..a7d882ae1c 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -91,4 +91,4 @@ def run(steppers, p): else: print('\r\nNormalized effective sampling rate [1/s]') df_performance = df_performance.T - print(df_performance.to_string(float_format='{:.0f}'.format)) + print(df_performance.to_string(float_format='{:.3f}'.format)) From 569731bccfb74be9d52e2f9e893cd7736cd7fd9d Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 17:30:24 +0100 Subject: [PATCH 29/56] inheriting PopulationArraySharedStep from 
ArrayStepShared, using a bij attribute because DEMetropolis will use it --- pymc3/step_methods/arraystep.py | 49 ++++++++++++++------------------- 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index 590785cd01..581f77ec18 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -150,27 +150,28 @@ def __init__(self, vars, shared, blocked=True): self.ordering = ArrayOrdering(vars) self.shared = {str(var): shared for var, shared in shared.items()} self.blocked = blocked + self.bij = None def step(self, point): for var, share in self.shared.items(): share.set_value(point[var]) - bij = DictToArrayBijection(self.ordering, point) + if self.bij is None: + self.bij = DictToArrayBijection(self.ordering, point) if self.generates_stats: - apoint, stats = self.astep(bij.map(point)) - return bij.rmap(apoint), stats + apoint, stats = self.astep(self.bij.map(point)) + return self.bij.rmap(apoint), stats else: - apoint = self.astep(bij.map(point)) - return bij.rmap(apoint) + apoint = self.astep(self.bij.map(point)) + return self.bij.rmap(apoint) -class PopulationArrayStepShared(BlockedStep): - """Faster version of ArrayStep that requires the substep method that does not wrap - the functions the step method uses. +class PopulationArrayStepShared(ArrayStepShared): + """Version of ArrayStepShared that allows samplers to access the states + of other chains in the population. - Works by setting shared variables before using the step. This eliminates the mapping - and unmapping overhead as well as moving fewer variables around. + Works by linking a list of Points that is updated as the chains are iterated. """ def __init__(self, vars, shared, blocked=True): @@ -181,16 +182,20 @@ def __init__(self, vars, shared, blocked=True): shared : dict of theano variable -> shared variable blocked : Boolean (default True) """ - self.vars = vars - self.ordering = ArrayOrdering(vars) - self.bij = None - self.shared = {str(var): shared for var, shared in shared.items()} - self.blocked = blocked self.population = None self.this_chain = None self.other_chains = None + return super().__init__(vars, shared, blocked) def link_population(self, population, chain_index): + """Links the sampler to the population. + + Parameters + ---------- + population : list of Points. (The elements of this list must be + replaced with current chain states in every iteration.) 
+ chain_index : int of the index of this sampler in the population + """ self.population = population self.this_chain = chain_index self.other_chains = [c for c in range(len(population)) if c != chain_index] @@ -199,20 +204,6 @@ def link_population(self, population, chain_index): 'increase the number of chains.'.format(self.this_chain, self.other_chains)) return - def step(self, point): - for var, share in self.shared.items(): - share.set_value(point[var]) - - if self.bij is None: - self.bij = DictToArrayBijection(self.ordering, point) - - if self.generates_stats: - apoint, stats = self.astep(self.bij.map(point)) - return self.bij.rmap(apoint), stats - else: - apoint = self.astep(self.bij.map(point)) - return self.bij.rmap(apoint) - class GradientSharedStep(BlockedStep): def __init__(self, vars, model=None, blocked=True, From 8de3977c29c4bd21961ba25f972b087aa165c990 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 17:31:53 +0100 Subject: [PATCH 30/56] printing the number of effective samples per variable --- pymc3/examples/samplers_mvnormal.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index a7d882ae1c..2f30e79cf4 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -45,10 +45,11 @@ def run(steppers, p): step=step_cls(), start=start ) - print('{} samples across {} chains'.format(len(mt), mt.nchains)) runtimes[name] = time.time() - t_start + print('{} samples across {} chains'.format(len(mt), mt.nchains)) traces[name] = mt en = pm.diagnostics.effective_n(mt) + print(en) if USE_XY: effn[name] = np.mean(en['x']) / len(mt) / mt.nchains else: From 67e07a4e72bf39b668d7596608ff1f411cbb6475 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 17:43:11 +0100 Subject: [PATCH 31/56] docstrings and comments --- pymc3/sampling.py | 6 +++--- pymc3/step_methods/metropolis.py | 10 ++++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index aaefa81268..5dfe39042c 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -635,7 +635,7 @@ def _iter_chains(draws, chains, step, start, tune=None, # 1. prepare a BaseTrace for each chain traces = [_choose_backend(None, chain, model=model) for chain in chains] for c,strace in enumerate(traces): - # initialize the trace size + # initialize the trace size and variable transforms if len(strace) > 0: update_start_vals(start[c], strace.point(-1), model) else: @@ -649,13 +649,13 @@ def _iter_chains(draws, chains, step, start, tune=None, # 3. 
Set up the steppers steppers = [None] * nchains for c in range(nchains): - # need indepenently tuned samplers for each chain + # need indepenent samplers for each chain # it is important to copy the actual steppers (but not the delta_logp) if isinstance(step, CompoundStep): chainstep = CompoundStep([copy(m) for m in step.methods]) else: chainstep = copy(step) - # link Population samplers to the shared population state + # link population samplers to the shared population state for sm in chainstep.methods: if isinstance(sm, arraystep.PopulationArrayStepShared): sm.link_population(points, c) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index a49fb0b59e..386b63da9b 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -473,21 +473,23 @@ def competence(var): class DEMetropolis(PopulationArrayStepShared): """ - Differential Evolution Metropolis sampling step + Differential Evolution Metropolis sampling step. Parameters ---------- + lamb : float + Lambda parameter of the DE proposal mechanism. Defaults to 2.38 / sqrt(2 * ndim) vars : list List of variables for sampler S : standard deviation or covariance matrix Some measure of variance to parameterize proposal distribution proposal_dist : function Function that returns zero-mean deviates when parameterized with - S (and n). Defaults to normal. + S (and n). Defaults to Uniform(-S,+S). scaling : scalar or array - Initial scale factor for proposal. Defaults to 1. + Initial scale factor for epsilon. Defaults to 0.001 tune : bool - Flag for tuning. Defaults to True. + Flag for tuning the scaling. Defaults to True. tune_interval : int The frequency of tuning. Defaults to 100 iterations. model : PyMC Model From 5769b9de0bcfc787c03386783ebd5acaee69276a Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 17:45:49 +0100 Subject: [PATCH 32/56] falling back to sequential sampling if no population samplers are used --- pymc3/sampling.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 5dfe39042c..d90492237e 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -401,19 +401,22 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, try: trace = _mp_sample(**sample_args) except pickle.PickleError: - pm._log.warn("Could not pickle model, sampling sequentially.") + pm._log.warn("Could not pickle model, sampling singlethreaded.") pm._log.debug('Pickling error:', exec_info=True) parallel = False except AttributeError as e: if str(e).startswith("AttributeError: Can't pickle"): - pm._log.warn("Could not pickle model, sampling sequentially.") + pm._log.warn("Could not pickle model, sampling singlethreaded.") pm._log.debug('Pickling error:', exec_info=True) parallel = False else: raise - if not parallel: + if not parallel and has_population_samplers: pm._log.info('Multichain sampling ({} chains in 1 job)'.format(chains)) trace = _sample_many(**sample_args) + else: + pm._log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) + trace = _sample_many_sequentially(**sample_args) discard = tune if discard_tuned_samples else 0 return trace[discard:] From 5f6c29cc396542e84f4ecc134e6e5a622bea5553 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 17:48:32 +0100 Subject: [PATCH 33/56] removed debugging stats logging --- pymc3/step_methods/metropolis.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pymc3/step_methods/metropolis.py 
b/pymc3/step_methods/metropolis.py index 386b63da9b..e4254b45a0 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -504,9 +504,6 @@ class DEMetropolis(PopulationArrayStepShared): stats_dtypes = [{ 'accept': np.float64, 'tune': np.bool, - 'ir1': np.int, - 'ir2': np.int, - 'scaling': np.float64, }] def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=0.001, @@ -567,9 +564,6 @@ def astep(self, q0): stats = { 'tune': self.tune, 'accept': np.exp(accept), - 'ir1': ir1, - 'ir2': ir2, - 'scaling': self.scaling } return q_new, [stats] From b53b510d52d0f5905a503580a54a897c8fd4f922 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 20:32:35 +0100 Subject: [PATCH 34/56] fixed nested if else --- pymc3/sampling.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index d90492237e..29d4ed6556 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -411,12 +411,13 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, parallel = False else: raise - if not parallel and has_population_samplers: - pm._log.info('Multichain sampling ({} chains in 1 job)'.format(chains)) - trace = _sample_many(**sample_args) - else: - pm._log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) - trace = _sample_many_sequentially(**sample_args) + if not parallel: + if has_population_samplers: + pm._log.info('Multichain sampling ({} chains in 1 job)'.format(chains)) + trace = _sample_many(**sample_args) + else: + pm._log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) + trace = _sample_many_sequentially(**sample_args) discard = tune if discard_tuned_samples else 0 return trace[discard:] From 4af2aa5b0432b7d3581d03dd16d269a3274b381d Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 20:44:09 +0100 Subject: [PATCH 35/56] updated print statement --- pymc3/examples/samplers_mvnormal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index 2f30e79cf4..60e9bb4c15 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -46,7 +46,7 @@ def run(steppers, p): start=start ) runtimes[name] = time.time() - t_start - print('{} samples across {} chains'.format(len(mt), mt.nchains)) + print('{} samples across {} chains'.format(len(mt) * mt.nchains, mt.nchains)) traces[name] = mt en = pm.diagnostics.effective_n(mt) print(en) From e5f7ff2ffb88781d2dec06961e551e174251a074 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 10:55:28 +0100 Subject: [PATCH 36/56] fixed a bug related to bijection updating --- pymc3/step_methods/arraystep.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index 581f77ec18..7f3bffd406 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -156,8 +156,7 @@ def step(self, point): for var, share in self.shared.items(): share.set_value(point[var]) - if self.bij is None: - self.bij = DictToArrayBijection(self.ordering, point) + self.bij = DictToArrayBijection(self.ordering, point) if self.generates_stats: apoint, stats = self.astep(self.bij.map(point)) From 874e6b2bfd49a6176e283018baabb5cae501d3dd Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 12:30:34 +0100 Subject: [PATCH 37/56] docstring and comments --- 
pymc3/examples/samplers_mvnormal.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index 60e9bb4c15..a702ebe1cd 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -1,6 +1,9 @@ """ Comparing different samplers on a correlated bivariate normal distribution. +This example will sample a bivariate normal with Metropolis, NUTS and DEMetropolis +at two correlations (0, 0.9) and print out the effective sample sizes, runtime and +normalized effective sampling rates. """ @@ -11,7 +14,10 @@ import theano.tensor as tt import matplotlib.pyplot as plt - +# with this flag one can switch between defining the bivariate normal as +# either a 2D MvNormal (USE_XY = False) split up the two dimensions into +# two variables 'x' and 'y'. The latter is recommended because it highlights +# different behaviour with respect to blocking. USE_XY = True def run(steppers, p): From eba4a9d46a7cf5a7eb2299fd602ac3ceeb2d686d Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 12:40:21 +0100 Subject: [PATCH 38/56] refactoring for better clarity and less diff --- pymc3/sampling.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 29d4ed6556..f2b2516237 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -413,11 +413,11 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, raise if not parallel: if has_population_samplers: - pm._log.info('Multichain sampling ({} chains in 1 job)'.format(chains)) - trace = _sample_many(**sample_args) + pm._log.info('Population sampling ({} chains in 1 job)'.format(chains)) + trace = _sample_population(**sample_args) else: pm._log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) - trace = _sample_many_sequentially(**sample_args) + trace = _sample_many(**sample_args) discard = tune if discard_tuned_samples else 0 return trace[discard:] @@ -450,7 +450,7 @@ def _check_start_shape(model, start): raise ValueError("Bad shape for start argument:{}".format(e)) -def _sample_many_sequentially(draws, chain, chains, start, random_seed, **kwargs): +def _sample_many(draws, chain, chains, start, random_seed, **kwargs): traces = [] for i in range(chains): trace = _sample(draws=draws, chain=chain + i, start=start[i], @@ -469,10 +469,12 @@ def _sample_many_sequentially(draws, chain, chains, start, random_seed, **kwargs return MultiTrace(traces) -def _sample_many(draws, chain, chains, start, random_seed, step, tune, model, progressbar=None, **kwargs): +def _sample_population(draws, chain, chains, start, random_seed, step, tune, + model, progressbar=None, **kwargs): # create the generator that iterates all chains in parallel chains = [chain + c for c in range(chains)] - sampling = _iter_chains(draws, chains, step, start, tune=tune, model=model, random_seed=random_seed) + sampling = _iter_chains(draws, chains, step, start, tune=tune, + model=model, random_seed=random_seed) if progressbar: sampling = tqdm(sampling, total=draws) From f71a37cb663add3cddd81adae688ba96636a6d32 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 12:49:35 +0100 Subject: [PATCH 39/56] code style --- pymc3/sampling.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index f2b2516237..2ebed3561c 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -622,6 +622,7 @@ def _iter_chains(draws, 
chains, step, start, tune=None, model=None, random_seed=None): if not isinstance(step, CompoundStep): raise ValueError('step-kwarg must be CompoundStep, got {} instead.'.format(step.__class__)) + # chains contains the chain numbers, but for indexing we need indices... nchains = len(chains) model = modelcontext(model) @@ -631,7 +632,7 @@ def _iter_chains(draws, chains, step, start, tune=None, if draws < 1: raise ValueError('Argument `draws` should be above 0.') - # IMPORTANT: The initialization of traces, samplers and points must happen in the right order: + # The initialization of traces, samplers and points must happen in the right order: # 1. traces are initialized and update_start_vals configures variable transforms # 2. population of points is created # 3. steppers are initialized and linked to the points object From 63ae017bfe2fb21208e7e684cf57cba3bb21ddb1 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 14:12:18 +0100 Subject: [PATCH 40/56] removed unused import --- pymc3/step_methods/metropolis.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index e4254b45a0..4ce3a88c41 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -5,7 +5,6 @@ from ..distributions import draw_values from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence -from ..blocking import DictToArrayBijection import pymc3 as pm from pymc3.theanof import floatX From e20295a12d0f9226ec60e24041d02b01802d477e Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 19:31:07 +0100 Subject: [PATCH 41/56] fixed a bug where Slice was preferred on multidimensional variables --- pymc3/step_methods/slicer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/step_methods/slicer.py b/pymc3/step_methods/slicer.py index 3b5ec241cb..f5f7d7e9c5 100644 --- a/pymc3/step_methods/slicer.py +++ b/pymc3/step_methods/slicer.py @@ -97,7 +97,7 @@ def astep(self, q0, logp): @staticmethod def competence(var, has_grad): if var.dtype in continuous_types: - if not var.shape: + if not var.shape or var.shape.ndim == 1: return Competence.PREFERRED return Competence.COMPATIBLE return Competence.INCOMPATIBLE From acc7538c65cb391b8a990f9e160bfa6641d8fff5 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 19:35:13 +0100 Subject: [PATCH 42/56] printing the stepper hierarchy, fixed a variable name, handling non-CompoundStep methods --- pymc3/sampling.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 2ebed3561c..49332df48c 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -145,6 +145,19 @@ def assign_step_methods(model, step=None, methods=STEP_METHODS, return instantiate_steppers(model, steps, selected_steps, step_kwargs) +def print_step_hierarchy(s, level=0): + if isinstance(s, (list, tuple)): + pm._log.info('>' * level + 'list') + for i in s: + print_step_hierarchy(i, level+1) + elif isinstance(s, CompoundStep): + pm._log.info('>' * level + 'CompoundStep') + for i in s.methods: + print_step_hierarchy(i, level+1) + else: + pm._log.info('>' * level + '{}: {}'.format(s.__class__.__name__, s.vars)) + + def _cpu_count(): """Try to guess the number of CPUs in the system. 
@@ -359,15 +372,8 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, else: step = assign_step_methods(model, step, step_kwargs=step_kwargs) - if not isinstance(step, CompoundStep): - if not isinstance(step, (list, tuple)): - step = [step] + if isinstance(step, list): step = CompoundStep(step) - - # print the sampler assignment - for sm in step.methods: - pm._log.info('{}: {}'.format(sm.__class__.__name__, sm.vars)) - if start is None: start = {} if isinstance(start, dict): @@ -393,11 +399,12 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, has_population_samplers = np.any([ isinstance(m, arraystep.PopulationArrayStepShared) - for m in step.methods + for m in (step.methods if isinstance(step, CompoundStep) else [step]) ]) parallel = njobs > 1 and chains > 1 and not has_population_samplers if parallel: pm._log.info('Multiprocess sampling ({} chains in {} jobs)'.format(chains, njobs)) + print_step_hierarchy(step) try: trace = _mp_sample(**sample_args) except pickle.PickleError: @@ -414,9 +421,11 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, if not parallel: if has_population_samplers: pm._log.info('Population sampling ({} chains in 1 job)'.format(chains)) + print_step_hierarchy(step) trace = _sample_population(**sample_args) else: pm._log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) + print_step_hierarchy(step) trace = _sample_many(**sample_args) discard = tune if discard_tuned_samples else 0 @@ -483,7 +492,7 @@ def _sample_population(draws, chain, chains, start, random_seed, step, tune, for it,traces in enumerate(sampling): latest_traces = traces # TODO: add support for liveplot during population-sampling - return MultiTrace(traces) + return MultiTrace(latest_traces) def _sample(chain, progressbar, random_seed, start, draws=None, step=None, @@ -620,9 +629,6 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, def _iter_chains(draws, chains, step, start, tune=None, model=None, random_seed=None): - if not isinstance(step, CompoundStep): - raise ValueError('step-kwarg must be CompoundStep, got {} instead.'.format(step.__class__)) - # chains contains the chain numbers, but for indexing we need indices... 
nchains = len(chains) model = modelcontext(model) @@ -663,7 +669,7 @@ def _iter_chains(draws, chains, step, start, tune=None, else: chainstep = copy(step) # link population samplers to the shared population state - for sm in chainstep.methods: + for sm in (chainstep.methods if isinstance(step, CompoundStep) else [chainstep]): if isinstance(sm, arraystep.PopulationArrayStepShared): sm.link_population(points, c) steppers[c] = chainstep From c3c233c00394996ff0506543379629f677730208 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 19:36:15 +0100 Subject: [PATCH 43/56] fixed a bug where DEMetropolis assigned itself to discrete vars, fixed a bug that happened when only one var was assigned --- pymc3/step_methods/metropolis.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 4ce3a88c41..4deaf562de 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -505,13 +505,13 @@ class DEMetropolis(PopulationArrayStepShared): 'tune': np.bool, }] - def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=0.001, + def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001, tune=True, tune_interval=100, model=None, mode=None, **kwargs): model = pm.modelcontext(model) if vars is None: - vars = model.vars + vars = model.cont_vars vars = pm.inputvars(vars) if S is None: @@ -523,7 +523,9 @@ def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=0.0 self.proposal_dist = UniformProposal(S) self.scaling = np.atleast_1d(scaling).astype('d') - self.lamb = lamb if lamb is not None else 2.38 / np.sqrt(2 * S.size) + if lamb is None: + lamb = 2.38 / np.sqrt(2 * S.size) + self.lamb = float(lamb) self.tune = tune self.tune_interval = tune_interval self.steps_until_tune = tune_interval From 01adad83c630b1212e754b176a279e94bdad9d0e Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 20:18:37 +0100 Subject: [PATCH 44/56] improved code style, including Slice in comparison --- pymc3/examples/samplers_mvnormal.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index a702ebe1cd..c7007a562f 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -12,7 +12,6 @@ import pandas as pd import pymc3 as pm import theano.tensor as tt -import matplotlib.pyplot as plt # with this flag one can switch between defining the bivariate normal as # either a 2D MvNormal (USE_XY = False) split up the two dimensions into @@ -33,7 +32,7 @@ def run(steppers, p): mu = np.array([0.,0.]) cov = np.array([[1.,p],[p,1.]]) z = pm.MvNormal.dist(mu=mu, cov=cov, shape=(2,)).logp(tt.stack([x,y])) - p = pm.Potential('logp_xy', z) + pot = pm.Potential('logp_xy', z) start = {'x': 0, 'y': 0} else: mu = np.array([0.,0.]) @@ -43,7 +42,6 @@ def run(steppers, p): for step_cls in steppers: name = step_cls.__name__ - print('\r\nStep method: {}'.format(name)) t_start = time.time() mt = pm.sample( draws=10000, @@ -55,7 +53,7 @@ def run(steppers, p): print('{} samples across {} chains'.format(len(mt) * mt.nchains, mt.nchains)) traces[name] = mt en = pm.diagnostics.effective_n(mt) - print(en) + print('effective: {}\r\n'.format(en)) if USE_XY: effn[name] = np.mean(en['x']) / len(mt) / mt.nchains else: @@ -66,6 +64,7 @@ def run(steppers, p): if __name__ == '__main__': methods = [ pm.Metropolis, + pm.Slice, 
pm.NUTS, pm.DEMetropolis ] From 44b111570e6e7a4074de988528952a0bc249fb05 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 30 Nov 2017 11:46:40 +0100 Subject: [PATCH 45/56] including DEMetropolis in existing tests, added test case for PopulationSamplers --- pymc3/tests/test_step.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py index 22eb7053cc..8e12607d95 100644 --- a/pymc3/tests/test_step.py +++ b/pymc3/tests/test_step.py @@ -9,7 +9,7 @@ from pymc3.step_methods import (NUTS, BinaryGibbsMetropolis, CategoricalGibbsMetropolis, Metropolis, Slice, CompoundStep, NormalProposal, MultivariateNormalProposal, HamiltonianMC, - EllipticalSlice, smc) + EllipticalSlice, smc, DEMetropolis) from pymc3.theanof import floatX from pymc3 import SamplingError from pymc3.distributions import ( @@ -318,7 +318,7 @@ def test_mv_proposal(self): class TestCompoundStep(object): - samplers = (Metropolis, Slice, HamiltonianMC, NUTS) + samplers = (Metropolis, Slice, HamiltonianMC, NUTS, DEMetropolis) @pytest.mark.skipif(theano.config.floatX == "float32", reason="Test fails on 32 bit due to linalg issues") @@ -393,6 +393,22 @@ def kill_grad(x): assert isinstance(steps, Slice) +class TestPopulationSamplers(object): + def test_checks_population_size(self): + """Test that population samplers check the population size.""" + steppers = [ + DEMetropolis + ] + with Model() as model: + n = Normal('n', mu=0, sd=1) + for stepper in steppers: + step = stepper() + with pytest.raises(ValueError): + trace = sample(draws=100, chains=1, step=step) + trace = sample(draws=100, chains=4, step=step) + pass + + @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") class TestNutsCheckTrace(object): def test_multiple_samplers(self): From 9a07a43905869de8abb90ba60c41ff3b70e1b06c Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 30 Nov 2017 14:07:24 +0100 Subject: [PATCH 46/56] fixes python 2.7 compatibility --- pymc3/step_methods/arraystep.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index 7f3bffd406..5d6d1db1a2 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -184,7 +184,7 @@ def __init__(self, vars, shared, blocked=True): self.population = None self.this_chain = None self.other_chains = None - return super().__init__(vars, shared, blocked) + return super(PopulationArrayStepShared, self).__init__(vars, shared, blocked) def link_population(self, population, chain_index): """Links the sampler to the population. 
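For orientation between the DEMetropolis patches above and the parallelization patches that follow: the differential-evolution proposal itself is small enough to sketch. The snippet below is only an illustration of the mechanism summarized in the DEMetropolis docstring (PATCH 31) and in ter Braak (2006), which is cited in PATCH 48 further down; it is not code from this series. It assumes `S` is left at its default of ones, so `epsilon` reduces to a uniform draw scaled by `scaling`; the helper name `de_proposal` is made up for the example, and the Metropolis accept/reject step (handled via `metrop_select` in the real step method) is omitted.

    import numpy as np

    def de_proposal(population, i, lamb=None, scaling=0.001, rng=np.random):
        # Propose a new point for chain i from the current population of chain states.
        # Needs at least two chains besides i, which is why link_population raises
        # when there are too few chains.
        ndim = population[i].size
        if lamb is None:
            lamb = 2.38 / np.sqrt(2 * ndim)   # default from ter Braak (2006)
        # pick two distinct other chains (compare `sample_except`)
        others = [c for c in range(len(population)) if c != i]
        r1, r2 = rng.choice(others, size=2, replace=False)
        # small uniform jitter; with S = ones this is Uniform(-scaling, +scaling)
        epsilon = rng.uniform(-scaling, scaling, size=ndim)
        return population[i] + lamb * (population[r1] - population[r2]) + epsilon

The accept/reject decision on the proposed point then follows the usual Metropolis ratio, as in the other Metropolis variants in metropolis.py.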
From e2cfbbb918c735c191a946d25186ba1f204f2b94 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 30 Nov 2017 20:26:58 +0100 Subject: [PATCH 47/56] Using multiprocessing to parallelize iteration of chain populations (initial) --- pymc3/sampling.py | 149 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 128 insertions(+), 21 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 49332df48c..ade7426502 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1,7 +1,7 @@ from collections import defaultdict, Iterable from copy import copy import pickle - +import multiprocessing from joblib import Parallel, delayed import numpy as np import warnings @@ -627,6 +627,108 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, step.report._finalize(strace) +class ChainWorkers(object): + def __init__(self, steppers): + self.nchains = len(steppers) + self._master_ends = [] + self._processes = [] + # configure a child process for each stepper + for c,stepper in enumerate(steppers): + slave_end, master_end = multiprocessing.Pipe() + stepper_dumps = pickle.dumps(stepper, protocol=4) + process = multiprocessing.Process( + target=self.__class__._run_slave, + args=(c, stepper_dumps, slave_end), + name='ChainWalker{}'.format(c) + ) + self._master_ends.append(master_end) + self._processes.append(process) + return super(ChainWorkers, self).__init__() + + def __enter__(self): + for process in self._processes: + process.start() + return + + def __exit__(self, exc_type, exc_val, exc_tb): + try: + # terminate the workers + for master_end in self._master_ends: + master_end.send(None) + # join the processes + for process in self._processes: + process.join(timeout=3) + except: + pm._log.warning('Termination failed.') + return + + @staticmethod + def _run_slave(c, stepper_dumps, slave_end): + """Started on a separate process to perform stepping of a chain. + + Parameters + ---------- + c : int + number of this chain + stepper : BlockedStep + a step method such as CompoundStep + slave_end : multiprocessing.connection.PipeConnection + This is our connection to the main process + """ + try: + stepper = pickle.loads(stepper_dumps) + # the stepper is not necessarily a PopulationArraySharedStep itself, + # but rather a CompoundStep, but PopulationArrayStepShared.population + # has to be updated. Therefore we identify the substeppers first. + population_steppers = [] + for sm in (stepper.methods if isinstance(stepper, CompoundStep) else [stepper]): + if isinstance(sm, arraystep.PopulationArrayStepShared): + population_steppers.append(sm) + while True: + incoming = slave_end.recv() + if incoming is None: + break + tune_stop, population = incoming + if tune_stop: + stop_tuning(stepper) + # forward the population to the PopulationArrayStepShared objects + for popstep in population_steppers: + popstep.population = population + update = stepper.step(population[c]) + slave_end.send(update) + except Exception: + pm._log.exception('ChainWalker{}'.format(c)) + return + + def step_start(self, tune_stop, population): + """Submits the instructions to make a step. + + Parameters + ---------- + tune_stop : bool + Indicates if the condition (i == tune) is fulfilled + population : list + Current Points of all chains + """ + for c in range(self.nchains): + self._master_ends[c].send((tune_stop, population)) + return + + def step_end(self): + """Blockingly waits for the completion of a step. 
+ + Returns + ------- + update : Point + The new position of the chain + """ + updates = [ + self._master_ends[c].recv() + for c in range(self.nchains) + ] + return updates + + def _iter_chains(draws, chains, step, start, tune=None, model=None, random_seed=None): # chains contains the chain numbers, but for indexing we need indices... @@ -657,7 +759,6 @@ def _iter_chains(draws, chains, step, start, tune=None, # 2. create a population (points) that tracks each chain # it is updated as the chains are advanced points = [Point(start[c], model=model) for c in range(nchains)] - updates = [None] * nchains # 3. Set up the steppers steppers = [None] * nchains @@ -682,27 +783,33 @@ def _iter_chains(draws, chains, step, start, tune=None, traces[c].setup(draws, c) try: - # iterate draws of all chains - for i in range(draws): - # step each of the chains - for c in range(nchains): - if i == tune: - steppers[c] = stop_tuning(steppers[c]) - updates[c] = steppers[c].step(points[c]) - - # apply the update to the points and record to the traces - for c,strace in enumerate(traces): - if steppers[c].generates_stats: - points[c], states = updates[c] - if strace.supports_sampler_stats: - strace.record(points[c], states) + workers = ChainWorkers(steppers) + with workers: + # iterate draws of all chains + for i in range(draws): + tune_stop = i == tune + workers.step_start(tune_stop, points) + updates = workers.step_end() + + ## step each of the chains + #for c in range(nchains): + # if i == tune: + # steppers[c] = stop_tuning(steppers[c]) + # updates[c] = steppers[c].step(points[c]) + + # apply the update to the points and record to the traces + for c,strace in enumerate(traces): + if steppers[c].generates_stats: + points[c], states = updates[c] + if strace.supports_sampler_stats: + strace.record(points[c], states) + else: + strace.record(points[c]) else: + points[c] = updates[c] strace.record(points[c]) - else: - points[c] = updates[c] - strace.record(points[c]) - # yield the state of all chains in parallel - yield traces + # yield the state of all chains in parallel + yield traces except KeyboardInterrupt: for c,strace in enumerate(traces): strace.close() From 30f437d59eb78402fed5d40b9a4351ccd1756a51 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 1 Dec 2017 11:04:57 +0100 Subject: [PATCH 48/56] added references --- pymc3/step_methods/metropolis.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 4deaf562de..69e06fcbb6 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -495,6 +495,14 @@ class DEMetropolis(PopulationArrayStepShared): Optional model for sampling step. Defaults to None (taken from context). mode : string or `Mode` instance. compilation mode passed to Theano functions + + References + ---------- + .. [Braak2006] Cajo C.F. ter Braak (2006). + A Markov Chain Monte Carlo version of the genetic algorithm + Differential Evolution: easy Bayesian computing for real parameter spaces. 
+ Statistics and Computing + `link `__ """ name = 'DEMetropolis' From 4998005e46eb3ccc5abea12298418de1dacdcb7d Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 1 Dec 2017 11:11:23 +0100 Subject: [PATCH 49/56] added a warning that DEMetropolis is experimental --- pymc3/step_methods/metropolis.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 69e06fcbb6..b3c551c2cd 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -2,6 +2,7 @@ import numpy.random as nr import theano import scipy.linalg +import warnings from ..distributions import draw_values from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence @@ -515,6 +516,8 @@ class DEMetropolis(PopulationArrayStepShared): def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001, tune=True, tune_interval=100, model=None, mode=None, **kwargs): + warnings.warn('Population based sampling methods such as DEMetropolis are experimental.' \ + 'Use carefully and be extra critical about their results!') model = pm.modelcontext(model) From 107a618c0cb2fc90411ec0ceeea7afb90e4b8b2a Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 1 Dec 2017 13:23:19 +0100 Subject: [PATCH 50/56] forgotten space --- pymc3/step_methods/metropolis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index b3c551c2cd..3daed1f780 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -517,7 +517,7 @@ class DEMetropolis(PopulationArrayStepShared): def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001, tune=True, tune_interval=100, model=None, mode=None, **kwargs): warnings.warn('Population based sampling methods such as DEMetropolis are experimental.' 
\ - 'Use carefully and be extra critical about their results!') + ' Use carefully and be extra critical about their results!') model = pm.modelcontext(model) From b94906ad23627d156fcc7dba20b104c0ed9a60bf Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 1 Dec 2017 13:51:27 +0100 Subject: [PATCH 51/56] modified the PopulationStepper to automatically use parallelization or fallback to sequential mode --- pymc3/sampling.py | 189 ++++++++++++++++++++++++++++++---------------- 1 file changed, 123 insertions(+), 66 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index ade7426502..348bf96861 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -420,9 +420,9 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, raise if not parallel: if has_population_samplers: - pm._log.info('Population sampling ({} chains in 1 job)'.format(chains)) + pm._log.info('Population sampling ({} chains)'.format(chains)) print_step_hierarchy(step) - trace = _sample_population(**sample_args) + trace = _sample_population(parallelize=njobs > 1, **sample_args) else: pm._log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) print_step_hierarchy(step) @@ -479,11 +479,11 @@ def _sample_many(draws, chain, chains, start, random_seed, **kwargs): def _sample_population(draws, chain, chains, start, random_seed, step, tune, - model, progressbar=None, **kwargs): + model, progressbar=None, parallelize=True, **kwargs): # create the generator that iterates all chains in parallel chains = [chain + c for c in range(chains)] - sampling = _iter_chains(draws, chains, step, start, tune=tune, - model=model, random_seed=random_seed) + sampling = _prepare_iter_population(draws, chains, step, start, tune=tune, + model=model, random_seed=random_seed, parallelize=parallelize) if progressbar: sampling = tqdm(sampling, total=draws) @@ -627,39 +627,68 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, step.report._finalize(strace) -class ChainWorkers(object): - def __init__(self, steppers): +class PopulationStepper(object): + def __init__(self, steppers, parallelize=True): + """Tries to use multiprocessing to parallelize chains. + + Falls back to sequential evaluation if multiprocessing fails. + + In the multiprocessing mode of operation, a new process is started for each + chain/stepper and Pipes are used to communicate with the main process. + + Parameters + ---------- + steppers : list + A collection of independent step methods, one for each chain. 
+ parallelize : bool + Indicates if chain parallelization is desired + """ self.nchains = len(steppers) + self.is_parallelized = False self._master_ends = [] self._processes = [] - # configure a child process for each stepper - for c,stepper in enumerate(steppers): - slave_end, master_end = multiprocessing.Pipe() - stepper_dumps = pickle.dumps(stepper, protocol=4) - process = multiprocessing.Process( - target=self.__class__._run_slave, - args=(c, stepper_dumps, slave_end), - name='ChainWalker{}'.format(c) - ) - self._master_ends.append(master_end) - self._processes.append(process) - return super(ChainWorkers, self).__init__() + self._steppers = steppers + if parallelize and sys.version_info >= (3,4): + try: + # configure a child process for each stepper + pm._log.info('Attempting to parallelize chains.') + for c,stepper in enumerate(steppers): + slave_end, master_end = multiprocessing.Pipe() + stepper_dumps = pickle.dumps(stepper, protocol=4) + process = multiprocessing.Process( + target=self.__class__._run_slave, + args=(c, stepper_dumps, slave_end), + name='ChainWalker{}'.format(c) + ) + # Starting the process might fail and takes time. + # By doing it in the constructor, the sampling progress bar + # will not be confused by the process start. + process.start() + self._master_ends.append(master_end) + self._processes.append(process) + self.is_parallelized = True + except: + pm._log.info('Population parallelization failed. ' \ + 'Falling back to sequential stepping of chains.') + else: + if parallelize: + warnings.warn('Population parallelization is only supported on Python 3.4 and ' \ + 'higher. All {} chains will step on one process.'.format(self.nchains)) + return super(PopulationStepper, self).__init__() def __enter__(self): - for process in self._processes: - process.start() + """Does nothing because processes are already started in __init__.""" return def __exit__(self, exc_type, exc_val, exc_tb): - try: - # terminate the workers - for master_end in self._master_ends: - master_end.send(None) - # join the processes - for process in self._processes: - process.join(timeout=3) - except: - pm._log.warning('Termination failed.') + if len(self._processes) > 0: + try: + for master_end in self._master_ends: + master_end.send(None) + for process in self._processes: + process.join(timeout=3) + except: + pm._log.warning('Termination failed.') return @staticmethod @@ -678,20 +707,23 @@ def _run_slave(c, stepper_dumps, slave_end): try: stepper = pickle.loads(stepper_dumps) # the stepper is not necessarily a PopulationArraySharedStep itself, - # but rather a CompoundStep, but PopulationArrayStepShared.population - # has to be updated. Therefore we identify the substeppers first. + # but rather a CompoundStep. PopulationArrayStepShared.population + # has to be updated, therefore we identify the substeppers first. population_steppers = [] for sm in (stepper.methods if isinstance(stepper, CompoundStep) else [stepper]): if isinstance(sm, arraystep.PopulationArrayStepShared): population_steppers.append(sm) while True: incoming = slave_end.recv() + # receiving a None is the signal to exit if incoming is None: break tune_stop, population = incoming if tune_stop: stop_tuning(stepper) # forward the population to the PopulationArrayStepShared objects + # This is necessary because due to the process fork, the population + # object is no longer shared between the steppers. 
for popstep in population_steppers: popstep.population = population update = stepper.step(population[c]) @@ -700,8 +732,8 @@ def _run_slave(c, stepper_dumps, slave_end): pm._log.exception('ChainWalker{}'.format(c)) return - def step_start(self, tune_stop, population): - """Submits the instructions to make a step. + def step(self, tune_stop, population): + """Steps the entire population of chains. Parameters ---------- @@ -709,28 +741,36 @@ def step_start(self, tune_stop, population): Indicates if the condition (i == tune) is fulfilled population : list Current Points of all chains - """ - for c in range(self.nchains): - self._master_ends[c].send((tune_stop, population)) - return - - def step_end(self): - """Blockingly waits for the completion of a step. Returns ------- update : Point - The new position of the chain + The new positions of the chains """ - updates = [ - self._master_ends[c].recv() - for c in range(self.nchains) - ] + updates = [None] * self.nchains + if self.is_parallelized: + for c in range(self.nchains): + self._master_ends[c].send((tune_stop, population)) + # Blockingly get the step outcomes + for c in range(self.nchains): + updates[c] = self._master_ends[c].recv() + else: + for c in range(self.nchains): + if tune_stop: + self._steppers[c] = stop_tuning(self._steppers[c]) + updates[c] = self._steppers[c].step(population[c]) return updates -def _iter_chains(draws, chains, step, start, tune=None, - model=None, random_seed=None): +def _prepare_iter_population(draws, chains, step, start, tune=None, + model=None, random_seed=None, parallelize=True): + """Prepares a PopulationStepper and traces for population sampling. + + Returns + ------- + _iter_population : generator + The generator the yields traces of all chains at the same time + """ # chains contains the chain numbers, but for indexing we need indices... nchains = len(chains) model = modelcontext(model) @@ -745,7 +785,7 @@ def _iter_chains(draws, chains, step, start, tune=None, # 2. population of points is created # 3. steppers are initialized and linked to the points object # 4. traces are configured to track the sampler stats - + # 5. a PopulationStepper is configured for parallelized stepping # 1. prepare a BaseTrace for each chain traces = [_choose_backend(None, chain, model=model) for chain in chains] @@ -758,7 +798,7 @@ def _iter_chains(draws, chains, step, start, tune=None, # 2. create a population (points) that tracks each chain # it is updated as the chains are advanced - points = [Point(start[c], model=model) for c in range(nchains)] + population = [Point(start[c], model=model) for c in range(nchains)] # 3. Set up the steppers steppers = [None] * nchains @@ -772,7 +812,7 @@ def _iter_chains(draws, chains, step, start, tune=None, # link population samplers to the shared population state for sm in (chainstep.methods if isinstance(step, CompoundStep) else [chainstep]): if isinstance(sm, arraystep.PopulationArrayStepShared): - sm.link_population(points, c) + sm.link_population(population, c) steppers[c] = chainstep # 4. configure tracking of sampler stats @@ -782,20 +822,37 @@ def _iter_chains(draws, chains, step, start, tune=None, else: traces[c].setup(draws, c) + # 5. configure the PopulationStepper (expensive call) + popstep = PopulationStepper(steppers, parallelize) + + # Because the preperations above are expensive, the actual iterator is + # in another method. This way the progbar will not be disturbed. 
+ return _iter_population(draws, tune, popstep, steppers, traces, population) + + +def _iter_population(draws, tune, popstep, steppers, traces, points): + """Generator that iterates a PopulationStepper. + + Parameters + ---------- + draws : int + number of draws per chain + tune : int + number of tuning steps + popstep : PopulationStepper + the helper object for (parallelized) stepping of chains + steppers : list + The step methods for each chain + traces : list + Traces for each chain + points : list + population of chain states + """ try: - workers = ChainWorkers(steppers) - with workers: + with popstep: # iterate draws of all chains for i in range(draws): - tune_stop = i == tune - workers.step_start(tune_stop, points) - updates = workers.step_end() - - ## step each of the chains - #for c in range(nchains): - # if i == tune: - # steppers[c] = stop_tuning(steppers[c]) - # updates[c] = steppers[c].step(points[c]) + updates = popstep.step(i == tune, points) # apply the update to the points and record to the traces for c,strace in enumerate(traces): @@ -813,8 +870,8 @@ def _iter_chains(draws, chains, step, start, tune=None, except KeyboardInterrupt: for c,strace in enumerate(traces): strace.close() - if hasattr(step, 'report'): - step.report._finalize(strace) + if hasattr(steppers[c], 'report'): + steppers[c].report._finalize(strace) raise except BaseException: for c,strace in enumerate(traces): @@ -823,8 +880,8 @@ def _iter_chains(draws, chains, step, start, tune=None, else: for c,strace in enumerate(traces): strace.close() - if hasattr(step, 'report'): - step.report._finalize(strace) + if hasattr(steppers[c], 'report'): + steppers[c].report._finalize(strace) def _choose_backend(trace, chain, shortcuts=None, **kwds): From 58fa3365dee428c469a57fbb85bcebf32ff7d53f Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 1 Dec 2017 15:10:42 +0100 Subject: [PATCH 52/56] avoiding a reimport, disabled chain parallelization by default --- pymc3/sampling.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 348bf96861..b9459002d4 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1,7 +1,6 @@ from collections import defaultdict, Iterable from copy import copy import pickle -import multiprocessing from joblib import Parallel, delayed import numpy as np import warnings @@ -422,7 +421,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, if has_population_samplers: pm._log.info('Population sampling ({} chains)'.format(chains)) print_step_hierarchy(step) - trace = _sample_population(parallelize=njobs > 1, **sample_args) + trace = _sample_population(**sample_args) else: pm._log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) print_step_hierarchy(step) @@ -479,11 +478,11 @@ def _sample_many(draws, chain, chains, start, random_seed, **kwargs): def _sample_population(draws, chain, chains, start, random_seed, step, tune, - model, progressbar=None, parallelize=True, **kwargs): + model, progressbar=None, parallelize=False, **kwargs): # create the generator that iterates all chains in parallel chains = [chain + c for c in range(chains)] - sampling = _prepare_iter_population(draws, chains, step, start, tune=tune, - model=model, random_seed=random_seed, parallelize=parallelize) + sampling = _prepare_iter_population(draws, chains, step, start, parallelize, + tune=tune, model=model, random_seed=random_seed) if progressbar: sampling = tqdm(sampling, total=draws) @@ -628,7 +627,7 @@ def 
_iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, class PopulationStepper(object): - def __init__(self, steppers, parallelize=True): + def __init__(self, steppers, parallelize): """Tries to use multiprocessing to parallelize chains. Falls back to sequential evaluation if multiprocessing fails. @@ -652,6 +651,7 @@ def __init__(self, steppers, parallelize=True): try: # configure a child process for each stepper pm._log.info('Attempting to parallelize chains.') + import multiprocessing for c,stepper in enumerate(steppers): slave_end, master_end = multiprocessing.Pipe() stepper_dumps = pickle.dumps(stepper, protocol=4) @@ -674,6 +674,9 @@ def __init__(self, steppers, parallelize=True): if parallelize: warnings.warn('Population parallelization is only supported on Python 3.4 and ' \ 'higher. All {} chains will step on one process.'.format(self.nchains)) + else: + pm._log.info('Chains are not parallelized. You can enable this by passing ' \ + 'pm.sample(parallelize=True).') return super(PopulationStepper, self).__init__() def __enter__(self): @@ -762,8 +765,8 @@ def step(self, tune_stop, population): return updates -def _prepare_iter_population(draws, chains, step, start, tune=None, - model=None, random_seed=None, parallelize=True): +def _prepare_iter_population(draws, chains, step, start, parallelize, tune=None, + model=None, random_seed=None): """Prepares a PopulationStepper and traces for population sampling. Returns From 3bcf0fde61001c0a0e95498dcfe72ed0c7ca6482 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 1 Dec 2017 15:14:23 +0100 Subject: [PATCH 53/56] increased nchains --- pymc3/examples/samplers_mvnormal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index c7007a562f..14b87d7958 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -45,7 +45,7 @@ def run(steppers, p): t_start = time.time() mt = pm.sample( draws=10000, - chains=6, + chains=16, parallelize=False, step=step_cls(), start=start ) From c91974230e7635761189af9eb089fe87d1746ede Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Mon, 4 Dec 2017 16:32:39 +0100 Subject: [PATCH 54/56] resolving conflicts --- pymc3/sampling.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index b9459002d4..43d8b53032 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1,6 +1,8 @@ from collections import defaultdict, Iterable from copy import copy import pickle +from six import integer_types + from joblib import Parallel, delayed import numpy as np import warnings @@ -11,7 +13,7 @@ from .backends.ndarray import NDArray from .model import modelcontext, Point from .step_methods import arraystep -from .step_methods import (NUTS, HamiltonianMC, SGFS, Metropolis, BinaryMetropolis, +from .step_methods import (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis, BinaryGibbsMetropolis, CategoricalGibbsMetropolis, Slice, CompoundStep) from .util import update_start_vals @@ -24,7 +26,7 @@ __all__ = ['sample', 'iter_sample', 'sample_ppc', 'sample_ppc_w', 'init_nuts'] -STEP_METHODS = (NUTS, HamiltonianMC, SGFS, Metropolis, BinaryMetropolis, +STEP_METHODS = (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis, BinaryGibbsMetropolis, Slice, CategoricalGibbsMetropolis) From 24b4d633ccd696f8fa3fff0c34b80a0d5afaab26 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Mon, 4 Dec 2017 16:40:09 +0100 Subject: [PATCH 55/56] resolving 
conflicts --- pymc3/sampling.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 43d8b53032..f2bd3865fa 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -12,10 +12,9 @@ from .backends.base import BaseTrace, MultiTrace from .backends.ndarray import NDArray from .model import modelcontext, Point -from .step_methods import arraystep from .step_methods import (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis, BinaryGibbsMetropolis, CategoricalGibbsMetropolis, - Slice, CompoundStep) + Slice, CompoundStep, arraystep) from .util import update_start_vals from .vartypes import discrete_types from pymc3.step_methods.hmc import quadpotential From 7db836a9a9860c8279b037cbc3542fd04e6a012e Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 5 Dec 2017 11:55:17 +0100 Subject: [PATCH 56/56] included DEMetropolis in new features --- RELEASE-NOTES.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 69c44cd3b8..3572ada903 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -5,6 +5,9 @@ - Improve NUTS initialization `advi+adapt_diag_grad` and add `jitter+adapt_diag_grad` (#2643) - Fixed `compareplot` to use `loo` output. +### New Features +- Michael Osthege added support for population-samplers and implemented differential evolution metropolis (`DEMetropolis`). For models with correlated dimensions that cannot use gradient-based samplers, the `DEMetropolis` sampler can give higher effective sampling rates. (also see [PR#2735](https://github.com/pymc-devs/pymc3/pull/2735)) + ## PyMC3 3.2 (October 10, 2017)
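For reference, an end-to-end usage sketch of what this series adds, modelled on `pymc3/examples/samplers_mvnormal.py` as modified above. The `chains` and `parallelize` keyword arguments are the ones introduced by these patches and may change before release; treat the snippet as a sketch rather than the final API.

    import numpy as np
    import pymc3 as pm

    # A correlated bivariate normal, sampled with the new DEMetropolis
    # population sampler (numbers follow the benchmark example in this series).
    with pm.Model():
        mu = np.zeros(2)
        cov = np.array([[1.0, 0.9],
                        [0.9, 1.0]])
        pm.MvNormal('z', mu=mu, cov=cov, shape=(2,))
        trace = pm.sample(
            draws=10000,
            chains=16,            # DEMetropolis steps a whole population of chains
            parallelize=False,    # sequential stepping; True attempts multiprocessing
            step=pm.DEMetropolis(),
            start={'z': [0., 0.]},
        )

    print(pm.diagnostics.effective_n(trace))

With a correlation of 0.9, the example script compares normalized effective sampling rates for Metropolis, Slice, NUTS and DEMetropolis; that comparison is the basis for the release-note claim about models with correlated dimensions that cannot use gradient-based samplers.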