From b95af1fdb0e01fc2a31f306d6514533a8681525f Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 18 Oct 2017 13:32:51 +0200 Subject: [PATCH 01/56] catching other error types that may occur when gradients are not available --- pymc3/tuning/starting.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py index 892e357243..275233281d 100644 --- a/pymc3/tuning/starting.py +++ b/pymc3/tuning/starting.py @@ -11,6 +11,7 @@ from ..vartypes import discrete_types, typefilter from ..model import modelcontext, Point from ..theanof import inputvars +import theano.gradient as tg from ..blocking import DictToArrayBijection, ArrayOrdering from ..util import update_start_vals, get_default_varnames @@ -75,7 +76,7 @@ def find_MAP(start=None, vars=None, method="L-BFGS-B", try: dlogp_func = bij.mapf(model.fastdlogp_nojac(vars)) compute_gradient = True - except AttributeError: + except (AttributeError, NotImplementedError, tg.NullTypeGradError): compute_gradient = False if disc_vars or not compute_gradient: From fd16ba537353a0a77a2beffefabfdc555086b691 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 21 Nov 2017 16:53:34 +0100 Subject: [PATCH 02/56] added a benchmark example for correlated dimensions --- pymc3/examples/samplers_mvnormal.py | 53 +++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 pymc3/examples/samplers_mvnormal.py diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py new file mode 100644 index 0000000000..1150229902 --- /dev/null +++ b/pymc3/examples/samplers_mvnormal.py @@ -0,0 +1,53 @@ +""" +Comparing different samplers on the banana-shaped distribution + +""" + + +import numpy as np +import pandas as pd +import pymc3 as pm +import theano.tensor as tt +import matplotlib.pyplot as plt + + + + +def run(steppers, p): + steppers = set(steppers) + traces = {} + effn = {} + + with pm.Model() as model: + mu = np.array([0.,0.]) + cov = np.array([[1.,p],[p,1.]]) + z = pm.MvNormal('z', mu=mu, cov=cov, shape=(2,)) + + for step_cls in steppers: + name = step_cls.__name__ + print('Step method: {}'.format(name)) + mt = pm.sample( + draws=10000, + step=step_cls(), + start={'z': [0, 0]} + ) + traces[name] = mt + en = pm.diagnostics.effective_n(mt) + effn[name] = np.mean(en['z']) / len(mt) / mt.nchains + return traces, effn + + +if __name__ == '__main__': + methods = [pm.Metropolis, pm.NUTS] + names = [c.__name__ for c in methods] + df = pd.DataFrame(columns=['p'] + names) + df['p'] = [.0,.9] + df = df.set_index('p') + rates = [] + for p in df.index: + trace, rate = run(methods, p) + for name in names: + df.set_value(p, name, rate[name]) + + print('Effective sample size [0...1]') + print(df) From 8ec0b80237923f32a62fb47c40936e12866e7ffb Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 21 Nov 2017 16:55:10 +0100 Subject: [PATCH 03/56] marking Metropolis as COMPATIBLE for all types, added line breaks (code style) --- pymc3/step_methods/metropolis.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index e0d049dba7..cd1c74be35 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -168,9 +168,7 @@ def astep(self, q0): @staticmethod def competence(var, has_grad): - if var.dtype in pm.discrete_types: - return Competence.COMPATIBLE - return Competence.INCOMPATIBLE + return Competence.COMPATIBLE def tune(scale, acc_rate): @@ -348,6 +346,7 
@@ def competence(var): return Competence.IDEAL return Competence.INCOMPATIBLE + class CategoricalGibbsMetropolis(ArrayStep): """A Metropolis-within-Gibbs step method optimized for categorical variables. This step method works for Bernoulli variables as well, but it is not @@ -465,16 +464,19 @@ def competence(var): return Competence.COMPATIBLE return Competence.INCOMPATIBLE + def sample_except(limit, excluded): candidate = nr.choice(limit - 1) if candidate >= excluded: candidate += 1 return candidate + def softmax(x): e_x = np.exp(x - np.max(x)) return e_x / np.sum(e_x, axis = 0) + def delta_logp(logp, vars, shared): [logp0], inarray0 = pm.join_nonshared_inputs([logp], vars, shared) From 1d04f53b3babcd5f4bac134cb176cd9581df4ac2 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 21 Nov 2017 18:06:16 +0100 Subject: [PATCH 04/56] specifying single job to force the sample_many function --- pymc3/examples/samplers_mvnormal.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index 1150229902..747ea5ba76 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -28,6 +28,8 @@ def run(steppers, p): print('Step method: {}'.format(name)) mt = pm.sample( draws=10000, + njobs=1, + chains=6, step=step_cls(), start={'z': [0, 0]} ) From 2bd6a5a76aa2a7e677fd14e42a917d57763621e9 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 21 Nov 2017 18:21:02 +0100 Subject: [PATCH 05/56] modified sampling procedure to iterate chains in parallel (instead of sequentially) --- pymc3/sampling.py | 92 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 16db40b951..9ab1661d99 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1,4 +1,5 @@ from collections import defaultdict, Iterable +from copy import copy import pickle from joblib import Parallel, delayed @@ -382,6 +383,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, parallel = njobs > 1 and chains > 1 if parallel: + pm._log.info('Multiprocess sampling ({} jobs, {} chains)'.format(njobs, chains)) try: trace = _mp_sample(**sample_args) except pickle.PickleError: @@ -396,6 +398,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, else: raise if not parallel: + pm._log.info('Multichain sampling ({} jobs, {} chains)'.format(njobs, chains)) trace = _sample_many(**sample_args) discard = tune if discard_tuned_samples else 0 @@ -429,7 +432,7 @@ def _check_start_shape(model, start): raise ValueError("Bad shape for start argument:{}".format(e)) -def _sample_many(draws, chain, chains, start, random_seed, **kwargs): +def _sample_many_sequentially(draws, chain, chains, start, random_seed, **kwargs): traces = [] for i in range(chains): trace = _sample(draws=draws, chain=chain + i, start=start[i], @@ -448,6 +451,21 @@ def _sample_many(draws, chain, chains, start, random_seed, **kwargs): return MultiTrace(traces) +def _sample_many(draws, chain, chains, start, random_seed, step, tune, model, progressbar=None, **kwargs): + # create the generator that iterates all chains in parallel + chains = [chain + c for c in range(chains)] + sampling = _iter_chains(draws, chains, step, start, tune=tune, model=model, random_seed=random_seed) + + if progressbar: + sampling = tqdm(sampling, total=draws) + + latest_traces = None + for it,traces in enumerate(sampling): + latest_traces = traces + # TODO: add support for
liveplot during population-sampling + return MultiTrace(traces) + + def _sample(chain, progressbar, random_seed, start, draws=None, step=None, trace=None, tune=None, model=None, live_plot=False, live_plot_kwargs=None, **kwargs): @@ -580,6 +598,78 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, step.report._finalize(strace) +def _iter_chains(draws, chains, step, start, tune=None, + model=None, random_seed=None): + model = modelcontext(model) + draws = int(draws) + if random_seed is not None: + np.random.seed(random_seed) + if draws < 1: + raise ValueError('Argument `draws` should be above 0.') + + # need indepenently tuned samplers for each chain + try: + steppers = [CompoundStep(copy(step)) for c in chains] + except TypeError: + steppers = [copy(step) for c in chains] + + # points tracks the current position of each chain in the parameter space + # it is updated as the chains are advanced + points = [Point(start[c], model=model) for c in chains] + + # prepare a BaseTrace for each chain + traces = [_choose_backend(None, c, model=model) for c in chains] + for chain,strace,step in zip(chains, traces, steppers): + # initialize the trace size + if len(strace) > 0: + update_start_vals(start[chain], strace.point(-1), model) + else: + update_start_vals(start[chain], model.test_point, model) + # initialize tracking of sampler stats + if step.generates_stats and strace.supports_sampler_stats: + strace.setup(draws, chain, step.stats_dtypes) + else: + strace.setup(draws, chain) + + try: + # iterate draws of all chains + for i in range(draws): + # step each of the chains + for c,strace in enumerate(traces): + if i == tune: + steppers[c] = stop_tuning(steppers[c]) + + # advance this chain (TODO: using the points of all chains) + step_result = steppers[c].step(points[c]) + + if steppers[c].generates_stats: + points[c], states = step_result + if strace.supports_sampler_stats: + strace.record(points[c], states) + else: + strace.record(points[c]) + else: + points[c] = step_result + strace.record(points[c]) + # yield the state of all chains in parallel + yield traces + except KeyboardInterrupt: + for chain,strace in enumerate(traces): + strace.close() + if hasattr(step, 'report'): + step.report._finalize(strace) + raise + except BaseException: + for chain,strace in enumerate(traces): + strace.close() + raise + else: + for chain,strace in enumerate(traces): + strace.close() + if hasattr(step, 'report'): + step.report._finalize(strace) + + def _choose_backend(trace, chain, shortcuts=None, **kwds): if isinstance(trace, BaseTrace): return trace From df37cbbb13b121c8730e34a5539518b70ed89db8 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 21 Nov 2017 18:35:21 +0100 Subject: [PATCH 06/56] updated description --- pymc3/examples/samplers_mvnormal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index 747ea5ba76..e5dcb8cf33 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -1,5 +1,5 @@ """ -Comparing different samplers on the banana-shaped distribution +Comparing different samplers on a correlated bivariate normal distribution. 
""" From ba7d083fd2869c93b3138435d42a9ec601bf8b90 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 22 Nov 2017 16:20:27 +0100 Subject: [PATCH 07/56] indexing samplers by chain number instead of chain id --- pymc3/sampling.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 9ab1661d99..240992e06e 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -600,6 +600,8 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, def _iter_chains(draws, chains, step, start, tune=None, model=None, random_seed=None): + # chains contains the chain numbers, but for indexing we need indices... + cs = list(range(len(chains))) model = modelcontext(model) draws = int(draws) if random_seed is not None: @@ -608,32 +610,36 @@ def _iter_chains(draws, chains, step, start, tune=None, raise ValueError('Argument `draws` should be above 0.') # need indepenently tuned samplers for each chain - try: - steppers = [CompoundStep(copy(step)) for c in chains] - except TypeError: - steppers = [copy(step) for c in chains] + is_compound = isinstance(step, list) + if is_compound: + steppers = [CompoundStep(copy(step)) for c in cs] + else: + steppers = [copy(step) for c in cs] # points tracks the current position of each chain in the parameter space # it is updated as the chains are advanced - points = [Point(start[c], model=model) for c in chains] + points = [Point(start[c], model=model) for c in cs] # prepare a BaseTrace for each chain traces = [_choose_backend(None, c, model=model) for c in chains] - for chain,strace,step in zip(chains, traces, steppers): + for c,strace in enumerate(traces): # initialize the trace size if len(strace) > 0: - update_start_vals(start[chain], strace.point(-1), model) + update_start_vals(start[c], strace.point(-1), model) else: - update_start_vals(start[chain], model.test_point, model) + update_start_vals(start[c], model.test_point, model) # initialize tracking of sampler stats if step.generates_stats and strace.supports_sampler_stats: - strace.setup(draws, chain, step.stats_dtypes) + strace.setup(draws, c, steppers[c].stats_dtypes) else: - strace.setup(draws, chain) + strace.setup(draws, c) try: # iterate draws of all chains for i in range(draws): + # snapshot all points (the original will be updated) + last_points = copy(points) + # step each of the chains for c,strace in enumerate(traces): if i == tune: @@ -654,17 +660,17 @@ def _iter_chains(draws, chains, step, start, tune=None, # yield the state of all chains in parallel yield traces except KeyboardInterrupt: - for chain,strace in enumerate(traces): + for c,strace in enumerate(traces): strace.close() if hasattr(step, 'report'): step.report._finalize(strace) raise except BaseException: - for chain,strace in enumerate(traces): + for c,strace in enumerate(traces): strace.close() raise else: - for chain,strace in enumerate(traces): + for c,strace in enumerate(traces): strace.close() if hasattr(step, 'report'): step.report._finalize(strace) From a4894bfe86c786b6b609c678cd84d621379857c5 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 22 Nov 2017 16:20:57 +0100 Subject: [PATCH 08/56] print transposes result table --- pymc3/examples/samplers_mvnormal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index e5dcb8cf33..b6546f4004 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ 
-52,4 +52,4 @@ def run(steppers, p): df.set_value(p, name, rate[name]) print('Effective sample size [0...1]') - print(df) + print(df.T) From cd14d6a6f17e2532c5a77781938975a7801f8b09 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 12:54:47 +0100 Subject: [PATCH 09/56] created PopulationArrayStepShared base class that allows the individual sampler to be aware of other chains --- pymc3/step_methods/arraystep.py | 46 +++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index 564d37e2fc..42bc35343b 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -165,6 +165,52 @@ def step(self, point): return bij.rmap(apoint) +class PopulationArrayStepShared(BlockedStep): + """Faster version of ArrayStep that requires the substep method that does not wrap + the functions the step method uses. + + Works by setting shared variables before using the step. This eliminates the mapping + and unmapping overhead as well as moving fewer variables around. + """ + + def __init__(self, vars, shared, blocked=True): + """ + Parameters + ---------- + vars : list of sampling variables + shared : dict of theano variable -> shared variable + blocked : Boolean (default True) + """ + self.vars = vars + self.ordering = ArrayOrdering(vars) + self.bij = None + self.shared = {str(var): shared for var, shared in shared.items()} + self.blocked = blocked + self.population = None + self.this_chain = None + self.other_chains = None + + def link_population(self, population, chain_index): + self.population = population + self.this_chain = chain_index + self.other_chains = [c for c in range(len(population)) if c != chain_index] + return + + def step(self, point): + for var, share in self.shared.items(): + share.set_value(point[var]) + + if self.bij is None: + self.bij = DictToArrayBijection(self.ordering, point) + + if self.generates_stats: + apoint, stats = self.astep(self.bij.map(point)) + return self.bij.rmap(apoint), stats + else: + apoint = self.astep(self.bij.map(point)) + return self.bij.rmap(apoint) + + class GradientSharedStep(BlockedStep): def __init__(self, vars, model=None, blocked=True, dtype=None, **theano_kwargs): From dbc42bcc4bbbd9a0056dc6ddb315f094bee73737 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 12:56:44 +0100 Subject: [PATCH 10/56] modified sampling loop to account for the PopulationArrayStepShared sampler types --- pymc3/sampling.py | 45 ++++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 240992e06e..ad162cfecc 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -11,6 +11,7 @@ from .backends.base import BaseTrace, MultiTrace from .backends.ndarray import NDArray from .model import modelcontext, Point +from .step_methods import arraystep from .step_methods import (NUTS, HamiltonianMC, SGFS, Metropolis, BinaryMetropolis, BinaryGibbsMetropolis, CategoricalGibbsMetropolis, Slice, CompoundStep) @@ -601,24 +602,33 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, def _iter_chains(draws, chains, step, start, tune=None, model=None, random_seed=None): # chains contains the chain numbers, but for indexing we need indices... 
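# ---- Editor's note: illustrative sketch, not part of the patch series. ----
# PATCH 09 above introduces PopulationArrayStepShared, whose link_population() stores a
# *reference* to one shared list of chain states plus this chain's index. Because the
# sampling loop (PATCH 10) replaces elements of that list in place after every draw, each
# stepper always sees the current position of every other chain. A self-contained toy
# analogue (class and variable names below are hypothetical, not the PyMC3 API):
class ToyPopulationStepper:
    def link_population(self, population, chain_index):
        self.population = population              # shared by reference, never copied
        self.this_chain = chain_index
        self.other_chains = [c for c in range(len(population)) if c != chain_index]

points = [{'x': 0.0}, {'x': 1.0}, {'x': 2.0}]      # one current point per chain
steppers = [ToyPopulationStepper() for _ in points]
for c, s in enumerate(steppers):
    s.link_population(points, c)

points[1] = {'x': -5.0}                            # the loop advances chain 1 in place...
print(steppers[0].population[1])                   # ...and chain 0's stepper sees {'x': -5.0}
# ---- end editor's note ----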
- cs = list(range(len(chains))) + nchains = len(chains) model = modelcontext(model) draws = int(draws) + if not isinstance(step, (list, tuple)): + step = [step] + is_compound = len(step) > 1 if random_seed is not None: np.random.seed(random_seed) if draws < 1: raise ValueError('Argument `draws` should be above 0.') - # need indepenently tuned samplers for each chain - is_compound = isinstance(step, list) - if is_compound: - steppers = [CompoundStep(copy(step)) for c in cs] - else: - steppers = [copy(step) for c in cs] - # points tracks the current position of each chain in the parameter space # it is updated as the chains are advanced - points = [Point(start[c], model=model) for c in cs] + points = [Point(start[c], model=model) for c in range(nchains)] + updates = [None] * nchains + + # Set up the steppers + steppers = [None] * nchains + for c in range(nchains): + # need indepenently tuned samplers for each chain + smethods = copy(step) + # link Population samplers to the shared population state + for sm in smethods: + if isinstance(sm, arraystep.PopulationArrayStepShared): + sm.link_population(points, c) + steppers[c] = CompoundStep(smethods) if is_compound else smethods[0] + # prepare a BaseTrace for each chain traces = [_choose_backend(None, c, model=model) for c in chains] @@ -629,7 +639,7 @@ def _iter_chains(draws, chains, step, start, tune=None, else: update_start_vals(start[c], model.test_point, model) # initialize tracking of sampler stats - if step.generates_stats and strace.supports_sampler_stats: + if steppers[c].generates_stats and strace.supports_sampler_stats: strace.setup(draws, c, steppers[c].stats_dtypes) else: strace.setup(draws, c) @@ -637,25 +647,22 @@ def _iter_chains(draws, chains, step, start, tune=None, try: # iterate draws of all chains for i in range(draws): - # snapshot all points (the original will be updated) - last_points = copy(points) - # step each of the chains - for c,strace in enumerate(traces): + for c in range(nchains): if i == tune: steppers[c] = stop_tuning(steppers[c]) + updates[c] = steppers[c].step(points[c]) - # advance this chain (TODO: using the points of all chains) - step_result = steppers[c].step(points[c]) - + # apply the update to the points and record to the traces + for c,strace in enumerate(traces): if steppers[c].generates_stats: - points[c], states = step_result + points[c], states = updates[c] if strace.supports_sampler_stats: strace.record(points[c], states) else: strace.record(points[c]) else: - points[c] = step_result + points[c] = updates[c] strace.record(points[c]) # yield the state of all chains in parallel yield traces From 0ca38043e2646515baa7b274d38aa3e17266f169 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 12:57:10 +0100 Subject: [PATCH 11/56] added the DEMetropolis sampler --- pymc3/examples/samplers_mvnormal.py | 6 +- pymc3/step_methods/__init__.py | 1 + pymc3/step_methods/metropolis.py | 109 +++++++++++++++++++++++++++- 3 files changed, 114 insertions(+), 2 deletions(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index b6546f4004..32c27a4c44 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -40,7 +40,11 @@ def run(steppers, p): if __name__ == '__main__': - methods = [pm.Metropolis, pm.NUTS] + methods = [ + pm.Metropolis, + pm.NUTS, + pm.DEMetropolis + ] names = [c.__name__ for c in methods] df = pd.DataFrame(columns=['p'] + names) df['p'] = [.0,.9] diff --git a/pymc3/step_methods/__init__.py 
b/pymc3/step_methods/__init__.py index 67212508d3..d18e36a153 100644 --- a/pymc3/step_methods/__init__.py +++ b/pymc3/step_methods/__init__.py @@ -3,6 +3,7 @@ from .hmc import HamiltonianMC, NUTS from .metropolis import Metropolis +from .metropolis import DEMetropolis from .metropolis import BinaryMetropolis from .metropolis import BinaryGibbsMetropolis from .metropolis import CategoricalGibbsMetropolis diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index cd1c74be35..2a15a76694 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -4,7 +4,8 @@ import scipy.linalg from ..distributions import draw_values -from .arraystep import ArrayStepShared, ArrayStep, metrop_select, Competence +from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence +from ..blocking import DictToArrayBijection import pymc3 as pm from pymc3.theanof import floatX @@ -465,6 +466,112 @@ def competence(var): return Competence.INCOMPATIBLE +class DEMetropolis(PopulationArrayStepShared): + """ + Differential Evolution Metropolis sampling step + + Parameters + ---------- + vars : list + List of variables for sampler + S : standard deviation or covariance matrix + Some measure of variance to parameterize proposal distribution + proposal_dist : function + Function that returns zero-mean deviates when parameterized with + S (and n). Defaults to normal. + scaling : scalar or array + Initial scale factor for proposal. Defaults to 1. + tune : bool + Flag for tuning. Defaults to True. + tune_interval : int + The frequency of tuning. Defaults to 100 iterations. + model : PyMC Model + Optional model for sampling step. Defaults to None (taken from context). + mode : string or `Mode` instance. 
+ compilation mode passed to Theano functions + """ + name = 'DEmetropolis' + + default_blocked = False + generates_stats = True + stats_dtypes = [{ + 'accept': np.float64, + 'tune': np.bool, + }] + + def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=1., + tune=True, tune_interval=100, model=None, mode=None, **kwargs): + + model = pm.modelcontext(model) + + if vars is None: + vars = model.vars + vars = pm.inputvars(vars) + + if S is None: + S = np.ones(sum(v.dsize for v in vars)) + + if proposal_dist is not None: + self.proposal_dist = proposal_dist(S) + elif S.ndim == 1: + self.proposal_dist = NormalProposal(S) + elif S.ndim == 2: + self.proposal_dist = MultivariateNormalProposal(S) + else: + raise ValueError("Invalid rank for variance: %s" % S.ndim) + + self.scaling = np.atleast_1d(scaling).astype('d') + self.lamb = lamb if lamb is not None else 2.38 / np.sqrt(2 * S.ndim) + self.tune = tune + self.tune_interval = tune_interval + self.steps_until_tune = tune_interval + self.accepted = 0 + + self.mode = mode + + shared = pm.make_shared_replacements(vars, model) + self.delta_logp = delta_logp(model.logpt, vars, shared) + super(DEMetropolis, self).__init__(vars, shared) + + def astep(self, q0): + if not self.steps_until_tune and self.tune: + # Tune scaling parameter + self.scaling = tune( + self.scaling, self.accepted / float(self.tune_interval)) + # Reset counter + self.steps_until_tune = self.tune_interval + self.accepted = 0 + + epsilon = self.proposal_dist() * self.scaling + + # differential evolution proposal + # select two other chains + ir1, ir2 = np.random.choice(self.other_chains, 2, replace=False) + r1 = self.bij.map(self.population[ir1]) + r2 = self.bij.map(self.population[ir2]) + # propose a jump + q = floatX(q0 + self.lamb * (r1 - r2) + epsilon) + + accept = self.delta_logp(q, q0) + q_new, accepted = metrop_select(accept, q, q0) + self.accepted += accepted + + self.steps_until_tune -= 1 + + stats = { + 'tune': self.tune, + 'accept': np.exp(accept), + } + + return q_new, [stats] + + @staticmethod + def competence(var, has_grad): + if var.dtype in pm.discrete_types: + return Competence.INCOMPATIBLE + return Competence.COMPATIBLE + + def sample_except(limit, excluded): candidate = nr.choice(limit - 1) if candidate >= excluded: From 2206b456bd137352d31b8ca3dd4665e4213c5b1c Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 13:24:31 +0100 Subject: [PATCH 12/56] raisig an error when the population is too small --- pymc3/step_methods/arraystep.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index 42bc35343b..8156e1d081 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -194,6 +194,9 @@ def link_population(self, population, chain_index): self.population = population self.this_chain = chain_index self.other_chains = [c for c in range(len(population)) if c != chain_index] + if not len(self.other_chains) > 1: + raise ValueError('Population ist just {} + {}. This is too small. 
You should ' \ + 'increase the number of chains.'.format(self.this_chain, self.other_chains)) return def step(self, point): From 6e63f4960e5275222a72531649c84c489127bd86 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 14:52:28 +0100 Subject: [PATCH 13/56] verbose debug logging --- pymc3/sampling.py | 3 +++ pymc3/step_methods/arraystep.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index ad162cfecc..caa35b2b7d 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -620,11 +620,14 @@ def _iter_chains(draws, chains, step, start, tune=None, # Set up the steppers steppers = [None] * nchains + pm._log.info('Sampler setup') for c in range(nchains): + pm._log.info('Chain {}'.format(c)) # need indepenently tuned samplers for each chain smethods = copy(step) # link Population samplers to the shared population state for sm in smethods: + pm._log.info('{}: {}'.format(sm.__class__.__name__, sm.vars)) if isinstance(sm, arraystep.PopulationArrayStepShared): sm.link_population(points, c) steppers[c] = CompoundStep(smethods) if is_compound else smethods[0] diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index 8156e1d081..be640806f9 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -5,6 +5,7 @@ import numpy as np from numpy.random import uniform from enum import IntEnum, unique +import pymc3 as pm __all__ = [ 'ArrayStep', 'ArrayStepShared', 'metrop_select', 'Competence'] @@ -195,8 +196,9 @@ def link_population(self, population, chain_index): self.this_chain = chain_index self.other_chains = [c for c in range(len(population)) if c != chain_index] if not len(self.other_chains) > 1: - raise ValueError('Population ist just {} + {}. This is too small. You should ' \ + raise ValueError('Population is just {} + {}. This is too small. You should ' \ 'increase the number of chains.'.format(self.this_chain, self.other_chains)) + pm._log.info('Sampler {} in population {}'.format(self.this_chain, self.other_chains)) return def step(self, point): From 9395de3c0ca590f029aa37893bf4fe138761fcb7 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 15:40:06 +0100 Subject: [PATCH 14/56] removed debug print --- pymc3/step_methods/arraystep.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index be640806f9..590785cd01 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -5,7 +5,6 @@ import numpy as np from numpy.random import uniform from enum import IntEnum, unique -import pymc3 as pm __all__ = [ 'ArrayStep', 'ArrayStepShared', 'metrop_select', 'Competence'] @@ -198,7 +197,6 @@ def link_population(self, population, chain_index): if not len(self.other_chains) > 1: raise ValueError('Population is just {} + {}. This is too small. 
You should ' \ 'increase the number of chains.'.format(self.this_chain, self.other_chains)) - pm._log.info('Sampler {} in population {}'.format(self.this_chain, self.other_chains)) return def step(self, point): From 6105dec3cfc569549032ab3dbc03df92c83c5004 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 15:41:51 +0100 Subject: [PATCH 15/56] forcing CompoundStep type --- pymc3/sampling.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index caa35b2b7d..1715526926 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -339,6 +339,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, if model.ndim == 0: raise ValueError('The model does not contain any free variables.') + if step is None and init is not None and pm.model.all_continuous(model.vars): try: # By default, try to use NUTS @@ -359,6 +360,15 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, else: step = assign_step_methods(model, step, step_kwargs=step_kwargs) + if not isinstance(step, CompoundStep): + if not isinstance(step, (list, tuple)): + step = [step] + step = CompoundStep(step) + + # print the sampler assignment + for sm in step.methods: + pm._log.info('{}: {}'.format(sm.__class__.__name__, sm.vars)) + if start is None: start = [None] * chains if isinstance(start, dict): @@ -601,13 +611,12 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, def _iter_chains(draws, chains, step, start, tune=None, model=None, random_seed=None): + if not isinstance(step, CompoundStep): + raise ValueError('step-kwarg must be CompoundStep, got {} instead.'.format(step.__class__)) # chains contains the chain numbers, but for indexing we need indices... 
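# ---- Editor's note: illustrative sketch, not part of the patch series. ----
# The DEMetropolis.astep() added in PATCH 11 builds its proposal from the current positions
# of two randomly chosen *other* chains plus a small jitter epsilon:
#     q = q0 + lambda * (r1 - r2) + epsilon
# with lambda defaulting to 2.38 / sqrt(2 * d) for d sampled dimensions (see the lambda fix
# in PATCH 21). A self-contained NumPy version of just that proposal step; the function and
# variable names here are illustrative only:
import numpy as np

def de_proposal(population, this_chain, lamb=None, scaling=0.001):
    """Differential-evolution proposal for one chain, given the whole population."""
    population = np.asarray(population, dtype=float)   # shape: (nchains, ndim)
    ndim = population.shape[1]
    if lamb is None:
        lamb = 2.38 / np.sqrt(2 * ndim)
    other_chains = [c for c in range(len(population)) if c != this_chain]
    ir1, ir2 = np.random.choice(other_chains, 2, replace=False)
    epsilon = np.random.uniform(-scaling, scaling, size=ndim)
    return population[this_chain] + lamb * (population[ir1] - population[ir2]) + epsilon

population = np.random.randn(6, 2)                     # 6 chains in a 2-parameter space
print(de_proposal(population, this_chain=0))           # proposed point for chain 0
# ---- end editor's note ----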
nchains = len(chains) model = modelcontext(model) draws = int(draws) - if not isinstance(step, (list, tuple)): - step = [step] - is_compound = len(step) > 1 if random_seed is not None: np.random.seed(random_seed) if draws < 1: @@ -620,17 +629,14 @@ def _iter_chains(draws, chains, step, start, tune=None, # Set up the steppers steppers = [None] * nchains - pm._log.info('Sampler setup') for c in range(nchains): - pm._log.info('Chain {}'.format(c)) # need indepenently tuned samplers for each chain - smethods = copy(step) + chainstep = copy(step) # link Population samplers to the shared population state - for sm in smethods: - pm._log.info('{}: {}'.format(sm.__class__.__name__, sm.vars)) + for sm in chainstep.methods: if isinstance(sm, arraystep.PopulationArrayStepShared): sm.link_population(points, c) - steppers[c] = CompoundStep(smethods) if is_compound else smethods[0] + steppers[c] = chainstep # prepare a BaseTrace for each chain From 0494bc0264b3eaa0b5054e2055a275a0a5297fd6 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 15:42:18 +0100 Subject: [PATCH 16/56] formatting --- pymc3/sampling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 1715526926..273dd48c8e 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -339,7 +339,6 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, if model.ndim == 0: raise ValueError('The model does not contain any free variables.') - if step is None and init is not None and pm.model.all_continuous(model.vars): try: # By default, try to use NUTS From e25ce1962652bc51059bf9d6b68af9ed5e6d05ed Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 15:42:59 +0100 Subject: [PATCH 17/56] setting DEMetropolis as a blocked step method --- pymc3/step_methods/metropolis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 2a15a76694..de66631168 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -490,9 +490,9 @@ class DEMetropolis(PopulationArrayStepShared): mode : string or `Mode` instance. 
compilation mode passed to Theano functions """ - name = 'DEmetropolis' + name = 'DEMetropolis' - default_blocked = False + default_blocked = True generates_stats = True stats_dtypes = [{ 'accept': np.float64, From 2841442270fe83e6ad8d88ab41dae8a9b99cd5d0 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 23 Nov 2017 15:45:28 +0100 Subject: [PATCH 18/56] measuring the runtime, example with both 2D z-variable and two 1D x,y variables --- pymc3/examples/samplers_mvnormal.py | 59 ++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index 32c27a4c44..50e79d1174 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -5,38 +5,55 @@ import numpy as np +import time import pandas as pd import pymc3 as pm import theano.tensor as tt import matplotlib.pyplot as plt - +USE_XY = True def run(steppers, p): steppers = set(steppers) traces = {} effn = {} + runtimes = {} with pm.Model() as model: - mu = np.array([0.,0.]) - cov = np.array([[1.,p],[p,1.]]) - z = pm.MvNormal('z', mu=mu, cov=cov, shape=(2,)) + if USE_XY: + x = pm.Flat('x') + y = pm.Flat('y') + mu = np.array([0.,0.]) + cov = np.array([[1.,p],[p,1.]]) + z = pm.MvNormal.dist(mu=mu, cov=cov, shape=(2,)).logp(tt.stack([x,y])) + p = pm.Potential('logp_xy', z) + start = {'x': 0, 'y': 0} + else: + mu = np.array([0.,0.]) + cov = np.array([[1.,p],[p,1.]]) + z = pm.MvNormal('z', mu=mu, cov=cov, shape=(2,)) + start={'z': [0, 0]} for step_cls in steppers: name = step_cls.__name__ - print('Step method: {}'.format(name)) + print('\r\nStep method: {}'.format(name)) + t_start = time.time() mt = pm.sample( draws=10000, njobs=1, chains=6, step=step_cls(), - start={'z': [0, 0]} + start=start ) + runtimes[name] = time.time() - t_start traces[name] = mt en = pm.diagnostics.effective_n(mt) - effn[name] = np.mean(en['z']) / len(mt) / mt.nchains - return traces, effn + if USE_XY: + effn[name] = np.mean(en['x']) / len(mt) / mt.nchains + else: + effn[name] = np.mean(en['z']) / len(mt) / mt.nchains + return traces, effn, runtimes if __name__ == '__main__': @@ -46,14 +63,22 @@ def run(steppers, p): pm.DEMetropolis ] names = [c.__name__ for c in methods] - df = pd.DataFrame(columns=['p'] + names) - df['p'] = [.0,.9] - df = df.set_index('p') - rates = [] - for p in df.index: - trace, rate = run(methods, p) + df_effectiven = pd.DataFrame(columns=['p'] + names) + df_effectiven['p'] = [.0,.9] + df_effectiven = df_effectiven.set_index('p') + + df_runtime = pd.DataFrame(columns=['p'] + names) + df_runtime['p'] = [.0,.9] + df_runtime = df_runtime.set_index('p') + + for p in df_effectiven.index: + trace, rate, runtime = run(methods, p) for name in names: - df.set_value(p, name, rate[name]) + df_effectiven.set_value(p, name, rate[name]) + df_runtime.set_value(p, name, runtime[name]) + + print('\r\nEffective sample size [0...1]') + print(df_effectiven.T) - print('Effective sample size [0...1]') - print(df.T) + print('\r\nRuntime [s]') + print(df_runtime.T) From fd6a1efa36fad415befa6deedd9e1a1d5d6d76b7 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 24 Nov 2017 11:46:18 +0100 Subject: [PATCH 19/56] changed the initialization order such that variable transforms are applied --- pymc3/sampling.py | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 273dd48c8e..fdd9374421 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py 
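# ---- Editor's note: hedged usage sketch, not part of the patch series. ----
# The benchmark script patched above (PATCH 18) drives the new step method through the
# ordinary pm.sample() call; with several chains and njobs=1 the single-process,
# population-aware sampling loop added in this series handles all chains together. A
# minimal example of the same pattern. It assumes a PyMC3 build that includes the
# DEMetropolis step from this series; the model and the draw/chain counts are illustrative.
import pymc3 as pm

with pm.Model():
    x = pm.Normal('x', mu=0, sd=1)
    y = pm.Normal('y', mu=0, sd=1)
    trace = pm.sample(draws=5000, chains=6, njobs=1, step=pm.DEMetropolis())

print(pm.diagnostics.effective_n(trace))   # effective sample sizes per variable
# ---- end editor's note ----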
@@ -621,12 +621,28 @@ def _iter_chains(draws, chains, step, start, tune=None, if draws < 1: raise ValueError('Argument `draws` should be above 0.') - # points tracks the current position of each chain in the parameter space + # IMPORTANT: The initialization of traces, samplers and points must happen in the right order: + # 1. traces are initialized and update_start_vals configures variable transforms + # 2. population of points is created + # 3. steppers are initialized and linked to the points object + # 4. traces are configured to track the sampler stats + + + # 1. prepare a BaseTrace for each chain + traces = [_choose_backend(None, c, model=model) for c in chains] + for c,strace in enumerate(traces): + # initialize the trace size + if len(strace) > 0: + update_start_vals(start[c], strace.point(-1), model) + else: + update_start_vals(start[c], model.test_point, model) + + # 2. create a population (points) that tracks each chain # it is updated as the chains are advanced points = [Point(start[c], model=model) for c in range(nchains)] updates = [None] * nchains - # Set up the steppers + # 3. Set up the steppers steppers = [None] * nchains for c in range(nchains): # need indepenently tuned samplers for each chain @@ -637,20 +653,12 @@ def _iter_chains(draws, chains, step, start, tune=None, sm.link_population(points, c) steppers[c] = chainstep - - # prepare a BaseTrace for each chain - traces = [_choose_backend(None, c, model=model) for c in chains] - for c,strace in enumerate(traces): - # initialize the trace size - if len(strace) > 0: - update_start_vals(start[c], strace.point(-1), model) - else: - update_start_vals(start[c], model.test_point, model) - # initialize tracking of sampler stats - if steppers[c].generates_stats and strace.supports_sampler_stats: - strace.setup(draws, c, steppers[c].stats_dtypes) + # 4. 
configure tracking of sampler stats + for c in range(nchains): + if steppers[c].generates_stats and traces[c].supports_sampler_stats: + traces[c].setup(draws, c, steppers[c].stats_dtypes) else: - strace.setup(draws, c) + traces[c].setup(draws, c) try: # iterate draws of all chains From cf4ee2f11750dbd3085c94e30c7e83b59232735e Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 24 Nov 2017 15:25:44 +0100 Subject: [PATCH 20/56] fixed a bug caused by start=None --- pymc3/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index fdd9374421..a3fe370f64 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -369,7 +369,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, pm._log.info('{}: {}'.format(sm.__class__.__name__, sm.vars)) if start is None: - start = [None] * chains + start = {} if isinstance(start, dict): start = [start] * chains From 6733ae155f141c65c8f43a56e94e5918f21b026c Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Mon, 27 Nov 2017 16:02:49 +0100 Subject: [PATCH 21/56] fixes a bug in computing lambda --- pymc3/step_methods/metropolis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index de66631168..9beed65b89 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -521,7 +521,7 @@ def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=1., raise ValueError("Invalid rank for variance: %s" % S.ndim) self.scaling = np.atleast_1d(scaling).astype('d') - self.lamb = lamb if lamb is not None else 2.38 / np.sqrt(2 * S.ndim) + self.lamb = lamb if lamb is not None else 2.38 / np.sqrt(2 * S.size) self.tune = tune self.tune_interval = tune_interval self.steps_until_tune = tune_interval From 368f4aa9dcd98f48949c6ab02ed06f43064931a9 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Mon, 27 Nov 2017 17:20:00 +0100 Subject: [PATCH 22/56] using a Uniform proposal with low initial scale --- pymc3/step_methods/metropolis.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 9beed65b89..f0f82daa1c 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -26,6 +26,11 @@ def __call__(self): return nr.normal(scale=self.s) +class UniformProposal(Proposal): + def __call__(self): + return nr.uniform(low=-self.s, high=self.s, size=len(self.s)) + + class CauchyProposal(Proposal): def __call__(self): return nr.standard_cauchy(size=np.size(self.s)) * self.s @@ -499,7 +504,7 @@ class DEMetropolis(PopulationArrayStepShared): 'tune': np.bool, }] - def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=1., + def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=0.001, tune=True, tune_interval=100, model=None, mode=None, **kwargs): model = pm.modelcontext(model) @@ -513,12 +518,8 @@ def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=1., if proposal_dist is not None: self.proposal_dist = proposal_dist(S) - elif S.ndim == 1: - self.proposal_dist = NormalProposal(S) - elif S.ndim == 2: - self.proposal_dist = MultivariateNormalProposal(S) else: - raise ValueError("Invalid rank for variance: %s" % S.ndim) + self.proposal_dist = UniformProposal(S) self.scaling = np.atleast_1d(scaling).astype('d') self.lamb = lamb if lamb is not None else 2.38 / np.sqrt(2 * S.size) From 
abcb12ff6f9be62387ee53660dd9b7652be0ab34 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Mon, 27 Nov 2017 18:40:51 +0100 Subject: [PATCH 23/56] renamed local variable --- pymc3/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index a3fe370f64..2a80038bf5 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -629,7 +629,7 @@ def _iter_chains(draws, chains, step, start, tune=None, # 1. prepare a BaseTrace for each chain - traces = [_choose_backend(None, c, model=model) for c in chains] + traces = [_choose_backend(None, chain, model=model) for chain in chains] for c,strace in enumerate(traces): # initialize the trace size if len(strace) > 0: From acf13115902735811607ad0938d7b47ce1a8941b Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 13:56:14 +0100 Subject: [PATCH 24/56] logging the crossover and scaling --- pymc3/step_methods/metropolis.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index f0f82daa1c..a49fb0b59e 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -502,6 +502,9 @@ class DEMetropolis(PopulationArrayStepShared): stats_dtypes = [{ 'accept': np.float64, 'tune': np.bool, + 'ir1': np.int, + 'ir2': np.int, + 'scaling': np.float64, }] def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=0.001, @@ -562,6 +565,9 @@ def astep(self, q0): stats = { 'tune': self.tune, 'accept': np.exp(accept), + 'ir1': ir1, + 'ir2': ir2, + 'scaling': self.scaling } return q_new, [stats] From 3b6a2d92cef276d21cfbd9bcd73464b2dce5924f Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 14:36:52 +0100 Subject: [PATCH 25/56] fixed a bug that caused step methods to not be copied --- pymc3/sampling.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 2a80038bf5..385bc8d97e 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -646,7 +646,11 @@ def _iter_chains(draws, chains, step, start, tune=None, steppers = [None] * nchains for c in range(nchains): # need indepenently tuned samplers for each chain - chainstep = copy(step) + # it is important to copy the actual steppers (but not the delta_logp) + if isinstance(step, CompoundStep): + chainstep = CompoundStep([copy(m) for m in step.methods]) + else: + chainstep = copy(step) # link Population samplers to the shared population state for sm in chainstep.methods: if isinstance(sm, arraystep.PopulationArrayStepShared): From 27e826382a197d606821ca76e2cb5f4044c81825 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 16:51:16 +0100 Subject: [PATCH 26/56] smarter multiprocessing --- pymc3/sampling.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 385bc8d97e..aaefa81268 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -391,9 +391,13 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, sample_args.update(kwargs) - parallel = njobs > 1 and chains > 1 + has_population_samplers = np.any([ + isinstance(m, arraystep.PopulationArrayStepShared) + for m in step.methods + ]) + parallel = njobs > 1 and chains > 1 and not has_population_samplers if parallel: - pm._log.info('Multiprocess sampling ({} jobs, {} chains)'.format(njobs, chains)) + pm._log.info('Multiprocess sampling ({} chains in {} jobs)'.format(chains, njobs)) try: trace = 
_mp_sample(**sample_args) except pickle.PickleError: @@ -408,7 +412,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, else: raise if not parallel: - pm._log.info('Multichain sampling ({} jobs, {} chains)'.format(njobs, chains)) + pm._log.info('Multichain sampling ({} chains in 1 job)'.format(chains)) trace = _sample_many(**sample_args) discard = tune if discard_tuned_samples else 0 From d83c5f4d997b9e68aa7adbbe899fe4816137b735 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 16:53:41 +0100 Subject: [PATCH 27/56] automatic multiprocessing decision, reporting relative sampling rates --- pymc3/examples/samplers_mvnormal.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index 50e79d1174..626ce70557 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -41,11 +41,11 @@ def run(steppers, p): t_start = time.time() mt = pm.sample( draws=10000, - njobs=1, chains=6, step=step_cls(), start=start ) + print('{} samples across {} chains'.format(len(mt), mt.nchains)) runtimes[name] = time.time() - t_start traces[name] = mt en = pm.diagnostics.effective_n(mt) @@ -63,22 +63,32 @@ def run(steppers, p): pm.DEMetropolis ] names = [c.__name__ for c in methods] - df_effectiven = pd.DataFrame(columns=['p'] + names) - df_effectiven['p'] = [.0,.9] - df_effectiven = df_effectiven.set_index('p') - df_runtime = pd.DataFrame(columns=['p'] + names) - df_runtime['p'] = [.0,.9] - df_runtime = df_runtime.set_index('p') + df_base = pd.DataFrame(columns=['p'] + names) + df_base['p'] = [.0,.9] + df_base = df_base.set_index('p') + + df_effectiven = df_base.copy() + df_runtime = df_base.copy() + df_performance = df_base.copy() for p in df_effectiven.index: trace, rate, runtime = run(methods, p) for name in names: df_effectiven.set_value(p, name, rate[name]) df_runtime.set_value(p, name, runtime[name]) + df_performance.set_value(p, name, rate[name] / runtime[name]) print('\r\nEffective sample size [0...1]') - print(df_effectiven.T) + print(df_effectiven.T.to_string(float_format='{:.3f}'.format)) print('\r\nRuntime [s]') - print(df_runtime.T) + print(df_runtime.T.to_string(float_format='{:.1f}'.format)) + + if 'NUTS' in names: + print('\r\nNormalized effective sampling rate [0...1]') + df_performance = df_performance.T / df_performance.loc[0]['NUTS'] + else: + print('\r\nNormalized effective sampling rate [1/s]') + df_performance = df_performance.T + print(df_performance.to_string(float_format='{:.0f}'.format)) From fc4e1d0e5200b5df98ed807959fb618cc1621d25 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 16:55:04 +0100 Subject: [PATCH 28/56] print format --- pymc3/examples/samplers_mvnormal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index 626ce70557..a7d882ae1c 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -91,4 +91,4 @@ def run(steppers, p): else: print('\r\nNormalized effective sampling rate [1/s]') df_performance = df_performance.T - print(df_performance.to_string(float_format='{:.0f}'.format)) + print(df_performance.to_string(float_format='{:.3f}'.format)) From 569731bccfb74be9d52e2f9e893cd7736cd7fd9d Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 17:30:24 +0100 Subject: [PATCH 29/56] inheriting PopulationArraySharedStep from 
ArrayStepShared, using a bij attribute because DEMetropolis will use it --- pymc3/step_methods/arraystep.py | 49 ++++++++++++++------------------- 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index 590785cd01..581f77ec18 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -150,27 +150,28 @@ def __init__(self, vars, shared, blocked=True): self.ordering = ArrayOrdering(vars) self.shared = {str(var): shared for var, shared in shared.items()} self.blocked = blocked + self.bij = None def step(self, point): for var, share in self.shared.items(): share.set_value(point[var]) - bij = DictToArrayBijection(self.ordering, point) + if self.bij is None: + self.bij = DictToArrayBijection(self.ordering, point) if self.generates_stats: - apoint, stats = self.astep(bij.map(point)) - return bij.rmap(apoint), stats + apoint, stats = self.astep(self.bij.map(point)) + return self.bij.rmap(apoint), stats else: - apoint = self.astep(bij.map(point)) - return bij.rmap(apoint) + apoint = self.astep(self.bij.map(point)) + return self.bij.rmap(apoint) -class PopulationArrayStepShared(BlockedStep): - """Faster version of ArrayStep that requires the substep method that does not wrap - the functions the step method uses. +class PopulationArrayStepShared(ArrayStepShared): + """Version of ArrayStepShared that allows samplers to access the states + of other chains in the population. - Works by setting shared variables before using the step. This eliminates the mapping - and unmapping overhead as well as moving fewer variables around. + Works by linking a list of Points that is updated as the chains are iterated. """ def __init__(self, vars, shared, blocked=True): @@ -181,16 +182,20 @@ def __init__(self, vars, shared, blocked=True): shared : dict of theano variable -> shared variable blocked : Boolean (default True) """ - self.vars = vars - self.ordering = ArrayOrdering(vars) - self.bij = None - self.shared = {str(var): shared for var, shared in shared.items()} - self.blocked = blocked self.population = None self.this_chain = None self.other_chains = None + return super().__init__(vars, shared, blocked) def link_population(self, population, chain_index): + """Links the sampler to the population. + + Parameters + ---------- + population : list of Points. (The elements of this list must be + replaced with current chain states in every iteration.) 
+ chain_index : int of the index of this sampler in the population + """ self.population = population self.this_chain = chain_index self.other_chains = [c for c in range(len(population)) if c != chain_index] @@ -199,20 +204,6 @@ def link_population(self, population, chain_index): 'increase the number of chains.'.format(self.this_chain, self.other_chains)) return - def step(self, point): - for var, share in self.shared.items(): - share.set_value(point[var]) - - if self.bij is None: - self.bij = DictToArrayBijection(self.ordering, point) - - if self.generates_stats: - apoint, stats = self.astep(self.bij.map(point)) - return self.bij.rmap(apoint), stats - else: - apoint = self.astep(self.bij.map(point)) - return self.bij.rmap(apoint) - class GradientSharedStep(BlockedStep): def __init__(self, vars, model=None, blocked=True, From 8de3977c29c4bd21961ba25f972b087aa165c990 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 17:31:53 +0100 Subject: [PATCH 30/56] printing the number of effective samples per variable --- pymc3/examples/samplers_mvnormal.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index a7d882ae1c..2f30e79cf4 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -45,10 +45,11 @@ def run(steppers, p): step=step_cls(), start=start ) - print('{} samples across {} chains'.format(len(mt), mt.nchains)) runtimes[name] = time.time() - t_start + print('{} samples across {} chains'.format(len(mt), mt.nchains)) traces[name] = mt en = pm.diagnostics.effective_n(mt) + print(en) if USE_XY: effn[name] = np.mean(en['x']) / len(mt) / mt.nchains else: From 67e07a4e72bf39b668d7596608ff1f411cbb6475 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 17:43:11 +0100 Subject: [PATCH 31/56] docstrings and comments --- pymc3/sampling.py | 6 +++--- pymc3/step_methods/metropolis.py | 10 ++++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index aaefa81268..5dfe39042c 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -635,7 +635,7 @@ def _iter_chains(draws, chains, step, start, tune=None, # 1. prepare a BaseTrace for each chain traces = [_choose_backend(None, chain, model=model) for chain in chains] for c,strace in enumerate(traces): - # initialize the trace size + # initialize the trace size and variable transforms if len(strace) > 0: update_start_vals(start[c], strace.point(-1), model) else: @@ -649,13 +649,13 @@ def _iter_chains(draws, chains, step, start, tune=None, # 3. 
Set up the steppers steppers = [None] * nchains for c in range(nchains): - # need indepenently tuned samplers for each chain + # need indepenent samplers for each chain # it is important to copy the actual steppers (but not the delta_logp) if isinstance(step, CompoundStep): chainstep = CompoundStep([copy(m) for m in step.methods]) else: chainstep = copy(step) - # link Population samplers to the shared population state + # link population samplers to the shared population state for sm in chainstep.methods: if isinstance(sm, arraystep.PopulationArrayStepShared): sm.link_population(points, c) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index a49fb0b59e..386b63da9b 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -473,21 +473,23 @@ def competence(var): class DEMetropolis(PopulationArrayStepShared): """ - Differential Evolution Metropolis sampling step + Differential Evolution Metropolis sampling step. Parameters ---------- + lamb : float + Lambda parameter of the DE proposal mechanism. Defaults to 2.38 / sqrt(2 * ndim) vars : list List of variables for sampler S : standard deviation or covariance matrix Some measure of variance to parameterize proposal distribution proposal_dist : function Function that returns zero-mean deviates when parameterized with - S (and n). Defaults to normal. + S (and n). Defaults to Uniform(-S,+S). scaling : scalar or array - Initial scale factor for proposal. Defaults to 1. + Initial scale factor for epsilon. Defaults to 0.001 tune : bool - Flag for tuning. Defaults to True. + Flag for tuning the scaling. Defaults to True. tune_interval : int The frequency of tuning. Defaults to 100 iterations. model : PyMC Model From 5769b9de0bcfc787c03386783ebd5acaee69276a Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 17:45:49 +0100 Subject: [PATCH 32/56] falling back to sequential sampling if no population samplers are used --- pymc3/sampling.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 5dfe39042c..d90492237e 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -401,19 +401,22 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, try: trace = _mp_sample(**sample_args) except pickle.PickleError: - pm._log.warn("Could not pickle model, sampling sequentially.") + pm._log.warn("Could not pickle model, sampling singlethreaded.") pm._log.debug('Pickling error:', exec_info=True) parallel = False except AttributeError as e: if str(e).startswith("AttributeError: Can't pickle"): - pm._log.warn("Could not pickle model, sampling sequentially.") + pm._log.warn("Could not pickle model, sampling singlethreaded.") pm._log.debug('Pickling error:', exec_info=True) parallel = False else: raise - if not parallel: + if not parallel and has_population_samplers: pm._log.info('Multichain sampling ({} chains in 1 job)'.format(chains)) trace = _sample_many(**sample_args) + else: + pm._log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) + trace = _sample_many_sequentially(**sample_args) discard = tune if discard_tuned_samples else 0 return trace[discard:] From 5f6c29cc396542e84f4ecc134e6e5a622bea5553 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 17:48:32 +0100 Subject: [PATCH 33/56] removed debugging stats logging --- pymc3/step_methods/metropolis.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pymc3/step_methods/metropolis.py 
b/pymc3/step_methods/metropolis.py index 386b63da9b..e4254b45a0 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -504,9 +504,6 @@ class DEMetropolis(PopulationArrayStepShared): stats_dtypes = [{ 'accept': np.float64, 'tune': np.bool, - 'ir1': np.int, - 'ir2': np.int, - 'scaling': np.float64, }] def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=0.001, @@ -567,9 +564,6 @@ def astep(self, q0): stats = { 'tune': self.tune, 'accept': np.exp(accept), - 'ir1': ir1, - 'ir2': ir2, - 'scaling': self.scaling } return q_new, [stats] From b53b510d52d0f5905a503580a54a897c8fd4f922 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 20:32:35 +0100 Subject: [PATCH 34/56] fixed nested if else --- pymc3/sampling.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index d90492237e..29d4ed6556 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -411,12 +411,13 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, parallel = False else: raise - if not parallel and has_population_samplers: - pm._log.info('Multichain sampling ({} chains in 1 job)'.format(chains)) - trace = _sample_many(**sample_args) - else: - pm._log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) - trace = _sample_many_sequentially(**sample_args) + if not parallel: + if has_population_samplers: + pm._log.info('Multichain sampling ({} chains in 1 job)'.format(chains)) + trace = _sample_many(**sample_args) + else: + pm._log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) + trace = _sample_many_sequentially(**sample_args) discard = tune if discard_tuned_samples else 0 return trace[discard:] From 4af2aa5b0432b7d3581d03dd16d269a3274b381d Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 28 Nov 2017 20:44:09 +0100 Subject: [PATCH 35/56] updated print statement --- pymc3/examples/samplers_mvnormal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index 2f30e79cf4..60e9bb4c15 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -46,7 +46,7 @@ def run(steppers, p): start=start ) runtimes[name] = time.time() - t_start - print('{} samples across {} chains'.format(len(mt), mt.nchains)) + print('{} samples across {} chains'.format(len(mt) * mt.nchains, mt.nchains)) traces[name] = mt en = pm.diagnostics.effective_n(mt) print(en) From e5f7ff2ffb88781d2dec06961e551e174251a074 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 10:55:28 +0100 Subject: [PATCH 36/56] fixed a bug related to bijection updating --- pymc3/step_methods/arraystep.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index 581f77ec18..7f3bffd406 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -156,8 +156,7 @@ def step(self, point): for var, share in self.shared.items(): share.set_value(point[var]) - if self.bij is None: - self.bij = DictToArrayBijection(self.ordering, point) + self.bij = DictToArrayBijection(self.ordering, point) if self.generates_stats: apoint, stats = self.astep(self.bij.map(point)) From 874e6b2bfd49a6176e283018baabb5cae501d3dd Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 12:30:34 +0100 Subject: [PATCH 37/56] docstring and comments --- 
pymc3/examples/samplers_mvnormal.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index 60e9bb4c15..a702ebe1cd 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -1,6 +1,9 @@ """ Comparing different samplers on a correlated bivariate normal distribution. +This example will sample a bivariate normal with Metropolis, NUTS and DEMetropolis +at two correlations (0, 0.9) and print out the effective sample sizes, runtime and +normalized effective sampling rates. """ @@ -11,7 +14,10 @@ import theano.tensor as tt import matplotlib.pyplot as plt - +# with this flag one can switch between defining the bivariate normal as +# either a 2D MvNormal (USE_XY = False) split up the two dimensions into +# two variables 'x' and 'y'. The latter is recommended because it highlights +# different behaviour with respect to blocking. USE_XY = True def run(steppers, p): From eba4a9d46a7cf5a7eb2299fd602ac3ceeb2d686d Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 12:40:21 +0100 Subject: [PATCH 38/56] refactoring for better clarity and less diff --- pymc3/sampling.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 29d4ed6556..f2b2516237 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -413,11 +413,11 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, raise if not parallel: if has_population_samplers: - pm._log.info('Multichain sampling ({} chains in 1 job)'.format(chains)) - trace = _sample_many(**sample_args) + pm._log.info('Population sampling ({} chains in 1 job)'.format(chains)) + trace = _sample_population(**sample_args) else: pm._log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) - trace = _sample_many_sequentially(**sample_args) + trace = _sample_many(**sample_args) discard = tune if discard_tuned_samples else 0 return trace[discard:] @@ -450,7 +450,7 @@ def _check_start_shape(model, start): raise ValueError("Bad shape for start argument:{}".format(e)) -def _sample_many_sequentially(draws, chain, chains, start, random_seed, **kwargs): +def _sample_many(draws, chain, chains, start, random_seed, **kwargs): traces = [] for i in range(chains): trace = _sample(draws=draws, chain=chain + i, start=start[i], @@ -469,10 +469,12 @@ def _sample_many_sequentially(draws, chain, chains, start, random_seed, **kwargs return MultiTrace(traces) -def _sample_many(draws, chain, chains, start, random_seed, step, tune, model, progressbar=None, **kwargs): +def _sample_population(draws, chain, chains, start, random_seed, step, tune, + model, progressbar=None, **kwargs): # create the generator that iterates all chains in parallel chains = [chain + c for c in range(chains)] - sampling = _iter_chains(draws, chains, step, start, tune=tune, model=model, random_seed=random_seed) + sampling = _iter_chains(draws, chains, step, start, tune=tune, + model=model, random_seed=random_seed) if progressbar: sampling = tqdm(sampling, total=draws) From f71a37cb663add3cddd81adae688ba96636a6d32 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 12:49:35 +0100 Subject: [PATCH 39/56] code style --- pymc3/sampling.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index f2b2516237..2ebed3561c 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -622,6 +622,7 @@ def _iter_chains(draws, 
chains, step, start, tune=None, model=None, random_seed=None): if not isinstance(step, CompoundStep): raise ValueError('step-kwarg must be CompoundStep, got {} instead.'.format(step.__class__)) + # chains contains the chain numbers, but for indexing we need indices... nchains = len(chains) model = modelcontext(model) @@ -631,7 +632,7 @@ def _iter_chains(draws, chains, step, start, tune=None, if draws < 1: raise ValueError('Argument `draws` should be above 0.') - # IMPORTANT: The initialization of traces, samplers and points must happen in the right order: + # The initialization of traces, samplers and points must happen in the right order: # 1. traces are initialized and update_start_vals configures variable transforms # 2. population of points is created # 3. steppers are initialized and linked to the points object From 63ae017bfe2fb21208e7e684cf57cba3bb21ddb1 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 14:12:18 +0100 Subject: [PATCH 40/56] removed unused import --- pymc3/step_methods/metropolis.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index e4254b45a0..4ce3a88c41 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -5,7 +5,6 @@ from ..distributions import draw_values from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence -from ..blocking import DictToArrayBijection import pymc3 as pm from pymc3.theanof import floatX From e20295a12d0f9226ec60e24041d02b01802d477e Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 19:31:07 +0100 Subject: [PATCH 41/56] fixed a bug where Slice was preferred on multidimensional variables --- pymc3/step_methods/slicer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/step_methods/slicer.py b/pymc3/step_methods/slicer.py index 3b5ec241cb..f5f7d7e9c5 100644 --- a/pymc3/step_methods/slicer.py +++ b/pymc3/step_methods/slicer.py @@ -97,7 +97,7 @@ def astep(self, q0, logp): @staticmethod def competence(var, has_grad): if var.dtype in continuous_types: - if not var.shape: + if not var.shape or var.shape.ndim == 1: return Competence.PREFERRED return Competence.COMPATIBLE return Competence.INCOMPATIBLE From acc7538c65cb391b8a990f9e160bfa6641d8fff5 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 19:35:13 +0100 Subject: [PATCH 42/56] printing the stepper hierarchy, fixed a variable name, handling non-CompoundStep methods --- pymc3/sampling.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 2ebed3561c..49332df48c 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -145,6 +145,19 @@ def assign_step_methods(model, step=None, methods=STEP_METHODS, return instantiate_steppers(model, steps, selected_steps, step_kwargs) +def print_step_hierarchy(s, level=0): + if isinstance(s, (list, tuple)): + pm._log.info('>' * level + 'list') + for i in s: + print_step_hierarchy(i, level+1) + elif isinstance(s, CompoundStep): + pm._log.info('>' * level + 'CompoundStep') + for i in s.methods: + print_step_hierarchy(i, level+1) + else: + pm._log.info('>' * level + '{}: {}'.format(s.__class__.__name__, s.vars)) + + def _cpu_count(): """Try to guess the number of CPUs in the system. 
@@ -359,15 +372,8 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, else: step = assign_step_methods(model, step, step_kwargs=step_kwargs) - if not isinstance(step, CompoundStep): - if not isinstance(step, (list, tuple)): - step = [step] + if isinstance(step, list): step = CompoundStep(step) - - # print the sampler assignment - for sm in step.methods: - pm._log.info('{}: {}'.format(sm.__class__.__name__, sm.vars)) - if start is None: start = {} if isinstance(start, dict): @@ -393,11 +399,12 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, has_population_samplers = np.any([ isinstance(m, arraystep.PopulationArrayStepShared) - for m in step.methods + for m in (step.methods if isinstance(step, CompoundStep) else [step]) ]) parallel = njobs > 1 and chains > 1 and not has_population_samplers if parallel: pm._log.info('Multiprocess sampling ({} chains in {} jobs)'.format(chains, njobs)) + print_step_hierarchy(step) try: trace = _mp_sample(**sample_args) except pickle.PickleError: @@ -414,9 +421,11 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, if not parallel: if has_population_samplers: pm._log.info('Population sampling ({} chains in 1 job)'.format(chains)) + print_step_hierarchy(step) trace = _sample_population(**sample_args) else: pm._log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) + print_step_hierarchy(step) trace = _sample_many(**sample_args) discard = tune if discard_tuned_samples else 0 @@ -483,7 +492,7 @@ def _sample_population(draws, chain, chains, start, random_seed, step, tune, for it,traces in enumerate(sampling): latest_traces = traces # TODO: add support for liveplot during population-sampling - return MultiTrace(traces) + return MultiTrace(latest_traces) def _sample(chain, progressbar, random_seed, start, draws=None, step=None, @@ -620,9 +629,6 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, def _iter_chains(draws, chains, step, start, tune=None, model=None, random_seed=None): - if not isinstance(step, CompoundStep): - raise ValueError('step-kwarg must be CompoundStep, got {} instead.'.format(step.__class__)) - # chains contains the chain numbers, but for indexing we need indices... 
nchains = len(chains) model = modelcontext(model) @@ -663,7 +669,7 @@ def _iter_chains(draws, chains, step, start, tune=None, else: chainstep = copy(step) # link population samplers to the shared population state - for sm in chainstep.methods: + for sm in (chainstep.methods if isinstance(step, CompoundStep) else [chainstep]): if isinstance(sm, arraystep.PopulationArrayStepShared): sm.link_population(points, c) steppers[c] = chainstep From c3c233c00394996ff0506543379629f677730208 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 19:36:15 +0100 Subject: [PATCH 43/56] fixed a bug where DEMetropolis assigned itself to discrete vars, fixed a bug that happened when only one var was assigned --- pymc3/step_methods/metropolis.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 4ce3a88c41..4deaf562de 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -505,13 +505,13 @@ class DEMetropolis(PopulationArrayStepShared): 'tune': np.bool, }] - def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=0.001, + def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001, tune=True, tune_interval=100, model=None, mode=None, **kwargs): model = pm.modelcontext(model) if vars is None: - vars = model.vars + vars = model.cont_vars vars = pm.inputvars(vars) if S is None: @@ -523,7 +523,9 @@ def __init__(self, lamb=None, vars=None, S=None, proposal_dist=None, scaling=0.0 self.proposal_dist = UniformProposal(S) self.scaling = np.atleast_1d(scaling).astype('d') - self.lamb = lamb if lamb is not None else 2.38 / np.sqrt(2 * S.size) + if lamb is None: + lamb = 2.38 / np.sqrt(2 * S.size) + self.lamb = float(lamb) self.tune = tune self.tune_interval = tune_interval self.steps_until_tune = tune_interval From 01adad83c630b1212e754b176a279e94bdad9d0e Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 29 Nov 2017 20:18:37 +0100 Subject: [PATCH 44/56] improved code style, including Slice in comparison --- pymc3/examples/samplers_mvnormal.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index a702ebe1cd..c7007a562f 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -12,7 +12,6 @@ import pandas as pd import pymc3 as pm import theano.tensor as tt -import matplotlib.pyplot as plt # with this flag one can switch between defining the bivariate normal as # either a 2D MvNormal (USE_XY = False) split up the two dimensions into @@ -33,7 +32,7 @@ def run(steppers, p): mu = np.array([0.,0.]) cov = np.array([[1.,p],[p,1.]]) z = pm.MvNormal.dist(mu=mu, cov=cov, shape=(2,)).logp(tt.stack([x,y])) - p = pm.Potential('logp_xy', z) + pot = pm.Potential('logp_xy', z) start = {'x': 0, 'y': 0} else: mu = np.array([0.,0.]) @@ -43,7 +42,6 @@ def run(steppers, p): for step_cls in steppers: name = step_cls.__name__ - print('\r\nStep method: {}'.format(name)) t_start = time.time() mt = pm.sample( draws=10000, @@ -55,7 +53,7 @@ def run(steppers, p): print('{} samples across {} chains'.format(len(mt) * mt.nchains, mt.nchains)) traces[name] = mt en = pm.diagnostics.effective_n(mt) - print(en) + print('effective: {}\r\n'.format(en)) if USE_XY: effn[name] = np.mean(en['x']) / len(mt) / mt.nchains else: @@ -66,6 +64,7 @@ def run(steppers, p): if __name__ == '__main__': methods = [ pm.Metropolis, + pm.Slice, 
pm.NUTS, pm.DEMetropolis ] From 44b111570e6e7a4074de988528952a0bc249fb05 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 30 Nov 2017 11:46:40 +0100 Subject: [PATCH 45/56] including DEMetropolis in existing tests, added test case for PopulationSamplers --- pymc3/tests/test_step.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py index 22eb7053cc..8e12607d95 100644 --- a/pymc3/tests/test_step.py +++ b/pymc3/tests/test_step.py @@ -9,7 +9,7 @@ from pymc3.step_methods import (NUTS, BinaryGibbsMetropolis, CategoricalGibbsMetropolis, Metropolis, Slice, CompoundStep, NormalProposal, MultivariateNormalProposal, HamiltonianMC, - EllipticalSlice, smc) + EllipticalSlice, smc, DEMetropolis) from pymc3.theanof import floatX from pymc3 import SamplingError from pymc3.distributions import ( @@ -318,7 +318,7 @@ def test_mv_proposal(self): class TestCompoundStep(object): - samplers = (Metropolis, Slice, HamiltonianMC, NUTS) + samplers = (Metropolis, Slice, HamiltonianMC, NUTS, DEMetropolis) @pytest.mark.skipif(theano.config.floatX == "float32", reason="Test fails on 32 bit due to linalg issues") @@ -393,6 +393,22 @@ def kill_grad(x): assert isinstance(steps, Slice) +class TestPopulationSamplers(object): + def test_checks_population_size(self): + """Test that population samplers check the population size.""" + steppers = [ + DEMetropolis + ] + with Model() as model: + n = Normal('n', mu=0, sd=1) + for stepper in steppers: + step = stepper() + with pytest.raises(ValueError): + trace = sample(draws=100, chains=1, step=step) + trace = sample(draws=100, chains=4, step=step) + pass + + @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") class TestNutsCheckTrace(object): def test_multiple_samplers(self): From 9a07a43905869de8abb90ba60c41ff3b70e1b06c Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 30 Nov 2017 14:07:24 +0100 Subject: [PATCH 46/56] fixes python 2.7 compatibility --- pymc3/step_methods/arraystep.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index 7f3bffd406..5d6d1db1a2 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -184,7 +184,7 @@ def __init__(self, vars, shared, blocked=True): self.population = None self.this_chain = None self.other_chains = None - return super().__init__(vars, shared, blocked) + return super(PopulationArrayStepShared, self).__init__(vars, shared, blocked) def link_population(self, population, chain_index): """Links the sampler to the population. 
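For orientation between the DEMetropolis patches above and the parallelization patches that follow: the differential-evolution proposal itself is small enough to sketch. The snippet below is only an illustration of the mechanism summarized in the DEMetropolis docstring (PATCH 31) and in ter Braak (2006), which is cited in PATCH 48 further down; it is not code from this series. It assumes `S` is left at its default of ones, so `epsilon` reduces to a uniform draw scaled by `scaling`; the helper name `de_proposal` is made up for the example, and the Metropolis accept/reject step (handled via `metrop_select` in the real step method) is omitted.

    import numpy as np

    def de_proposal(population, i, lamb=None, scaling=0.001, rng=np.random):
        # Propose a new point for chain i from the current population of chain states.
        # Needs at least two chains besides i, which is why link_population raises
        # when there are too few chains.
        ndim = population[i].size
        if lamb is None:
            lamb = 2.38 / np.sqrt(2 * ndim)   # default from ter Braak (2006)
        # pick two distinct other chains (compare `sample_except`)
        others = [c for c in range(len(population)) if c != i]
        r1, r2 = rng.choice(others, size=2, replace=False)
        # small uniform jitter; with S = ones this is Uniform(-scaling, +scaling)
        epsilon = rng.uniform(-scaling, scaling, size=ndim)
        return population[i] + lamb * (population[r1] - population[r2]) + epsilon

The accept/reject decision on the proposed point then follows the usual Metropolis ratio, as in the other Metropolis variants in metropolis.py.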
From e2cfbbb918c735c191a946d25186ba1f204f2b94 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 30 Nov 2017 20:26:58 +0100 Subject: [PATCH 47/56] Using multiprocessing to parallelize iteration of chain populations (initial) --- pymc3/sampling.py | 149 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 128 insertions(+), 21 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 49332df48c..ade7426502 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1,7 +1,7 @@ from collections import defaultdict, Iterable from copy import copy import pickle - +import multiprocessing from joblib import Parallel, delayed import numpy as np import warnings @@ -627,6 +627,108 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, step.report._finalize(strace) +class ChainWorkers(object): + def __init__(self, steppers): + self.nchains = len(steppers) + self._master_ends = [] + self._processes = [] + # configure a child process for each stepper + for c,stepper in enumerate(steppers): + slave_end, master_end = multiprocessing.Pipe() + stepper_dumps = pickle.dumps(stepper, protocol=4) + process = multiprocessing.Process( + target=self.__class__._run_slave, + args=(c, stepper_dumps, slave_end), + name='ChainWalker{}'.format(c) + ) + self._master_ends.append(master_end) + self._processes.append(process) + return super(ChainWorkers, self).__init__() + + def __enter__(self): + for process in self._processes: + process.start() + return + + def __exit__(self, exc_type, exc_val, exc_tb): + try: + # terminate the workers + for master_end in self._master_ends: + master_end.send(None) + # join the processes + for process in self._processes: + process.join(timeout=3) + except: + pm._log.warning('Termination failed.') + return + + @staticmethod + def _run_slave(c, stepper_dumps, slave_end): + """Started on a separate process to perform stepping of a chain. + + Parameters + ---------- + c : int + number of this chain + stepper : BlockedStep + a step method such as CompoundStep + slave_end : multiprocessing.connection.PipeConnection + This is our connection to the main process + """ + try: + stepper = pickle.loads(stepper_dumps) + # the stepper is not necessarily a PopulationArraySharedStep itself, + # but rather a CompoundStep, but PopulationArrayStepShared.population + # has to be updated. Therefore we identify the substeppers first. + population_steppers = [] + for sm in (stepper.methods if isinstance(stepper, CompoundStep) else [stepper]): + if isinstance(sm, arraystep.PopulationArrayStepShared): + population_steppers.append(sm) + while True: + incoming = slave_end.recv() + if incoming is None: + break + tune_stop, population = incoming + if tune_stop: + stop_tuning(stepper) + # forward the population to the PopulationArrayStepShared objects + for popstep in population_steppers: + popstep.population = population + update = stepper.step(population[c]) + slave_end.send(update) + except Exception: + pm._log.exception('ChainWalker{}'.format(c)) + return + + def step_start(self, tune_stop, population): + """Submits the instructions to make a step. + + Parameters + ---------- + tune_stop : bool + Indicates if the condition (i == tune) is fulfilled + population : list + Current Points of all chains + """ + for c in range(self.nchains): + self._master_ends[c].send((tune_stop, population)) + return + + def step_end(self): + """Blockingly waits for the completion of a step. 
+ + Returns + ------- + update : Point + The new position of the chain + """ + updates = [ + self._master_ends[c].recv() + for c in range(self.nchains) + ] + return updates + + def _iter_chains(draws, chains, step, start, tune=None, model=None, random_seed=None): # chains contains the chain numbers, but for indexing we need indices... @@ -657,7 +759,6 @@ def _iter_chains(draws, chains, step, start, tune=None, # 2. create a population (points) that tracks each chain # it is updated as the chains are advanced points = [Point(start[c], model=model) for c in range(nchains)] - updates = [None] * nchains # 3. Set up the steppers steppers = [None] * nchains @@ -682,27 +783,33 @@ def _iter_chains(draws, chains, step, start, tune=None, traces[c].setup(draws, c) try: - # iterate draws of all chains - for i in range(draws): - # step each of the chains - for c in range(nchains): - if i == tune: - steppers[c] = stop_tuning(steppers[c]) - updates[c] = steppers[c].step(points[c]) - - # apply the update to the points and record to the traces - for c,strace in enumerate(traces): - if steppers[c].generates_stats: - points[c], states = updates[c] - if strace.supports_sampler_stats: - strace.record(points[c], states) + workers = ChainWorkers(steppers) + with workers: + # iterate draws of all chains + for i in range(draws): + tune_stop = i == tune + workers.step_start(tune_stop, points) + updates = workers.step_end() + + ## step each of the chains + #for c in range(nchains): + # if i == tune: + # steppers[c] = stop_tuning(steppers[c]) + # updates[c] = steppers[c].step(points[c]) + + # apply the update to the points and record to the traces + for c,strace in enumerate(traces): + if steppers[c].generates_stats: + points[c], states = updates[c] + if strace.supports_sampler_stats: + strace.record(points[c], states) + else: + strace.record(points[c]) else: + points[c] = updates[c] strace.record(points[c]) - else: - points[c] = updates[c] - strace.record(points[c]) - # yield the state of all chains in parallel - yield traces + # yield the state of all chains in parallel + yield traces except KeyboardInterrupt: for c,strace in enumerate(traces): strace.close() From 30f437d59eb78402fed5d40b9a4351ccd1756a51 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 1 Dec 2017 11:04:57 +0100 Subject: [PATCH 48/56] added references --- pymc3/step_methods/metropolis.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 4deaf562de..69e06fcbb6 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -495,6 +495,14 @@ class DEMetropolis(PopulationArrayStepShared): Optional model for sampling step. Defaults to None (taken from context). mode : string or `Mode` instance. compilation mode passed to Theano functions + + References + ---------- + .. [Braak2006] Cajo C.F. ter Braak (2006). + A Markov Chain Monte Carlo version of the genetic algorithm + Differential Evolution: easy Bayesian computing for real parameter spaces. 
+ Statistics and Computing + `link `__ """ name = 'DEMetropolis' From 4998005e46eb3ccc5abea12298418de1dacdcb7d Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 1 Dec 2017 11:11:23 +0100 Subject: [PATCH 49/56] added a warning that DEMetropolis is experimental --- pymc3/step_methods/metropolis.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 69e06fcbb6..b3c551c2cd 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -2,6 +2,7 @@ import numpy.random as nr import theano import scipy.linalg +import warnings from ..distributions import draw_values from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence @@ -515,6 +516,8 @@ class DEMetropolis(PopulationArrayStepShared): def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001, tune=True, tune_interval=100, model=None, mode=None, **kwargs): + warnings.warn('Population based sampling methods such as DEMetropolis are experimental.' \ + 'Use carefully and be extra critical about their results!') model = pm.modelcontext(model) From 107a618c0cb2fc90411ec0ceeea7afb90e4b8b2a Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 1 Dec 2017 13:23:19 +0100 Subject: [PATCH 50/56] forgotten space --- pymc3/step_methods/metropolis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index b3c551c2cd..3daed1f780 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -517,7 +517,7 @@ class DEMetropolis(PopulationArrayStepShared): def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001, tune=True, tune_interval=100, model=None, mode=None, **kwargs): warnings.warn('Population based sampling methods such as DEMetropolis are experimental.' 
\ - 'Use carefully and be extra critical about their results!') + ' Use carefully and be extra critical about their results!') model = pm.modelcontext(model) From b94906ad23627d156fcc7dba20b104c0ed9a60bf Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 1 Dec 2017 13:51:27 +0100 Subject: [PATCH 51/56] modified the PopulationStepper to automatically use parallelization or fallback to sequential mode --- pymc3/sampling.py | 189 ++++++++++++++++++++++++++++++---------------- 1 file changed, 123 insertions(+), 66 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index ade7426502..348bf96861 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -420,9 +420,9 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, raise if not parallel: if has_population_samplers: - pm._log.info('Population sampling ({} chains in 1 job)'.format(chains)) + pm._log.info('Population sampling ({} chains)'.format(chains)) print_step_hierarchy(step) - trace = _sample_population(**sample_args) + trace = _sample_population(parallelize=njobs > 1, **sample_args) else: pm._log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) print_step_hierarchy(step) @@ -479,11 +479,11 @@ def _sample_many(draws, chain, chains, start, random_seed, **kwargs): def _sample_population(draws, chain, chains, start, random_seed, step, tune, - model, progressbar=None, **kwargs): + model, progressbar=None, parallelize=True, **kwargs): # create the generator that iterates all chains in parallel chains = [chain + c for c in range(chains)] - sampling = _iter_chains(draws, chains, step, start, tune=tune, - model=model, random_seed=random_seed) + sampling = _prepare_iter_population(draws, chains, step, start, tune=tune, + model=model, random_seed=random_seed, parallelize=parallelize) if progressbar: sampling = tqdm(sampling, total=draws) @@ -627,39 +627,68 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, step.report._finalize(strace) -class ChainWorkers(object): - def __init__(self, steppers): +class PopulationStepper(object): + def __init__(self, steppers, parallelize=True): + """Tries to use multiprocessing to parallelize chains. + + Falls back to sequential evaluation if multiprocessing fails. + + In the multiprocessing mode of operation, a new process is started for each + chain/stepper and Pipes are used to communicate with the main process. + + Parameters + ---------- + steppers : list + A collection of independent step methods, one for each chain. 
+ parallelize : bool + Indicates if chain parallelization is desired + """ self.nchains = len(steppers) + self.is_parallelized = False self._master_ends = [] self._processes = [] - # configure a child process for each stepper - for c,stepper in enumerate(steppers): - slave_end, master_end = multiprocessing.Pipe() - stepper_dumps = pickle.dumps(stepper, protocol=4) - process = multiprocessing.Process( - target=self.__class__._run_slave, - args=(c, stepper_dumps, slave_end), - name='ChainWalker{}'.format(c) - ) - self._master_ends.append(master_end) - self._processes.append(process) - return super(ChainWorkers, self).__init__() + self._steppers = steppers + if parallelize and sys.version_info >= (3,4): + try: + # configure a child process for each stepper + pm._log.info('Attempting to parallelize chains.') + for c,stepper in enumerate(steppers): + slave_end, master_end = multiprocessing.Pipe() + stepper_dumps = pickle.dumps(stepper, protocol=4) + process = multiprocessing.Process( + target=self.__class__._run_slave, + args=(c, stepper_dumps, slave_end), + name='ChainWalker{}'.format(c) + ) + # Starting the process might fail and takes time. + # By doing it in the constructor, the sampling progress bar + # will not be confused by the process start. + process.start() + self._master_ends.append(master_end) + self._processes.append(process) + self.is_parallelized = True + except: + pm._log.info('Population parallelization failed. ' \ + 'Falling back to sequential stepping of chains.') + else: + if parallelize: + warnings.warn('Population parallelization is only supported on Python 3.4 and ' \ + 'higher. All {} chains will step on one process.'.format(self.nchains)) + return super(PopulationStepper, self).__init__() def __enter__(self): - for process in self._processes: - process.start() + """Does nothing because processes are already started in __init__.""" return def __exit__(self, exc_type, exc_val, exc_tb): - try: - # terminate the workers - for master_end in self._master_ends: - master_end.send(None) - # join the processes - for process in self._processes: - process.join(timeout=3) - except: - pm._log.warning('Termination failed.') + if len(self._processes) > 0: + try: + for master_end in self._master_ends: + master_end.send(None) + for process in self._processes: + process.join(timeout=3) + except: + pm._log.warning('Termination failed.') return @staticmethod @@ -678,20 +707,23 @@ def _run_slave(c, stepper_dumps, slave_end): try: stepper = pickle.loads(stepper_dumps) # the stepper is not necessarily a PopulationArraySharedStep itself, - # but rather a CompoundStep, but PopulationArrayStepShared.population - # has to be updated. Therefore we identify the substeppers first. + # but rather a CompoundStep. PopulationArrayStepShared.population + # has to be updated, therefore we identify the substeppers first. population_steppers = [] for sm in (stepper.methods if isinstance(stepper, CompoundStep) else [stepper]): if isinstance(sm, arraystep.PopulationArrayStepShared): population_steppers.append(sm) while True: incoming = slave_end.recv() + # receiving a None is the signal to exit if incoming is None: break tune_stop, population = incoming if tune_stop: stop_tuning(stepper) # forward the population to the PopulationArrayStepShared objects + # This is necessary because due to the process fork, the population + # object is no longer shared between the steppers. 
for popstep in population_steppers: popstep.population = population update = stepper.step(population[c]) @@ -700,8 +732,8 @@ def _run_slave(c, stepper_dumps, slave_end): pm._log.exception('ChainWalker{}'.format(c)) return - def step_start(self, tune_stop, population): - """Submits the instructions to make a step. + def step(self, tune_stop, population): + """Steps the entire population of chains. Parameters ---------- @@ -709,28 +741,36 @@ def step_start(self, tune_stop, population): Indicates if the condition (i == tune) is fulfilled population : list Current Points of all chains - """ - for c in range(self.nchains): - self._master_ends[c].send((tune_stop, population)) - return - - def step_end(self): - """Blockingly waits for the completion of a step. Returns ------- update : Point - The new position of the chain + The new positions of the chains """ - updates = [ - self._master_ends[c].recv() - for c in range(self.nchains) - ] + updates = [None] * self.nchains + if self.is_parallelized: + for c in range(self.nchains): + self._master_ends[c].send((tune_stop, population)) + # Blockingly get the step outcomes + for c in range(self.nchains): + updates[c] = self._master_ends[c].recv() + else: + for c in range(self.nchains): + if tune_stop: + self._steppers[c] = stop_tuning(self._steppers[c]) + updates[c] = self._steppers[c].step(population[c]) return updates -def _iter_chains(draws, chains, step, start, tune=None, - model=None, random_seed=None): +def _prepare_iter_population(draws, chains, step, start, tune=None, + model=None, random_seed=None, parallelize=True): + """Prepares a PopulationStepper and traces for population sampling. + + Returns + ------- + _iter_population : generator + The generator the yields traces of all chains at the same time + """ # chains contains the chain numbers, but for indexing we need indices... nchains = len(chains) model = modelcontext(model) @@ -745,7 +785,7 @@ def _iter_chains(draws, chains, step, start, tune=None, # 2. population of points is created # 3. steppers are initialized and linked to the points object # 4. traces are configured to track the sampler stats - + # 5. a PopulationStepper is configured for parallelized stepping # 1. prepare a BaseTrace for each chain traces = [_choose_backend(None, chain, model=model) for chain in chains] @@ -758,7 +798,7 @@ def _iter_chains(draws, chains, step, start, tune=None, # 2. create a population (points) that tracks each chain # it is updated as the chains are advanced - points = [Point(start[c], model=model) for c in range(nchains)] + population = [Point(start[c], model=model) for c in range(nchains)] # 3. Set up the steppers steppers = [None] * nchains @@ -772,7 +812,7 @@ def _iter_chains(draws, chains, step, start, tune=None, # link population samplers to the shared population state for sm in (chainstep.methods if isinstance(step, CompoundStep) else [chainstep]): if isinstance(sm, arraystep.PopulationArrayStepShared): - sm.link_population(points, c) + sm.link_population(population, c) steppers[c] = chainstep # 4. configure tracking of sampler stats @@ -782,20 +822,37 @@ def _iter_chains(draws, chains, step, start, tune=None, else: traces[c].setup(draws, c) + # 5. configure the PopulationStepper (expensive call) + popstep = PopulationStepper(steppers, parallelize) + + # Because the preperations above are expensive, the actual iterator is + # in another method. This way the progbar will not be disturbed. 
+ return _iter_population(draws, tune, popstep, steppers, traces, population) + + +def _iter_population(draws, tune, popstep, steppers, traces, points): + """Generator that iterates a PopulationStepper. + + Parameters + ---------- + draws : int + number of draws per chain + tune : int + number of tuning steps + popstep : PopulationStepper + the helper object for (parallelized) stepping of chains + steppers : list + The step methods for each chain + traces : list + Traces for each chain + points : list + population of chain states + """ try: - workers = ChainWorkers(steppers) - with workers: + with popstep: # iterate draws of all chains for i in range(draws): - tune_stop = i == tune - workers.step_start(tune_stop, points) - updates = workers.step_end() - - ## step each of the chains - #for c in range(nchains): - # if i == tune: - # steppers[c] = stop_tuning(steppers[c]) - # updates[c] = steppers[c].step(points[c]) + updates = popstep.step(i == tune, points) # apply the update to the points and record to the traces for c,strace in enumerate(traces): @@ -813,8 +870,8 @@ def _iter_chains(draws, chains, step, start, tune=None, except KeyboardInterrupt: for c,strace in enumerate(traces): strace.close() - if hasattr(step, 'report'): - step.report._finalize(strace) + if hasattr(steppers[c], 'report'): + steppers[c].report._finalize(strace) raise except BaseException: for c,strace in enumerate(traces): @@ -823,8 +880,8 @@ def _iter_chains(draws, chains, step, start, tune=None, else: for c,strace in enumerate(traces): strace.close() - if hasattr(step, 'report'): - step.report._finalize(strace) + if hasattr(steppers[c], 'report'): + steppers[c].report._finalize(strace) def _choose_backend(trace, chain, shortcuts=None, **kwds): From 58fa3365dee428c469a57fbb85bcebf32ff7d53f Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 1 Dec 2017 15:10:42 +0100 Subject: [PATCH 52/56] avoiding a reimport, disabled chain parallelization by default --- pymc3/sampling.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 348bf96861..b9459002d4 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1,7 +1,6 @@ from collections import defaultdict, Iterable from copy import copy import pickle -import multiprocessing from joblib import Parallel, delayed import numpy as np import warnings @@ -422,7 +421,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, if has_population_samplers: pm._log.info('Population sampling ({} chains)'.format(chains)) print_step_hierarchy(step) - trace = _sample_population(parallelize=njobs > 1, **sample_args) + trace = _sample_population(**sample_args) else: pm._log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) print_step_hierarchy(step) @@ -479,11 +478,11 @@ def _sample_many(draws, chain, chains, start, random_seed, **kwargs): def _sample_population(draws, chain, chains, start, random_seed, step, tune, - model, progressbar=None, parallelize=True, **kwargs): + model, progressbar=None, parallelize=False, **kwargs): # create the generator that iterates all chains in parallel chains = [chain + c for c in range(chains)] - sampling = _prepare_iter_population(draws, chains, step, start, tune=tune, - model=model, random_seed=random_seed, parallelize=parallelize) + sampling = _prepare_iter_population(draws, chains, step, start, parallelize, + tune=tune, model=model, random_seed=random_seed) if progressbar: sampling = tqdm(sampling, total=draws) @@ -628,7 +627,7 @@ def 
_iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, class PopulationStepper(object): - def __init__(self, steppers, parallelize=True): + def __init__(self, steppers, parallelize): """Tries to use multiprocessing to parallelize chains. Falls back to sequential evaluation if multiprocessing fails. @@ -652,6 +651,7 @@ def __init__(self, steppers, parallelize=True): try: # configure a child process for each stepper pm._log.info('Attempting to parallelize chains.') + import multiprocessing for c,stepper in enumerate(steppers): slave_end, master_end = multiprocessing.Pipe() stepper_dumps = pickle.dumps(stepper, protocol=4) @@ -674,6 +674,9 @@ def __init__(self, steppers, parallelize=True): if parallelize: warnings.warn('Population parallelization is only supported on Python 3.4 and ' \ 'higher. All {} chains will step on one process.'.format(self.nchains)) + else: + pm._log.info('Chains are not parallelized. You can enable this by passing ' \ + 'pm.sample(parallelize=True).') return super(PopulationStepper, self).__init__() def __enter__(self): @@ -762,8 +765,8 @@ def step(self, tune_stop, population): return updates -def _prepare_iter_population(draws, chains, step, start, tune=None, - model=None, random_seed=None, parallelize=True): +def _prepare_iter_population(draws, chains, step, start, parallelize, tune=None, + model=None, random_seed=None): """Prepares a PopulationStepper and traces for population sampling. Returns From 3bcf0fde61001c0a0e95498dcfe72ed0c7ca6482 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Fri, 1 Dec 2017 15:14:23 +0100 Subject: [PATCH 53/56] increased nchains --- pymc3/examples/samplers_mvnormal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index c7007a562f..14b87d7958 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -45,7 +45,7 @@ def run(steppers, p): t_start = time.time() mt = pm.sample( draws=10000, - chains=6, + chains=16, parallelize=False, step=step_cls(), start=start ) From c91974230e7635761189af9eb089fe87d1746ede Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Mon, 4 Dec 2017 16:32:39 +0100 Subject: [PATCH 54/56] resolving conflicts --- pymc3/sampling.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index b9459002d4..43d8b53032 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1,6 +1,8 @@ from collections import defaultdict, Iterable from copy import copy import pickle +from six import integer_types + from joblib import Parallel, delayed import numpy as np import warnings @@ -11,7 +13,7 @@ from .backends.ndarray import NDArray from .model import modelcontext, Point from .step_methods import arraystep -from .step_methods import (NUTS, HamiltonianMC, SGFS, Metropolis, BinaryMetropolis, +from .step_methods import (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis, BinaryGibbsMetropolis, CategoricalGibbsMetropolis, Slice, CompoundStep) from .util import update_start_vals @@ -24,7 +26,7 @@ __all__ = ['sample', 'iter_sample', 'sample_ppc', 'sample_ppc_w', 'init_nuts'] -STEP_METHODS = (NUTS, HamiltonianMC, SGFS, Metropolis, BinaryMetropolis, +STEP_METHODS = (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis, BinaryGibbsMetropolis, Slice, CategoricalGibbsMetropolis) From 24b4d633ccd696f8fa3fff0c34b80a0d5afaab26 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Mon, 4 Dec 2017 16:40:09 +0100 Subject: [PATCH 55/56] resolving 
conflicts --- pymc3/sampling.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 43d8b53032..f2bd3865fa 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -12,10 +12,9 @@ from .backends.base import BaseTrace, MultiTrace from .backends.ndarray import NDArray from .model import modelcontext, Point -from .step_methods import arraystep from .step_methods import (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis, BinaryGibbsMetropolis, CategoricalGibbsMetropolis, - Slice, CompoundStep) + Slice, CompoundStep, arraystep) from .util import update_start_vals from .vartypes import discrete_types from pymc3.step_methods.hmc import quadpotential From 7db836a9a9860c8279b037cbc3542fd04e6a012e Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Tue, 5 Dec 2017 11:55:17 +0100 Subject: [PATCH 56/56] included DEMetropolis in new features --- RELEASE-NOTES.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 69c44cd3b8..3572ada903 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -5,6 +5,9 @@ - Improve NUTS initialization `advi+adapt_diag_grad` and add `jitter+adapt_diag_grad` (#2643) - Fixed `compareplot` to use `loo` output. +### New Features +- Michael Osthege added support for population-samplers and implemented differential evolution metropolis (`DEMetropolis`). For models with correlated dimensions that cannot use gradient-based samplers, the `DEMetropolis` sampler can give higher effective sampling rates. (also see [PR#2735](https://github.com/pymc-devs/pymc3/pull/2735)) + ## PyMC3 3.2 (October 10, 2017)
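For reference, an end-to-end usage sketch of what this series adds, modelled on `pymc3/examples/samplers_mvnormal.py` as modified above. The `chains` and `parallelize` keyword arguments are the ones introduced by these patches and may change before release; treat the snippet as a sketch rather than the final API.

    import numpy as np
    import pymc3 as pm

    # A correlated bivariate normal, sampled with the new DEMetropolis
    # population sampler (numbers follow the benchmark example in this series).
    with pm.Model():
        mu = np.zeros(2)
        cov = np.array([[1.0, 0.9],
                        [0.9, 1.0]])
        pm.MvNormal('z', mu=mu, cov=cov, shape=(2,))
        trace = pm.sample(
            draws=10000,
            chains=16,            # DEMetropolis steps a whole population of chains
            parallelize=False,    # sequential stepping; True attempts multiprocessing
            step=pm.DEMetropolis(),
            start={'z': [0., 0.]},
        )

    print(pm.diagnostics.effective_n(trace))

With a correlation of 0.9, the example script compares normalized effective sampling rates for Metropolis, Slice, NUTS and DEMetropolis; that comparison is the basis for the release-note claim about models with correlated dimensions that cannot use gradient-based samplers.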