Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

return_inferencedata option for pm.sample #3911

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
a00fff8
mention arviz functions by name in warning
michaelosthege May 4, 2020
6dec963
convert to InferenceData already in sample function
michaelosthege May 4, 2020
7c4ae31
fix refactoring bugs
michaelosthege May 4, 2020
653e3d2
fix indentation
michaelosthege May 4, 2020
7789c2c
add return_inferencedata option
michaelosthege May 4, 2020
d2b312f
Fix numpy docstring format.
rpgoldman May 4, 2020
1c558d4
pass model to from_pymc3 because of deprecation warning
michaelosthege May 4, 2020
2cf5e54
add test for return_inferencedata option
michaelosthege May 4, 2020
bb3aef0
Merge branch 'master' into return-inferencedata-option
michaelosthege May 4, 2020
67d9c58
advise against keeping warmup draws in a MultiTrace
michaelosthege May 4, 2020
c910221
mention #3911
michaelosthege May 4, 2020
0a06da1
Merge branch 'return-inferencedata-option' of https://github.com/mich…
michaelosthege May 4, 2020
fea82e1
pin to arviz 0.8.0 and address review feedback
michaelosthege May 23, 2020
d5063f2
rerun/update notebook to show inferencedata trace
michaelosthege May 23, 2020
1f42f92
Merge branch 'master' into return-inferencedata-option
michaelosthege May 23, 2020
d95880d
fix typo
michaelosthege May 23, 2020
b7471a8
make all from_pymc3 accessible to the user
michaelosthege May 23, 2020
165cbf7
Merge branch 'return-inferencedata-option' of https://github.com/mich…
michaelosthege May 23, 2020
dee0870
remove duplicate entry, and wording
michaelosthege May 23, 2020
7b083ed
address review feedback
michaelosthege May 23, 2020
96a2387
update arviz to 0.8.1 because of bugfix
michaelosthege May 25, 2020
6ff17a2
incorporate review feedback
michaelosthege May 25, 2020
fa324ff
use arviz plot_ppc
michaelosthege May 25, 2020
9d208c4
also ignore Visual Studio cache
michaelosthege May 26, 2020
d154a04
fix warmup saving logic and test
michaelosthege May 26, 2020
2912825
require latest ArviZ patch
michaelosthege May 26, 2020
50d7588
change warning to nuget users towards InferenceData
michaelosthege May 26, 2020
ff7e1ea
update ArviZ minimum version
michaelosthege May 26, 2020
e698387
address review feedback
michaelosthege May 26, 2020
3f6aafd
start showing the FutureWarning about return_inferencedata in minor r…
michaelosthege May 26, 2020
0f756e3
require arviz>=0.8.3 for latest bugfix
michaelosthege May 28, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pymc3/backends/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def save_trace(trace: MultiTrace, directory: Optional[str]=None, overwrite=False
"""
warnings.warn(
'The `save_trace` function will soon be removed.'
'Instead, use ArviZ to save/load traces.',
'Instead, use `arviz.to_netcdf` to save traces.',
DeprecationWarning,
)

Expand Down Expand Up @@ -98,7 +98,7 @@ def load_trace(directory: str, model=None) -> MultiTrace:
"""
warnings.warn(
'The `load_trace` function will soon be removed.'
'Instead, use ArviZ to save/load traces.',
'Instead, use `arviz.from_netcdf` to load traces.',
DeprecationWarning,
)
straces = []
Expand Down
17 changes: 7 additions & 10 deletions pymc3/backends/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import typing
from ..util import is_transformed_name, get_untransformed_name

import arviz

logger = logging.getLogger('pymc3')

Expand Down Expand Up @@ -98,31 +99,27 @@ def raise_ok(self, level='error'):
if errors:
raise ValueError('Serious convergence issues during sampling.')

def _run_convergence_checks(self, trace, model):
if trace.nchains == 1:
def _run_convergence_checks(self, idata:arviz.InferenceData, model):
if idata.posterior.sizes['chain'] == 1:
msg = ("Only one chain was sampled, this makes it impossible to "
"run some convergence checks")
warn = SamplerWarning(WarningType.BAD_PARAMS, msg, 'info',
None, None, None)
self._add_warnings([warn])
return

from pymc3 import rhat, ess
from arviz import from_pymc3

valid_name = [rv.name for rv in model.free_RVs + model.deterministics]
varnames = []
for rv in model.free_RVs:
rv_name = rv.name
if is_transformed_name(rv_name):
rv_name2 = get_untransformed_name(rv_name)
rv_name = rv_name2 if rv_name2 in valid_name else rv_name
if rv_name in trace.varnames:
if rv_name in idata.posterior:
varnames.append(rv_name)

idata = from_pymc3(trace, log_likelihood=False)
self._ess = ess = ess(idata, var_names=varnames)
self._rhat = rhat = rhat(idata, var_names=varnames)
self._ess = ess = arviz.ess(idata, var_names=varnames)
self._rhat = rhat = arviz.rhat(idata, var_names=varnames)

warnings = []
rhat_max = max(val.max() for val in rhat.values())
Expand All @@ -147,7 +144,7 @@ def _run_convergence_checks(self, trace, model):
warnings.append(warn)

eff_min = min(val.min() for val in ess.values())
n_samples = len(trace) * trace.nchains
n_samples = idata.posterior.sizes['chain'] * idata.posterior.sizes['draw']
if eff_min < 200 and n_samples >= 500:
msg = ("The estimated number of effective samples is smaller than "
"200 for some parameters.")
Expand Down
4 changes: 2 additions & 2 deletions pymc3/backends/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def load(name, model=None):
"""
warnings.warn(
'The `load` function will soon be removed. '
'Please use ArviZ to save traces. '
'Please use `arviz.from_netcdf` to load traces. '
'If you have good reasons for using the `load` function, file an issue and tell us about them. ',
DeprecationWarning,
)
Expand Down Expand Up @@ -239,7 +239,7 @@ def dump(name, trace, chains=None):
"""
warnings.warn(
'The `dump` function will soon be removed. '
'Please use ArviZ to save traces. '
'Please use `arviz.to_netcdf` to save traces. '
'If you have good reasons for using the `dump` function, file an issue and tell us about them. ',
DeprecationWarning,
)
Expand Down
40 changes: 37 additions & 3 deletions pymc3/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import time
import warnings

import arviz
import numpy as np
import theano.gradient as tg
from theano.tensor import Tensor
Expand Down Expand Up @@ -244,6 +245,8 @@ def sample(
discard_tuned_samples=True,
compute_convergence_checks=True,
callback=None,
*,
return_inferencedata=None,
**kwargs
):
"""Draw samples from the posterior using the given step methods.
Expand Down Expand Up @@ -326,6 +329,10 @@ def sample(
is drawn from.

Sampling can be interrupted by throwing a ``KeyboardInterrupt`` in the callback.
return_inferencedata: bool
AlexAndorra marked this conversation as resolved.
Show resolved Hide resolved
Whether to return the trace as an `arviz.InferenceData` (True) object or a `MultiTrace` (False)
Defaults to `False`, but we'll switch to `True` in version 4.0.0.
michaelosthege marked this conversation as resolved.
Show resolved Hide resolved

Returns
-------
trace: pymc3.backends.base.MultiTrace
Expand Down Expand Up @@ -386,6 +393,20 @@ def sample(
if not isinstance(random_seed, Iterable):
raise TypeError("Invalid value for `random_seed`. Must be tuple, list or int")

if return_inferencedata is None:
warnings.warn(
"In v4.0.0, pm.sample will return an `arviz.InferenceData` object instead of a `MultiTrace` by default. "
michaelosthege marked this conversation as resolved.
Show resolved Hide resolved
"You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.",
FutureWarning
)
# set the default
return_inferencedata = False
if return_inferencedata and not discard_tuned_samples:
raise NotImplementedError(
'arviz.from_pymc3 does not handle warmups yet. See ArviZ issue #1146.'
'For this reason, we can not return InferenceData tat includes warmup draws right now.'
michaelosthege marked this conversation as resolved.
Show resolved Hide resolved
)

if start is not None:
for start_vals in start:
_check_start_shape(model, start_vals)
Expand Down Expand Up @@ -536,14 +557,27 @@ def sample(
)

if compute_convergence_checks:
# convert trace to InferenceData, with awareness of warmup!
# arviz 0.7.0 doesn't ignore warmup draws if they are in the trace! (see arviz #1146)
# that's why here, we make sure to only put actual posterior samples into idata.posterior
trace_warmup = trace[:-n_draws] # <-- may result in len(trace_warmup) == 0
trace_posterior = trace[-n_draws:]
idata = arviz.from_pymc3(trace_posterior, log_likelihood=False)
# save additional sampling metadata
idata.posterior.attrs['n_tune'] = n_tune
idata.posterior.attrs['n_draws'] = n_draws
idata.posterior.attrs['t_sampling'] = t_sampling

if draws - tune < 100:
warnings.warn("The number of samples is too small to check convergence reliably.")
else:
trace.report._run_convergence_checks(trace, model)

trace.report._run_convergence_checks(idata, model)
trace.report._log_summary()

return trace
if return_inferencedata:
return idata
else:
return trace


def _check_start_shape(model, start):
Expand Down