From 4f110906ac622c657dad19ab2742f60ba3f22624 Mon Sep 17 00:00:00 2001 From: Antoine Petit Date: Tue, 21 Feb 2023 10:25:52 +0100 Subject: [PATCH 1/3] Initializing the chain once in posterior_to_xarray The goal of this change is to prevent large memory consumption during the creation of an InferenceData object from emcee. The change also improves efficiency. --- arviz/data/io_emcee.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arviz/data/io_emcee.py b/arviz/data/io_emcee.py index 88ff7a3540..8162a5758e 100644 --- a/arviz/data/io_emcee.py +++ b/arviz/data/io_emcee.py @@ -116,12 +116,14 @@ def __init__( def posterior_to_xarray(self): """Convert the posterior to an xarray dataset.""" # Use emcee3 syntax, else use emcee2 + # A chain object is created outside of the dictionary to avoid a spike in memory usage + if hasattr(self.sampler, "get_chain"): + chain = self.sampler.get_chain().swapaxes(0, 1) + else: + chain = self.sampler.chain + data = { - var_name: ( - self.sampler.get_chain()[(..., idx)].swapaxes(0, 1) - if hasattr(self.sampler, "get_chain") - else self.sampler.chain[(..., idx)] - ) + var_name: (chain[(..., idx)]) for idx, var_name in zip(self.slices, self.var_names) } return dict_to_dataset( From 88722b5e917ef3ab1f74adbb2511d6bf15f98342 Mon Sep 17 00:00:00 2001 From: Antoine Petit Date: Tue, 21 Feb 2023 10:49:40 +0100 Subject: [PATCH 2/3] Documenting fix --- CHANGELOG.md | 5 +++++ arviz/data/io_emcee.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0045f112d3..6405ff5a69 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Change Log +## v0.x.x Unreleased + +### Maintenance and fixes +- Fix memory usage and improve efficiency for `io_emcee.posterior_to_xarray`. 
+ ## v0.15.0 (2023 Feb 19) ### New features diff --git a/arviz/data/io_emcee.py b/arviz/data/io_emcee.py index 8162a5758e..cba8b904a0 100644 --- a/arviz/data/io_emcee.py +++ b/arviz/data/io_emcee.py @@ -115,8 +115,8 @@ def __init__( def posterior_to_xarray(self): """Convert the posterior to an xarray dataset.""" - # Use emcee3 syntax, else use emcee2 # A chain object is created outside of the dictionary to avoid a spike in memory usage + # Use emcee3 syntax, else use emcee2 if hasattr(self.sampler, "get_chain"): chain = self.sampler.get_chain().swapaxes(0, 1) else: From 695218b0d44ad18248693615c79cbb87bfe8e5d5 Mon Sep 17 00:00:00 2001 From: Oriol Abril-Pla Date: Tue, 21 Feb 2023 18:30:56 +0100 Subject: [PATCH 3/3] Apply suggestions from code review --- CHANGELOG.md | 2 +- arviz/data/io_emcee.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6405ff5a69..48bc470b1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## v0.x.x Unreleased ### Maintenance and fixes -- Fix memory usage and improve efficiency for `io_emcee.posterior_to_xarray`. +- Fix memory usage and improve efficiency in `from_emcee`. ## v0.15.0 (2023 Feb 19) diff --git a/arviz/data/io_emcee.py b/arviz/data/io_emcee.py index cba8b904a0..c0a516fd39 100644 --- a/arviz/data/io_emcee.py +++ b/arviz/data/io_emcee.py @@ -115,15 +115,14 @@ def __init__( def posterior_to_xarray(self): """Convert the posterior to an xarray dataset.""" - # A chain object is created outside of the dictionary to avoid a spike in memory usage # Use emcee3 syntax, else use emcee2 if hasattr(self.sampler, "get_chain"): - chain = self.sampler.get_chain().swapaxes(0, 1) + samples_ary = self.sampler.get_chain().swapaxes(0, 1) else: - chain = self.sampler.chain + samples_ary = self.sampler.chain data = { - var_name: (chain[(..., idx)]) + var_name: (samples_ary[(..., idx)]) for idx, var_name in zip(self.slices, self.var_names) } return dict_to_dataset(