Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge fit() and fit_dataset(). Replace summary() with transform(). Add fit_transform(). #112

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 33 additions & 33 deletions examples/02_meta-analysis/plot_meta-analysis_walkthrough.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,13 @@
#
# Notice that these models don't use :class:`~pymare.core.Dataset` objects.
stouff = estimators.StoufferCombinationTest()
stouff.fit(z[:, None])
stouff._fit(z[:, None])
tsalo marked this conversation as resolved.
Show resolved Hide resolved
print("Stouffers")
print("p: {}".format(stouff.params_["p"]))
print()

fisher = estimators.FisherCombinationTest()
fisher.fit(z[:, None])
fisher._fit(z[:, None])
print("Fishers")
print("p: {}".format(fisher.params_["p"]))

Expand All @@ -162,11 +162,11 @@
# This estimator does not attempt to estimate between-study variance.
# Instead, it takes ``tau2`` (:math:`\tau^{2}`) as an argument.
wls = estimators.WeightedLeastSquares()
wls.fit_dataset(dset)
wls_summary = wls.summary()
results["Weighted Least Squares"] = wls_summary.to_df()
wls.fit(dset)
wls_results = wls.transform()
tsalo marked this conversation as resolved.
Show resolved Hide resolved
results["Weighted Least Squares"] = wls_results.to_df()
print("Weighted Least Squares")
print(wls_summary.to_df().T)
print(wls_results.to_df().T)

###############################################################################
# Methods that estimate between-study variance
Expand All @@ -180,54 +180,54 @@
# can use either maximum-likelihood (ML) or restricted maximum-likelihood (REML)
# to iteratively estimate it.
dsl = estimators.DerSimonianLaird()
dsl.fit_dataset(dset)
dsl_summary = dsl.summary()
results["DerSimonian-Laird"] = dsl_summary.to_df()
dsl.fit(dset)
dsl_results = dsl.transform()
results["DerSimonian-Laird"] = dsl_results.to_df()
print("DerSimonian-Laird")
print(dsl_summary.to_df().T)
print(dsl_results.to_df().T)
print()

hedge = estimators.Hedges()
hedge.fit_dataset(dset)
hedge_summary = hedge.summary()
results["Hedges"] = hedge_summary.to_df()
hedge.fit(dset)
hedge_results = hedge.transform()
results["Hedges"] = hedge_results.to_df()
print("Hedges")
print(hedge_summary.to_df().T)
print(hedge_results.to_df().T)
print()

vb_ml = estimators.VarianceBasedLikelihoodEstimator(method="ML")
vb_ml.fit_dataset(dset)
vb_ml_summary = vb_ml.summary()
results["Variance-Based with ML"] = vb_ml_summary.to_df()
vb_ml.fit(dset)
vb_ml_results = vb_ml.transform()
results["Variance-Based with ML"] = vb_ml_results.to_df()
print("Variance-Based with ML")
print(vb_ml_summary.to_df().T)
print(vb_ml_results.to_df().T)
print()

vb_reml = estimators.VarianceBasedLikelihoodEstimator(method="REML")
vb_reml.fit_dataset(dset)
vb_reml_summary = vb_reml.summary()
results["Variance-Based with REML"] = vb_reml_summary.to_df()
vb_reml.fit(dset)
vb_reml_results = vb_reml.transform()
results["Variance-Based with REML"] = vb_reml_results.to_df()
print("Variance-Based with REML")
print(vb_reml_summary.to_df().T)
print(vb_reml_results.to_df().T)
print()

# The ``SampleSizeBasedLikelihoodEstimator`` estimates between-study variance
# using ``y`` and ``n``, but assumes within-study variance is homogeneous
# across studies.
sb_ml = estimators.SampleSizeBasedLikelihoodEstimator(method="ML")
sb_ml.fit_dataset(dset)
sb_ml_summary = sb_ml.summary()
results["Sample Size-Based with ML"] = sb_ml_summary.to_df()
sb_ml.fit(dset)
sb_ml_results = sb_ml.transform()
results["Sample Size-Based with ML"] = sb_ml_results.to_df()
print("Sample Size-Based with ML")
print(sb_ml_summary.to_df().T)
print(sb_ml_results.to_df().T)
print()

sb_reml = estimators.SampleSizeBasedLikelihoodEstimator(method="REML")
sb_reml.fit_dataset(dset)
sb_reml_summary = sb_reml.summary()
results["Sample Size-Based with REML"] = sb_reml_summary.to_df()
sb_reml.fit(dset)
sb_reml_results = sb_reml.transform()
results["Sample Size-Based with REML"] = sb_reml_results.to_df()
print("Sample Size-Based with REML")
print(sb_reml_summary.to_df().T)
print(sb_reml_results.to_df().T)

###############################################################################
# What about the Stan estimator?
Expand All @@ -240,10 +240,10 @@
# `````````````````````````````````````````````````````````````````````````````
fig, ax = plt.subplots(figsize=(6, 6))

for i, (estimator_name, summary_df) in enumerate(results.items()):
ax.scatter((summary_df.loc[0, "estimate"],), (i + 1,), label=estimator_name)
for i, (estimator_name, results_df) in enumerate(results.items()):
ax.scatter((results_df.loc[0, "estimate"],), (i + 1,), label=estimator_name)
ax.plot(
(summary_df.loc[0, "ci_0.025"], summary_df.loc[0, "ci_0.975"]),
(results_df.loc[0, "ci_0.025"], results_df.loc[0, "ci_0.975"]),
(i + 1, i + 1),
linewidth=3,
)
Expand Down
6 changes: 3 additions & 3 deletions examples/02_meta-analysis/plot_run_meta-analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@
# so it can be useful for large-scale meta-analyses,
# such as neuroimaging image-based meta-analyses.
#
# The :meth:`~pymare.estimators.estimators.BaseEstimator.summary` function
# The :meth:`~pymare.estimators.estimators.BaseEstimator.transform` method
# will return a :class:`~pymare.results.MetaRegressionResults` object,
# which contains the results of the analysis.
est = estimators.WeightedLeastSquares().fit_dataset(dset)
results = est.summary()
est = estimators.WeightedLeastSquares().fit(dset)
results = est.transform()
results.to_df()

###############################################################################
Expand Down
3 changes: 1 addition & 2 deletions pymare/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,5 +260,4 @@ def meta_regression(

# Get estimates
est = est_cls(**kwargs)
est.fit_dataset(data)
return est.summary()
return est.fit_transform(data)
12 changes: 6 additions & 6 deletions pymare/estimators/combination.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def p_value(self, z, *args, **kwargs):
def _z_to_p(self, z):
    """Convert z-values to one-sided p-values using the normal survival function."""
    return ss.norm.sf(z)

def fit(self, z, *args, **kwargs):
def _fit(self, z, *args, **kwargs):
"""Fit the estimator to z-values."""
# This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
self.dataset_ = None
Expand All @@ -51,13 +51,13 @@ def fit(self, z, *args, **kwargs):
self.params_ = {"p": p}
return self

def summary(self):
"""Generate a summary of the estimator results."""
def transform(self):
"""Generate a transform of the estimator results."""
if not hasattr(self, "params_"):
name = self.__class__.__name__
raise ValueError(
"This {} instance hasn't been fitted yet. Please "
"call fit() before summary().".format(name)
"call _fit() of fit() before transform().".format(name)
tsalo marked this conversation as resolved.
Show resolved Hide resolved
)
return CombinationTestResults(self, self.dataset_, p=self.params_["p"])

Expand Down Expand Up @@ -112,9 +112,9 @@ class StoufferCombinationTest(CombinationTest):
# Maps Dataset attributes onto fit() args; see BaseEstimator for details.
_dataset_attr_map = {"z": "y", "w": "v"}

def fit(self, z, w=None):
def _fit(self, z, w=None):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why make this private?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this one was tricky because fit() alone only works with Datasets, while _fit() works directly on arrays. This was a solution that worked, but I'm sure there is a more elegant one that would make that distinction clearer.
Would you suggest a different approach? Thanks!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be honest, I don't remember if I had considered how fitting to arrays would work with the proposed changes... I wish I'd made a note about it in #103.

We don't want users to call a private method, so I think the options are to either make the array-fitting method a different public method (e.g., fit_array) or to support arrays in fit. WDYT?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think if fit() alone cannot distinguish between an array or dataset we should keep fit() only for arrays for the sake of scikit-learn styling, and fit_dataset() for datasets.
Does that mean we need to add fit_dataset_transform() in addition to fit_transform()?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we want the Dataset to be the default (unless I'm misremembering), so I think fit_array and fit would be better than fit and fit_dataset. It's a difficult thing though, so maybe we should bring in the rest of the neurostore team on this?

"""Fit the estimator to z-values, optionally with weights."""
return super().fit(z, w=w)
return super()._fit(z, w=w)

def p_value(self, z, w=None):
"""Calculate p-values."""
Expand Down
50 changes: 33 additions & 17 deletions pymare/estimators/estimators.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Meta-regression estimator classes."""

from abc import ABCMeta, abstractmethod
from abc import ABCMeta
from inspect import getfullargspec
from warnings import warn

Expand Down Expand Up @@ -62,12 +62,7 @@ class BaseEstimator(metaclass=ABCMeta):
# (e.g., 'z').
_dataset_attr_map = {}

@abstractmethod
def fit(self, *args, **kwargs):
"""Fit the estimator to data."""
pass

def fit_dataset(self, dataset, *args, **kwargs):
def fit(self, dataset, *args, **kwargs):
"""Apply the current estimator to the passed Dataset container.

A convenience interface that wraps fit() and automatically aligns the
Expand All @@ -83,7 +78,7 @@ def fit_dataset(self, dataset, *args, **kwargs):
Optional keyword arguments to pass onto the :meth:`~pymare.core.Dataset.fit` method.
"""
all_kwargs = {}
spec = getfullargspec(self.fit)
spec = getfullargspec(self._fit)
n_kw = len(spec.defaults) if spec.defaults else 0
n_args = len(spec.args) - n_kw - 1

Expand All @@ -96,7 +91,7 @@ def fit_dataset(self, dataset, *args, **kwargs):
all_kwargs[name] = getattr(dataset, attr_name)

all_kwargs.update(kwargs)
self.fit(*args, **all_kwargs)
self._fit(*args, **all_kwargs)
self.dataset_ = dataset

return self
Expand Down Expand Up @@ -140,7 +135,7 @@ def get_v(self, dataset):

return self.params_["sigma2"] / dataset.n

def summary(self):
def transform(self):
"""Generate a MetaRegressionResults object for the fitted estimator.

Returns
Expand All @@ -157,6 +152,27 @@ def summary(self):
p = self.params_
return MetaRegressionResults(self, self.dataset_, p["fe_params"], p["inv_cov"], p["tau2"])

def fit_transform(self, dataset, *args, **kwargs):
    """Fit the estimator to a Dataset, then return the results.

    Convenience method equivalent to calling :meth:`fit` followed by
    :meth:`transform` on the fitted estimator.

    Parameters
    ----------
    dataset : :obj:`~pymare.core.Dataset`
        A PyMARE Dataset instance holding the data.
    *args
        Optional positional arguments passed through to :meth:`fit`.
    **kwargs
        Optional keyword arguments passed through to :meth:`fit`.

    Returns
    -------
    :obj:`~pymare.results.MetaRegressionResults`
        The results object produced by :meth:`transform`.
    """
    return self.fit(dataset, *args, **kwargs).transform()


class WeightedLeastSquares(BaseEstimator):
"""Weighted least-squares meta-regression.
Expand Down Expand Up @@ -190,7 +206,7 @@ class WeightedLeastSquares(BaseEstimator):
def __init__(self, tau2=0.0):
self.tau2 = tau2

def fit(self, y, X, v=None):
def _fit(self, y, X, v=None):
"""Fit the estimator to data.

Parameters
Expand All @@ -206,7 +222,7 @@ def fit(self, y, X, v=None):
-------
:obj:`~pymare.estimators.WeightedLeastSquares`
"""
# This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
# This resets the Estimator's dataset_ attribute. fit will overwrite if called.
self.dataset_ = None

if v is None:
Expand Down Expand Up @@ -235,7 +251,7 @@ class DerSimonianLaird(BaseEstimator):
.. footbibliography::
"""

def fit(self, y, v, X):
def _fit(self, y, v, X):
"""Fit the estimator to data.

Parameters
Expand Down Expand Up @@ -299,7 +315,7 @@ class Hedges(BaseEstimator):
.. footbibliography::
"""

def fit(self, y, v, X):
def _fit(self, y, v, X):
"""Fit the estimator to data.

Parameters
Expand Down Expand Up @@ -367,7 +383,7 @@ def __init__(self, method="ml", **kwargs):
self.kwargs = kwargs

@_loopable
def fit(self, y, v, X):
def _fit(self, y, v, X):
"""Fit the estimator to data.

Parameters
Expand All @@ -387,7 +403,7 @@ def fit(self, y, v, X):
self.dataset_ = None

# use D-L estimate for initial values
est_DL = DerSimonianLaird().fit(y, v, X).params_
est_DL = DerSimonianLaird()._fit(y, v, X).params_
beta = est_DL["fe_params"]
tau2 = est_DL["tau2"]

Expand Down Expand Up @@ -460,7 +476,7 @@ def __init__(self, method="ml", **kwargs):
self.kwargs = kwargs

@_loopable
def fit(self, y, n, X):
def _fit(self, y, n, X):
"""Fit the estimator to data.

Parameters
Expand Down
10 changes: 5 additions & 5 deletions pymare/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class MetaRegressionResults:

Warning
-------
When an Estimator is fitted to arrays directly using the ``fit`` method, the Results object's
When an Estimator is fitted to arrays directly using the ``_fit`` method, the Results object's
utility is limited.
Many methods will not work.
"""
Expand Down Expand Up @@ -336,7 +336,7 @@ def permutation_test(self, n_perm=1000):
y_perm = np.repeat(y[:, None], n_perm, axis=1)

# for v, we might actually be working with n, depending on estimator
has_v = "v" in getfullargspec(self.estimator.fit).args[1:]
has_v = "v" in getfullargspec(self.estimator._fit).args[1:]
v = self.dataset.v[:, i] if has_v else self.dataset.n[:, i]

v_perm = np.repeat(v[:, None], n_perm, axis=1)
Expand All @@ -362,7 +362,7 @@ def permutation_test(self, n_perm=1000):
# Pass parameters, remembering that v may actually be n
kwargs = {"y": y_perm, "X": self.dataset.X}
kwargs["v" if has_v else "n"] = v_perm
params = self.estimator.fit(**kwargs).params_
params = self.estimator._fit(**kwargs).params_

fe_obs = fe_stats["est"][:, i]
if fe_obs.ndim == 1:
Expand Down Expand Up @@ -483,9 +483,9 @@ def permutation_test(self, n_perm=1000):

# Some combination tests can handle weights (passed as v)
kwargs = {"z": y_perm}
if "w" in getfullargspec(est.fit).args:
if "w" in getfullargspec(est._fit).args:
kwargs["w"] = self.dataset.v
params = est.fit(**kwargs).params_
params = est._fit(**kwargs).params_

p_obs = self.z[i]
if p_obs.ndim == 1:
Expand Down
2 changes: 1 addition & 1 deletion pymare/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def q_profile(y, v, X, alpha=0.05):
# value, minimize() sometimes fails to stay in bounds.
from .estimators import DerSimonianLaird

ub_start = 2 * DerSimonianLaird().fit(y, v, X).params_["tau2"]
ub_start = 2 * DerSimonianLaird()._fit(y, v, X).params_["tau2"]

lb = minimize(lambda x: (q_gen(*args, x) - l_crit) ** 2, [0], bounds=bds).x[0]
ub = minimize(lambda x: (q_gen(*args, x) - u_crit) ** 2, ub_start, bounds=bds).x[0]
Expand Down
6 changes: 3 additions & 3 deletions pymare/tests/test_combination_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
@pytest.mark.parametrize("Cls,data,mode,expected", _params)
def test_combination_test(Cls, data, mode, expected):
"""Test CombinationTest Estimators with numpy data."""
results = Cls(mode).fit(data).params_
results = Cls(mode)._fit(data).params_
z = ss.norm.isf(results["p"])
assert np.allclose(z, expected, atol=1e-5)

Expand All @@ -37,7 +37,7 @@ def test_combination_test(Cls, data, mode, expected):
def test_combination_test_from_dataset(Cls, data, mode, expected):
"""Test CombinationTest Estimators with PyMARE Datasets."""
dset = Dataset(y=data)
est = Cls(mode).fit_dataset(dset)
results = est.summary()
est = Cls(mode).fit(dset)
results = est.transform()
z = ss.norm.isf(results.p)
assert np.allclose(z, expected, atol=1e-5)
Loading