Merge pull request #9 from neurostuff/sample-size
Add sample size-based ML/REML estimators
tyarkoni authored Dec 29, 2019
2 parents dd39a57 + f297382 commit c9e4d2e
Showing 5 changed files with 204 additions and 60 deletions.
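In short: Dataset and meta_regression now accept sample_sizes, and for ML/REML the fit dispatches to a variance-based or a sample size-based likelihood estimator depending on which inputs are available. A minimal usage sketch (hypothetical values; assumes meta_regression is exported at the package root):

import numpy as np

from pymare import meta_regression  # import path assumed

# Study-level estimates with sample sizes but no variances; method='ML'
# therefore routes to the new SampleSizeBasedLikelihoodEstimator.
y = np.array([-1.0, 0.5, 0.5, 1.0, 2.0])
n = np.array([20., 15., 30., 25., 50.])

results = meta_regression(y, sample_sizes=n, method='ML')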
73 changes: 49 additions & 24 deletions pymare/core.py
@@ -6,7 +6,9 @@
import pandas as pd

from .estimators import (WeightedLeastSquares, DerSimonianLaird,
LikelihoodBased, StanMetaRegression)
VarianceBasedLikelihoodEstimator,
SampleSizeBasedLikelihoodEstimator,
StanMetaRegression)
from .stats import ensure_2d


@@ -15,32 +17,38 @@ class Dataset:
Args:
estimates (array-like): 1d array of study-level estimates with length K
variances (array-like): 1d array of study-level variances with length K
variances (array-like, optional): 1d array of study-level variances
with length K
predictors (array-like, optional): 1d or 2d array containing
study-level predictors (or covariates); has dimensions K x P
sample_sizes (array-like, optional): 1d array of study-level sample
sizes (length K)
names ([str], optional): List of length P containing the names of the
predictors
add_intercept (bool, optional): If True, an intercept column is
automatically added to the predictor matrix. If False, the
predictors matrix is passed as-is to estimators.
kwargs (dict, optional): Keyword arguments to pass onto estimators
"""
def __init__(self, estimates, variances, predictors=None, names=None,
add_intercept=True, **kwargs):
def __init__(self, estimates, variances=None, predictors=None,
sample_sizes=None, names=None, add_intercept=True, **kwargs):
self.estimates = ensure_2d(estimates)
self.variances = ensure_2d(variances)
self.sample_sizes = ensure_2d(sample_sizes)
self.kwargs = kwargs
X, n = self._setup_predictors(predictors, names, add_intercept)
X, n = self._get_predictors(predictors, names, add_intercept)
self.predictors = X
self.names = n

def __getattr__(self, key):
# Provide convenient access to stored kwargs.
if key in self.kwargs:
return self.kwargs[key]
raise AttributeError
raise AttributeError("{} object has no attribute {}".format(
self.__class__.__name__, key))

def _setup_predictors(self, X, names, add_intercept):

def _get_predictors(self, X, names, add_intercept):
if X is None and not add_intercept:
raise ValueError("No fixed predictors found. If no X matrix is "
"provided, add_intercept must be True!")
@@ -67,17 +75,26 @@ def X(self):
"""Alias for the `predictors` attribute."""
return self.predictors

@property
def n(self):
"""Alias for the `sample_sizes` attribute."""
return self.sample_sizes


def meta_regression(estimates, variances, predictors=None, names=None,
add_intercept=True, method='ML', ci_method='QP',
alpha=0.05, **kwargs):
def meta_regression(estimates, variances=None, predictors=None,
sample_sizes=None, names=None, add_intercept=True,
method='ML', ci_method='QP', alpha=0.05, **kwargs):
"""Fits the standard meta-regression/meta-analysis model to provided data.
Args:
estimates ([float]): 1d array of study-level estimates with length K
variances ([float]): 1d array of study-level variances with length K
predictors ([float], optional): 1d or 2d array containing study-level
predictors (or covariates); has dimensions K x P
estimates (array-like): 1d array of study-level estimates with length K
variances (array-like, optional): 1d array of study-level variances
with length K
predictors (array-like, optional): 1d or 2d array containing
study-level predictors (or covariates); has dimensions K x P. If
omitted, add_intercept must be True.
sample_sizes (array-like, optional): 1d array of study-level sample
sizes (length K)
names ([str], optional): List of length P containing the names of the
predictors
add_intercept (bool, optional): If True, an intercept column is
@@ -102,21 +119,29 @@ def meta_regression(estimates, variances, predictors=None, names=None,
depending on the specified method ('Stan' will return the latter; all
other methods return the former).
"""
dataset = Dataset(estimates, variances, predictors, names, add_intercept)
dataset = Dataset(estimates, variances, predictors, sample_sizes, names,
add_intercept)

method = method.lower()

estimator_cls = {
'ml': partial(LikelihoodBased, method=method),
'reml': partial(LikelihoodBased, method=method),
'dl': DerSimonianLaird,
'wls': WeightedLeastSquares,
'fe': WeightedLeastSquares,
'stan': StanMetaRegression,
}[method]
if method in ['ml', 'reml']:
if variances is not None:
est_cls = partial(VarianceBasedLikelihoodEstimator, method=method)
elif sample_sizes is not None:
est_cls = partial(SampleSizeBasedLikelihoodEstimator, method=method)
else:
raise ValueError("If method is ML or REML, one of `variances` or "
"`sample sizes` must be passed!")
else:
est_cls = {
'dl': DerSimonianLaird,
'wls': WeightedLeastSquares,
'fe': WeightedLeastSquares,
'stan': StanMetaRegression,
}[method]

# Get estimates
est = estimator_cls(**kwargs)
est = est_cls(**kwargs)
results = est.fit(dataset)
if hasattr(results, 'compute_stats'):
results.compute_stats(ci_method=ci_method, alpha=alpha)
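To see the new core.py surface in one place: a Dataset can now be built from estimates plus sample sizes alone, with variances omitted, and n is an alias for sample_sizes just as X aliases predictors. A short sketch (hypothetical values; import path assumed):

import numpy as np
from pymare import Dataset  # import path assumed

y = np.array([[-3., -0.5, 0., -5.01]]).T  # study-level estimates
n = np.array([[16., 16., 20., 32.]]).T    # study-level sample sizes

dataset = Dataset(y, sample_sizes=n)      # no variances passed
assert dataset.n is dataset.sample_sizes  # alias property defined above

# With add_intercept=True (the default) and no predictors, the X matrix
# is a K x 1 column of ones. This sketch assumes ensure_2d passes None
# through, so dataset.variances remains None.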
7 changes: 5 additions & 2 deletions pymare/estimators/__init__.py
@@ -1,9 +1,12 @@
from .estimators import (WeightedLeastSquares, DerSimonianLaird,
LikelihoodBased, StanMetaRegression)
VarianceBasedLikelihoodEstimator,
SampleSizeBasedLikelihoodEstimator,
StanMetaRegression)

__all__ = [
'WeightedLeastSquares',
'DerSimonianLaird',
'LikelihoodBased',
'VarianceBasedLikelihoodEstimator',
'SampleSizeBasedLikelihoodEstimator',
'StanMetaRegression'
]
142 changes: 113 additions & 29 deletions pymare/estimators/estimators.py
@@ -1,6 +1,7 @@
"""Meta-regression estimator classes."""

from abc import ABCMeta, abstractmethod
from inspect import getfullargspec

import numpy as np
from scipy.optimize import minimize
@@ -14,14 +15,43 @@ class BaseEstimator(metaclass=ABCMeta):
_result_cls = MetaRegressionResults

@abstractmethod
def _fit(self, y, v, X):
# Subclasses should retain this minimal signature in all cases; this
# ensures that the fit() wrapper in the base class can be ignored by
# users who want to avoid input/output overhead and call _fit directly.
def _fit(self):
# Subclasses must implement a _fit() method that directly takes arrays.
# The following named arguments are allowed, and will be automatically
# extracted from the Dataset instance:
# * y (estimates)
# * v (variances)
# * n (sample_sizes)
# * X (predictors)
pass

def accepts_dataset(self, dataset):
""" Returns whether current class can fit the passed Dataset.
Args:
dataset (Dataset): A Dataset instance
Returns:
A boolean.
"""
args = getfullargspec(self._fit)[0][1:]
for name in args:
if getattr(dataset, name) is None:
return False
return True

def fit(self, dataset):
results = self._fit(dataset.y, dataset.v, dataset.X)
kwargs = {}
spec = getfullargspec(self._fit)
n_kw = len(spec.defaults) if spec.defaults else 0
n_args = len(spec.args) - n_kw - 1
for i, name in enumerate(spec.args[1:]):
if i >= n_args:
kwargs[name] = getattr(dataset, name, spec.defaults[i-n_args])
else:
kwargs[name] = getattr(dataset, name)

results = self._fit(**kwargs)
return self._result_cls(results, dataset, self)
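The fit() wrapper above keys everything off _fit's signature: every named argument is pulled from the Dataset, with trailing arguments that declare defaults treated as optional. The same inspect-based pattern in isolation (a toy class, not library code):

from inspect import getfullargspec

class Toy:
    def _fit(self, y, n, X=None):
        return y, n, X

spec = getfullargspec(Toy._fit)
print(spec.args[1:])  # ['y', 'n', 'X'] -> attributes to fetch from Dataset
print(spec.defaults)  # (None,)        -> X falls back to None if absent

# accepts_dataset() above returns False as soon as any of these named
# attributes is None on the Dataset instance.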


@@ -73,20 +103,16 @@ def _fit(self, y, v, X):
return {'beta': beta_dl, 'tau2': tau_dl}


class LikelihoodBased(BaseEstimator):
""" Likelihood-based meta-regression estimator.
class VarianceBasedLikelihoodEstimator(BaseEstimator):
""" Likelihood-based estimator for estimates with known variances.
Iteratively estimates the between-subject variance tau^2 and fixed effects
Iteratively estimates the between-subject variance tau^2 and fixed effect
betas using the specified likelihood-based estimator (ML or REML).
Args:
method (str, optional): The estimation method to use. Either 'ML' (for
maximum-likelihood) or 'REML' (restricted maximum-likelihood).
Defaults to 'ML'.
beta (array, optional): Initial beta values to use in optimization. If
None (default), the DerSimonian-Laird estimate is used.
tau2 (float, optional): Initial tau^2 value to use in optimization. If
None (default), the DerSimonian-Laird estimate is used.
kwargs (dict, optional): Keyword arguments to pass to the SciPy
minimizer.
@@ -96,26 +122,22 @@ class LikelihoodBased(BaseEstimator):
passed in as keyword arguments.
"""

def __init__(self, method='ml', beta=None, tau2=None, **kwargs):
self.method = method
self.beta = beta
self.tau2 = tau2
def __init__(self, method='ml', **kwargs):
nll_func = getattr(self, '_{}_nll'.format(method.lower()), None)
if nll_func is None:
raise ValueError("No log-likelihood function defined for method "
"'{}'.".format(method))
self._nll_func = nll_func
self.kwargs = kwargs

def _fit(self, y, v, X):
# use D-L estimate for initial values if none provided
if self.tau2 is None or self.beta is None:
est_DL = DerSimonianLaird()._fit(y, v, X)
beta = est_DL['beta'] if self.beta is None else self.beta
tau2 = est_DL['tau2'] if self.tau2 is None else self.tau2

ll_func = getattr(self, '_{}_nll'.format(self.method.lower()))
if ll_func is None:
raise ValueError("No log-likelihood function defined for method "
"'{}'.".format(self.method))
# use D-L estimate for initial values
est_DL = DerSimonianLaird()._fit(y, v, X)
beta = est_DL['beta']
tau2 = est_DL['tau2']

theta_init = np.r_[beta.ravel(), tau2]
res = minimize(ll_func, theta_init, (y, v, X), **self.kwargs).x
res = minimize(self._nll_func, theta_init, (y, v, X), **self.kwargs).x
beta, tau = res[:-1], float(res[-1])
tau = np.max([tau, 0])
return {'beta': beta, 'tau2': tau}
@@ -127,8 +149,7 @@ def _ml_nll(self, theta, y, v, X):
tau2 = 0
w = 1. / (v + tau2)
R = y - X.dot(beta)
ll = 0.5 * (np.log(w).sum() - (R * w * R).sum())
return -ll
return -0.5 * (np.log(w).sum() - (R * w * R).sum())

def _reml_nll(self, theta, y, v, X):
""" REML negative log-likelihood for meta-regression model. """
@@ -139,6 +160,69 @@ def _reml_nll(self, theta, y, v, X):
return ll_ + 0.5 * np.log(np.linalg.det(F))


class SampleSizeBasedLikelihoodEstimator(BaseEstimator):
""" Likelihood-based estimator for estimates with known sample sizes but
unknown variances.
Iteratively estimates the between-subject variance tau^2 and fixed effect
betas using the specified likelihood-based estimator (ML or REML).
Args:
method (str, optional): The estimation method to use. Either 'ML' (for
maximum-likelihood) or 'REML' (restricted maximum-likelihood).
Defaults to 'ML'.
kwargs (dict, optional): Keyword arguments to pass to the SciPy
minimizer.
Notes:
The ML and REML solutions are obtained via SciPy's scalar function
minimizer (scipy.optimize.minimize). Parameters to minimize() can be
passed in as keyword arguments.
"""

def __init__(self, method='ml', **kwargs):
nll_func = getattr(self, '_{}_nll'.format(method.lower()), None)
if nll_func is None:
raise ValueError("No log-likelihood function defined for method "
"'{}'.".format(method))
self._nll_func = nll_func
self.kwargs = kwargs

def _fit(self, y, n, X):
# set tau^2 to 0 and compute starting values
tau2 = 0.
beta = WeightedLeastSquares(tau2=tau2)._fit(y, n, X)['beta']
sigma = ((y - X.dot(beta))**2 * n).mean()
theta_init = np.r_[beta.ravel(), sigma, tau2]
res = minimize(self._nll_func, theta_init, (y, n, X), **self.kwargs).x
beta, sigma, tau = res[:-2], float(res[-2]), float(res[-1])
tau = np.max([tau, 0])
return {'beta': beta, 'sigma2': sigma, 'tau2': tau}

def _ml_nll(self, theta, y, n, X):
""" ML negative log-likelihood for meta-regression model. """
beta, sigma2, tau2 = theta[:-2, None], theta[-2], theta[-1]
if tau2 < 0:
tau2 = 0
if sigma2 < 0:
sigma2 = 0
w = 1 / (tau2 + sigma2 / n)
R = y - X.dot(beta)
return -0.5 * (np.log(w).sum() - (R * w * R).sum())

def _reml_nll(self, theta, y, n, X):
""" REML negative log-likelihood for meta-regression model. """
ll_ = self._ml_nll(theta, y, n, X)
sigma2, tau2 = theta[-2:]
w = 1 / (tau2 + sigma2 / n)
F = (X * w).T.dot(X)
return ll_ + 0.5 * np.log(np.linalg.det(F))


class StanMetaRegression(BaseEstimator):
"""Bayesian meta-regression estimator using Stan.
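For intuition about what SampleSizeBasedLikelihoodEstimator optimizes: a single sigma^2 is shared across studies, and study i's total variance is modeled as tau^2 + sigma^2 / n_i. A self-contained sketch (synthetic data, not library code) that recovers the parameters by minimizing the same ML objective:

import numpy as np
from scipy.optimize import minimize

rng = np.random.default_rng(0)

# Synthetic intercept-only data: true mean 0.5, sigma2 = 1.0, tau2 = 0.04.
K = 200
n = rng.integers(20, 200, size=(K, 1)).astype(float)
y = 0.5 + rng.normal(0., np.sqrt(0.04 + 1.0 / n))
X = np.ones((K, 1))

def ml_nll(theta):
    # theta packs [beta, sigma2, tau2], mirroring the estimator above.
    beta, sigma2, tau2 = theta[0], max(theta[1], 0.), max(theta[2], 0.)
    w = 1. / (tau2 + sigma2 / n)  # per-study precision
    r = y - X * beta              # residuals
    return float(-0.5 * (np.log(w).sum() - (r * w * r).sum()))

beta_hat, sigma2_hat, tau2_hat = minimize(ml_nll, x0=[0., 0.5, 0.1]).x
# Expect roughly beta_hat ~ 0.5, sigma2_hat ~ 1.0, tau2_hat ~ 0.04,
# up to sampling noise.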
8 changes: 8 additions & 0 deletions tests/conftest.py
@@ -17,6 +17,14 @@ def dataset(variables):
return Dataset(*variables, names=['my_covariate'])


@pytest.fixture(scope='package')
def dataset_n():
y = np.array([[-3., -0.5, 0., -5.01, 0.35, -2., -6., -4., -4.3, -0.1, -1.]]).T
n = np.array([[16, 16, 20.548, 32.597, 14., 11.118, 4.444, 12.414, 26.963,
130.556, 126.76]]).T / 2
return Dataset(y, sample_sizes=n)


@pytest.fixture(scope='package')
def vars_with_intercept():
y = np.array([[-1, 0.5, 0.5, 0.5, 1, 1, 2, 10]]).T
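A test consuming the new fixture might look like the following (hypothetical; the test name and assertions are illustrative, not part of this diff, and it assumes dataset.v aliases variances the way dataset.n aliases sample_sizes):

from pymare.estimators import (SampleSizeBasedLikelihoodEstimator,
                               VarianceBasedLikelihoodEstimator)

def test_sample_size_based_ml(dataset_n):
    # dataset_n carries estimates and sample sizes but no variances, so
    # only the sample size-based estimator should accept it.
    assert not VarianceBasedLikelihoodEstimator().accepts_dataset(dataset_n)
    est = SampleSizeBasedLikelihoodEstimator(method='ML')
    assert est.accepts_dataset(dataset_n)
    est.fit(dataset_n)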