Commit
fixed some bugs related to merging with shap and new attribute names
linting

fixed failing tests

fixed notebooks

fixed ortholearner docstring

reverted to allowing n_crossfit_splits and raising deprecation warning. fixed ortholearner doctest

fixed bugs for failing tests

fixed failing test bugs

added extra kwargs to _strata

old sklearn crashes with new scipy. we can revert once we enforce new sklearn

disallowing bootstrap inference when refitting

added many refit tests. fixed some leftover bugs based on new tests

typo in tests

changed monte_carlo_iterations to mc_iters and added mc_agg parameter to change the aggregation method {mean, median}

fixed nuisance aggregation when nuisances have different dimensions

removed _models_nuisance initialization at init.

added rlearner residuals_ property

fixed flaky cate interpreter test
vasilismsr committed Jan 4, 2021
1 parent 72a297a commit ba0dbf0
Showing 19 changed files with 1,022 additions and 333 deletions.
167 changes: 151 additions & 16 deletions econml/_ortho_learner.py
@@ -38,6 +38,7 @@ class in this module implements the general logic in a very versatile way

from .cate_estimator import (BaseCateEstimator, LinearCateEstimator,
                             TreatmentExpansionMixin)
from .inference import BootstrapInference
from .utilities import (_deprecate_positional, _EncoderWrapper, check_input_arrays,
                        cross_product, filter_none_kwargs,
                        inverse_onehot, ndim, reshape, shape, transpose)
@@ -281,9 +282,130 @@ class _OrthoLearner(ABC, TreatmentExpansionMixin, LinearCateEstimator):
If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used
by :mod:`np.random<numpy.random>`.

monte_carlo_iterations: int, optional (default=None)
mc_iters: int, optional (default=None)
    The number of times to rerun the first stage models to reduce the variance of the nuisances.

mc_agg: {'mean', 'median'}, optional (default='mean')
    How to aggregate the nuisance value for each sample across the `mc_iters` monte carlo iterations of
    cross-fitting.

Examples
--------
The example code below implements a very simple version of the double machine learning
method on top of the :class:`._OrthoLearner` class, for expository purposes.
For a more elaborate implementation of a Double Machine Learning child class of the class
:class:`._OrthoLearner`, check out :class:`.DML` and its child classes:

.. testcode::

    import numpy as np
    from sklearn.linear_model import LinearRegression
    from econml._ortho_learner import _OrthoLearner

    class ModelNuisance:
        def __init__(self, model_t, model_y):
            self._model_t = model_t
            self._model_y = model_y

        def fit(self, Y, T, W=None):
            self._model_t.fit(W, T)
            self._model_y.fit(W, Y)
            return self

        def predict(self, Y, T, W=None):
            return Y - self._model_y.predict(W), T - self._model_t.predict(W)

    class ModelFinal:
        def __init__(self):
            return

        def fit(self, Y, T, W=None, nuisances=None):
            Y_res, T_res = nuisances
            self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res)
            return self

        def predict(self, X=None):
            return self.model.coef_[0]

        def score(self, Y, T, W=None, nuisances=None):
            Y_res, T_res = nuisances
            return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2)

    class OrthoLearner(_OrthoLearner):
        def _gen_ortho_learner_model_nuisance(self):
            return ModelNuisance(LinearRegression(), LinearRegression())

        def _gen_ortho_learner_model_final(self):
            return ModelFinal()

    np.random.seed(123)
    X = np.random.normal(size=(100, 3))
    y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.1, size=(100,))
    est = OrthoLearner(n_splits=2, discrete_treatment=False, discrete_instrument=False,
                       categories='auto', random_state=None)
    est.fit(y, X[:, 0], W=X[:, 1:])

>>> est.score_
0.00756830...
>>> est.const_marginal_effect()
1.02364992...
>>> est.effect()
array([1.023649...])
>>> est.effect(T0=0, T1=10)
array([10.236499...])
>>> est.score(y, X[:, 0], W=X[:, 1:])
0.00727995...
>>> est.ortho_learner_model_final.model
LinearRegression(fit_intercept=False)
>>> est.ortho_learner_model_final.model.coef_
array([1.023649...])
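
Note that the new `mc_iters` and `mc_agg` options from this commit plug into the same constructor. A minimal sketch continuing from the example above (the particular values are illustrative, not taken from this diff):

    # Rerun the first-stage cross-fitting 5 times and aggregate each nuisance by
    # its median across iterations (mc_iters was previously monte_carlo_iterations;
    # mc_agg is new in this commit).
    est_mc = OrthoLearner(n_splits=2, discrete_treatment=False, discrete_instrument=False,
                          categories='auto', random_state=123,
                          mc_iters=5, mc_agg='median')
    est_mc.fit(y, X[:, 0], W=X[:, 1:])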
The following example shows how to do double machine learning with discrete treatments, using
the _OrthoLearner:
.. testcode::

    import scipy.special
    from sklearn.linear_model import LinearRegression, LogisticRegression

    class ModelNuisance:
        def __init__(self, model_t, model_y):
            self._model_t = model_t
            self._model_y = model_y

        def fit(self, Y, T, W=None):
            self._model_t.fit(W, np.matmul(T, np.arange(1, T.shape[1]+1)))
            self._model_y.fit(W, Y)
            return self

        def predict(self, Y, T, W=None):
            return Y - self._model_y.predict(W), T - self._model_t.predict_proba(W)[:, 1:]

    class ModelFinal:
        def __init__(self):
            return

        def fit(self, Y, T, W=None, nuisances=None):
            Y_res, T_res = nuisances
            self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res)
            return self

        def predict(self):
            # theta needs to be of dimension (1, d_t) if T is (n, d_t)
            return np.array([[self.model.coef_[0]]])

        def score(self, Y, T, W=None, nuisances=None):
            Y_res, T_res = nuisances
            return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2)

    class OrthoLearner(_OrthoLearner):
        def _gen_ortho_learner_model_nuisance(self):
            return ModelNuisance(LogisticRegression(solver='lbfgs'), LinearRegression())

        def _gen_ortho_learner_model_final(self):
            return ModelFinal()

    np.random.seed(123)
    W = np.random.normal(size=(100, 3))
    T = np.random.binomial(1, scipy.special.expit(W[:, 0]))
    y = T + W[:, 0] + np.random.normal(0, 0.01, size=(100,))
    est = OrthoLearner(n_splits=2, discrete_treatment=True, discrete_instrument=False,
                       categories='auto', random_state=None)
    est.fit(y, T, W=W)

>>> est.score_
0.00673015...
>>> est.const_marginal_effect()
array([[1.008401...]])
>>> est.effect()
array([1.008401...])
>>> est.score(y, T, W=W)
0.00310431...
>>> est.ortho_learner_model_final.model.coef_[0]
1.00840170...
Attributes
----------
models_nuisance: list of objects of type(model_nuisance)
@@ -301,14 +423,14 @@ class _OrthoLearner(ABC, TreatmentExpansionMixin, LinearCateEstimator):

def __init__(self, *,
             discrete_treatment, discrete_instrument, categories, n_splits, random_state,
             monte_carlo_iterations=None):
    self._models_nuisance = None
             mc_iters=None, mc_agg='mean'):
    self.n_splits = n_splits
    self.discrete_treatment = discrete_treatment
    self.discrete_instrument = discrete_instrument
    self.random_state = random_state
    self.categories = categories
    self.monte_carlo_iterations = monte_carlo_iterations
    self.mc_iters = mc_iters
    self.mc_agg = mc_agg
    super().__init__()

@abstractmethod
@@ -350,13 +472,13 @@ def _gen_ortho_learner_model_final(self):
sample_weight=sample_weight, sample_var=sample_var)
model_final.predict(X=X)
Predict, should just take the features X and return the constant marginal effect. In fact we allow
for the model method signatures to skip any of the keyword arguments as long as the class is always
called with the omitted keyword argument set to ``None``. Moreover, the predict function of the final
model can take no argument if the class is always called with ``X=None``. This can be enforced in child
classes by re-implementing the fit and the various effect methods. If ``discrete_treatment=True``,
then the input ``T`` to both above calls will be the one-hot encoding of the original input ``T``,
excluding the first column of the one-hot.
"""
pass
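
As a concrete illustration of the signature flexibility described in the docstring above, a final model that is only ever called with `X=None` and `W=None` may omit those keywords entirely. A hypothetical sketch (not econml code):

    # Valid final model when the learner is always called with X=None and W=None:
    # fit drops the W keyword and predict takes no arguments at all.
    class SlimModelFinal:
        def fit(self, Y, T, nuisances=None):
            Y_res, T_res = nuisances
            self.theta_ = (T_res * Y_res).sum() / (T_res ** 2).sum()  # OLS through the origin
            return self

        def predict(self):
            return self.theta_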

@@ -390,7 +512,7 @@ def _subinds_check_none(self, var, inds):

def _strata(self, Y, T, X=None, W=None, Z=None,
            sample_weight=None, sample_var=None, groups=None,
            cache_values=False, monte_carlo_iterations=None):
            cache_values=False, only_final=False, check_input=True):
    if self.discrete_instrument:
        Z = LabelEncoder().fit_transform(np.ravel(Z))
@@ -492,17 +614,24 @@ def fit(self, Y, T, X=None, W=None, Z=None, *, sample_weight=None, sample_var=None,
    all_nuisances = []
    fitted_inds = None

    for _ in range(self.monte_carlo_iterations or 1):
    for _ in range(self.mc_iters or 1):
        nuisances, new_inds = self._fit_nuisances(Y, T, X, W, Z, sample_weight=sample_weight, groups=groups)
        all_nuisances.append(nuisances)
        if fitted_inds is None:
            fitted_inds = new_inds
        elif not np.array_equal(fitted_inds, new_inds):
            raise AttributeError("Different indices were fit by different folds, so they cannot be aggregated")

    if self.monte_carlo_iterations is not None:
        # TODO: support different ways to aggregate, like median?
        nuisances = np.mean(np.array(all_nuisances), axis=0)
    if self.mc_iters is not None:
        if self.mc_agg == 'mean':
            nuisances = tuple(np.mean(nuisance_mc_variants, axis=0)
                              for nuisance_mc_variants in list(zip(*all_nuisances)))
        elif self.mc_agg == 'median':
            nuisances = tuple(np.median(nuisance_mc_variants, axis=0)
                              for nuisance_mc_variants in list(zip(*all_nuisances)))
        else:
            raise ValueError(
                "Parameter `mc_agg` must be one of {'mean', 'median'}. Got {}".format(self.mc_agg))

    Y, T, X, W, Z, sample_weight, sample_var = (self._subinds_check_none(arr, fitted_inds)
                                                for arr in (Y, T, X, W, Z, sample_weight, sample_var))
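
The per-nuisance aggregation above is what the commit message's "fixed nuisance aggregation when nuisances have different dimensions" refers to: the removed `np.mean(np.array(all_nuisances), axis=0)` stacked every nuisance into a single array and broke when the tuple's elements had different shapes. A standalone sketch of the new behavior, with made-up shapes:

    import numpy as np

    # Two monte carlo iterations; each returns a tuple of nuisances whose elements
    # have different shapes, e.g. Y residuals (n,) and T residuals (n, 2).
    iter1 = (np.zeros(100), np.zeros((100, 2)))
    iter2 = (np.ones(100), np.ones((100, 2)))
    all_nuisances = [iter1, iter2]

    # Aggregate each nuisance separately across iterations, as the new code does.
    nuisances = tuple(np.mean(variants, axis=0) for variants in zip(*all_nuisances))
    print([n.shape for n in nuisances])  # prints [(100,), (100, 2)]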
@@ -526,6 +655,10 @@ def fit(self, Y, T, X=None, W=None, Z=None, *, sample_weight=None, sample_var=None,

    return self

@property
def _illegal_refit_inference_methods(self):
    return (BootstrapInference,)

def refit(self, inference=None):
    """
    Estimate the counterfactual model using a new final model specification but with cached first stage results.
Expand All @@ -539,6 +672,8 @@ def refit(self, inference=None):
    This instance
    """
    assert self._cached_values, "Refit can only be called if values were cached during the original fit"
    if isinstance(self._get_inference(inference), self._illegal_refit_inference_methods):
        raise ValueError("The chosen inference method does not allow for re-fitting only the final model.")
    cached = self._cached_values
    kwargs = filter_none_kwargs(
        Y=cached.Y, T=cached.T, X=cached.X, W=cached.W, Z=cached.Z,
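The new guard means `refit` can no longer be combined with bootstrap inference, which would require re-fitting the first stages as well. A hypothetical usage sketch (the fitted estimator `est` and its data are placeholders, not part of this diff):

    from econml.inference import BootstrapInference

    est.fit(y, T, W=W, cache_values=True)  # caching is required before refit
    est.refit()                            # OK: re-fits only the final model
    est.refit(inference=BootstrapInference(n_bootstrap_samples=20))
    # -> ValueError: bootstrap inference is disallowed when refitting
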
26 changes: 23 additions & 3 deletions econml/_rlearner.py
@@ -170,9 +170,13 @@ class _RLearner(_OrthoLearner):
If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used
by :mod:`np.random<numpy.random>`.

monte_carlo_iterations: int, optional
mc_iters: int, optional
    The number of times to rerun the first stage models to reduce the variance of the nuisances.

mc_agg: {'mean', 'median'}, optional (default='mean')
    How to aggregate the nuisance value for each sample across the `mc_iters` monte carlo iterations of
    cross-fitting.

Attributes
----------
models_y: list of objects of type(model_y)
@@ -198,13 +202,14 @@ class _RLearner(_OrthoLearner):
is multidimensional, then the average of the MSEs for each dimension of Y is returned.
"""

def __init__(self, *, discrete_treatment, categories, n_splits, random_state, monte_carlo_iterations=None):
def __init__(self, *, discrete_treatment, categories, n_splits, random_state, mc_iters=None, mc_agg='mean'):
    super().__init__(discrete_treatment=discrete_treatment,
                     discrete_instrument=False,  # no instrument, so doesn't matter
                     categories=categories,
                     n_splits=n_splits,
                     random_state=random_state,
                     monte_carlo_iterations=monte_carlo_iterations)
                     mc_iters=mc_iters,
                     mc_agg=mc_agg)

@abstractmethod
def _gen_model_y(self):
@@ -348,3 +353,18 @@ def nuisance_scores_y(self):
@property
def nuisance_scores_t(self):
    return self.nuisance_scores_[1]

@property
def residuals_(self):
    """
    A tuple (y_res, T_res, X, W) of the residuals from the first stage estimation
    along with the associated X and W. Samples are not guaranteed to be in the same
    order as the input order.
    """
    if not hasattr(self, '_cached_values'):
        raise AttributeError("Estimator is not fitted yet!")
    if self._cached_values is None:
        raise AttributeError("`fit` was called with `cache_values=False`. "
                             "Set to `True` to enable residual storage.")
    Y_res, T_res = self._cached_values.nuisances
    return Y_res, T_res, self._cached_values.X, self._cached_values.W
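
A short usage sketch for the new property (the fitted estimator `est` stands in for any _RLearner-derived class and is a placeholder, not part of this diff):

    est.fit(y, T, X=X, W=W, cache_values=True)   # residuals are only kept when caching
    y_res, T_res, X_out, W_out = est.residuals_  # first-stage residuals plus X and W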
29 changes: 16 additions & 13 deletions econml/cate_estimator.py
@@ -525,8 +525,11 @@ class LinearModelFinalCateEstimatorMixin(BaseCateEstimator):
"""
Base class for models where the final stage is a linear model.
Subclasses must expose a ``model_final`` attribute containing the model's
final stage model.
Such an estimator must implement a :attr:`model_final_` attribute that points
to the fitted final :class:`.StatsModelsLinearRegression` object that
represents the fitted CATE model. Also must implement :attr:`featurizer_` that points
to the fitted featurizer and :attr:`bias_part_of_coef` that designates
if the intercept is the first element of the :attr:`model_final_` coefficient.
Attributes
----------
@@ -561,7 +564,7 @@ def coef_(self):
"""
return parse_final_model_params(self.model_final_.coef_, self.model_final_.intercept_,
self._d_y, self._d_t, self._d_t_in, self.bias_part_of_coef,
self.fit_cate_intercept)[0]
self.fit_cate_intercept_)[0]

@property
def intercept_(self):
@@ -576,11 +579,11 @@ def intercept_(self):
    a vector and not a 2D array. For binary treatment the n_t dimension is
    also omitted.
    """
    if not self.fit_cate_intercept:
    if not self.fit_cate_intercept_:
        raise AttributeError("No intercept was fitted!")
    return parse_final_model_params(self.model_final_.coef_, self.model_final_.intercept_,
                                    self._d_y, self._d_t, self._d_t_in, self.bias_part_of_coef,
                                    self.fit_cate_intercept)[1]
                                    self.fit_cate_intercept_)[1]

@BaseCateEstimator._defer_to_inference
def coef__interval(self, *, alpha=0.1):
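
An illustrative-only skeleton of the mixin contract described in the docstring above (the private field names here are hypothetical, not econml code):

    class MyCateEstimator(LinearModelFinalCateEstimatorMixin):
        bias_part_of_coef = False  # intercept is not folded into coef_

        @property
        def model_final_(self):
            return self._fitted_final_model  # the fitted linear final model

        @property
        def featurizer_(self):
            return self._fitted_featurizer   # the fitted featurizer, or None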
@@ -718,16 +721,14 @@ def summary(self, alpha=0.1, value=0, decimals=3, feature_names=None, treatment_

def shap_values(self, X, *, feature_names=None, treatment_names=None, output_names=None):
    (dt, dy, treatment_names, output_names) = _define_names(self._d_t, self._d_y, treatment_names, output_names)
    if hasattr(self, "featurizer") and self.featurizer is not None:
        X = self.featurizer.transform(X)
    if hasattr(self, "featurizer_") and self.featurizer_ is not None:
        X = self.featurizer_.transform(X)
    X, T = broadcast_unit_treatments(X, dt)
    d_x = X.shape[1]
    X_new = cross_product(X, T)
    feature_names = self.cate_feature_names(feature_names)
    return _shap_explain_joint_linear_model_cate(self.model_final, X_new, T, dt, dy, self.fit_cate_intercept,
    return _shap_explain_joint_linear_model_cate(self.model_final_, X_new, T, dt, dy, self.bias_part_of_coef,
                                                 feature_names=feature_names, treatment_names=treatment_names,
                                                 output_names=output_names)

shap_values.__doc__ = LinearCateEstimator.shap_values.__doc__


@@ -736,9 +737,11 @@ class StatsModelsCateEstimatorMixin(LinearModelFinalCateEstimatorMixin):
Mixin class that offers `inference='statsmodels'` options to the CATE estimator
that inherits it.

Such an estimator must implement a :attr:`model_final` attribute that points
Such an estimator must implement a :attr:`model_final_` attribute that points
to the fitted final :class:`.StatsModelsLinearRegression` object that
represents the fitted CATE model. It must also implement a :attr:`featurizer_` attribute that points
to the fitted featurizer and a :attr:`bias_part_of_coef` attribute that designates
whether the intercept is the first element of the :attr:`model_final_` coefficients.
"""

def _get_inference_options(self):
@@ -822,7 +825,7 @@ def intercept_(self, T):
    -------
    intercept: float or (n_y,) array like
    """
    if not self.fit_cate_intercept:
    if not self.fit_cate_intercept_:
        raise AttributeError("No intercept was fitted!")
    _, T = self._expand_treatments(None, T)
    ind = inverse_onehot(T).item() - 1