diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 13e5489cc..67bbd9115 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -27,6 +27,7 @@ class in this module implements the general logic in a very versatile way import copy from collections import namedtuple from warnings import warn +from abc import ABC, abstractmethod import numpy as np from sklearn.base import clone @@ -189,10 +190,10 @@ def predict(self, X, y, W=None): CachedValues = namedtuple('_CachedValues', ['nuisances', - 'Y', 'T', 'X', 'W', 'Z', 'sample_weight', 'sample_var']) + 'Y', 'T', 'X', 'W', 'Z', 'sample_weight', 'sample_var', 'groups']) -class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): +class _OrthoLearner(ABC, TreatmentExpansionMixin, LinearCateEstimator): """ Base class for all orthogonal learners. This class is a parent class to any method that has the following architecture: @@ -247,39 +248,6 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): Parameters ---------- - model_nuisance: estimator - The estimator for fitting the nuisance function. Must implement - `fit` and `predict` methods that both have signatures:: - - model_nuisance.fit(Y, T, X=X, W=W, Z=Z, - sample_weight=sample_weight, sample_var=sample_var) - model_nuisance.predict(Y, T, X=X, W=W, Z=Z, - sample_weight=sample_weight, sample_var=sample_var) - - In fact we allow for the model method signatures to skip any of the keyword arguments - as long as the class is always called with the omitted keyword argument set to ``None``. - This can be enforced in child classes by re-implementing the fit and the various effect - methods. If ``discrete_treatment=True``, then the input ``T`` to both above calls will be the - one-hot encoding of the original input ``T``, excluding the first column of the one-hot. - - If the estimator also provides a score method with the same arguments as fit, it will be used to - calculate scores during training. - - model_final: estimator for fitting the response residuals to the features and treatment residuals - Must implement `fit` and `predict` methods that must have signatures:: - - model_final.fit(Y, T, X=X, W=W, Z=Z, nuisances=nuisances, - sample_weight=sample_weight, sample_var=sample_var) - model_final.predict(X=X) - - Predict, should just take the features X and return the constant marginal effect. In fact we allow for the - model method signatures to skip any of the keyword arguments as long as the class is always called with the - omitted keyword argument set to ``None``. Moreover, the predict function of the final model can take no - argument if the class is always called with ``X=None``. This can be enforced in child classes by - re-implementing the fit and the various effect methods. If ``discrete_treatment=True``, then the input ``T`` - to both above calls will be the one-hot encoding of the original input ``T``, excluding the first column of - the one-hot. - discrete_treatment: bool Whether the treatment values should be treated as categorical, rather than continuous, quantities @@ -313,127 +281,9 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used by :mod:`np.random`. - monte_carlo_iterations: int, optional + monte_carlo_iterations: int, optional (default=None) The number of times to rerun the first stage models to reduce the variance of the nuisances. 
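Since ``_OrthoLearner`` now derives from ``ABC`` and no longer takes the nuisance and final models in its constructor, the expository example that used to live in this docstring (removed just below) would instead be written as a small subclass overriding the two generator methods introduced later in this patch. A rough sketch, for illustration only and not part of the patch::

    import numpy as np
    from sklearn.linear_model import LinearRegression
    from econml._ortho_learner import _OrthoLearner

    class ModelNuisance:
        def __init__(self, model_t, model_y):
            self._model_t = model_t
            self._model_y = model_y

        def fit(self, Y, T, W=None):
            self._model_t.fit(W, T)
            self._model_y.fit(W, Y)
            return self

        def predict(self, Y, T, W=None):
            # residualize Y and T using the first-stage fits
            return Y - self._model_y.predict(W), T - self._model_t.predict(W)

    class ModelFinal:
        def fit(self, Y, T, W=None, nuisances=None):
            Y_res, T_res = nuisances
            self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res)
            return self

        def predict(self, X=None):
            return self.model.coef_[0]

    class MyOrthoLearner(_OrthoLearner):
        # models are generated lazily instead of being passed to __init__
        def _gen_ortho_learner_model_nuisance(self):
            return ModelNuisance(LinearRegression(), LinearRegression())

        def _gen_ortho_learner_model_final(self):
            return ModelFinal()

    np.random.seed(123)
    X = np.random.normal(size=(100, 3))
    y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.1, size=(100,))
    est = MyOrthoLearner(n_splits=2, discrete_treatment=False, discrete_instrument=False,
                         categories='auto', random_state=None)
    est.fit(y, X[:, 0], W=X[:, 1:])
    print(est.const_marginal_effect())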
- Examples - -------- - - The example code below implements a very simple version of the double machine learning - method on top of the :class:`._OrthoLearner` class, for expository purposes. - For a more elaborate implementation of a Double Machine Learning child class of the class - :class:`._OrthoLearner` check out :class:`.DML` - and its child classes: - - .. testcode:: - - import numpy as np - from sklearn.linear_model import LinearRegression - from econml._ortho_learner import _OrthoLearner - class ModelNuisance: - def __init__(self, model_t, model_y): - self._model_t = model_t - self._model_y = model_y - def fit(self, Y, T, W=None): - self._model_t.fit(W, T) - self._model_y.fit(W, Y) - return self - def predict(self, Y, T, W=None): - return Y - self._model_y.predict(W), T - self._model_t.predict(W) - class ModelFinal: - def __init__(self): - return - def fit(self, Y, T, W=None, nuisances=None): - Y_res, T_res = nuisances - self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res) - return self - def predict(self, X=None): - return self.model.coef_[0] - def score(self, Y, T, W=None, nuisances=None): - Y_res, T_res = nuisances - return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2) - np.random.seed(123) - X = np.random.normal(size=(100, 3)) - y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.1, size=(100,)) - est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), - ModelFinal(), - n_splits=2, discrete_treatment=False, discrete_instrument=False, - categories='auto', random_state=None) - est.fit(y, X[:, 0], W=X[:, 1:]) - - >>> est.score_ - 0.00756830... - >>> est.const_marginal_effect() - 1.02364992... - >>> est.effect() - array([1.023649...]) - >>> est.effect(T0=0, T1=10) - array([10.236499...]) - >>> est.score(y, X[:, 0], W=X[:, 1:]) - 0.00727995... - >>> est.model_final.model - LinearRegression(fit_intercept=False) - >>> est.model_final.model.coef_ - array([1.023649...]) - - The following example shows how to do double machine learning with discrete treatments, using - the _OrthoLearner: - - .. testcode:: - - class ModelNuisance: - def __init__(self, model_t, model_y): - self._model_t = model_t - self._model_y = model_y - - def fit(self, Y, T, W=None): - self._model_t.fit(W, np.matmul(T, np.arange(1, T.shape[1]+1))) - self._model_y.fit(W, Y) - return self - - def predict(self, Y, T, W=None): - return Y - self._model_y.predict(W), T - self._model_t.predict_proba(W)[:, 1:] - - class ModelFinal: - - def __init__(self): - return - - def fit(self, Y, T, W=None, nuisances=None): - Y_res, T_res = nuisances - self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res) - return self - - def predict(self): - # theta needs to be of dimension (1, d_t) if T is (n, d_t) - return np.array([[self.model.coef_[0]]]) - - def score(self, Y, T, W=None, nuisances=None): - Y_res, T_res = nuisances - return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2) - - np.random.seed(123) - W = np.random.normal(size=(100, 3)) - import scipy.special - from sklearn.linear_model import LogisticRegression - T = np.random.binomial(1, scipy.special.expit(W[:, 0])) - y = T + W[:, 0] + np.random.normal(0, 0.01, size=(100,)) - est = _OrthoLearner(ModelNuisance(LogisticRegression(solver='lbfgs'), LinearRegression()), - ModelFinal(), n_splits=2, discrete_treatment=True, discrete_instrument=False, - categories='auto', random_state=None) - est.fit(y, T, W=W) - - >>> est.score_ - 0.00673015... 
- >>> est.const_marginal_effect() - array([[1.008401...]]) - >>> est.effect() - array([1.008401...]) - >>> est.score(y, T, W=W) - 0.00310431... - >>> est.model_final.model.coef_[0] - 1.00840170... - Attributes ---------- models_nuisance: list of objects of type(model_nuisance) @@ -449,21 +299,67 @@ def score(self, Y, T, W=None, nuisances=None): The out-of-sample scores from training each nuisance model """ - def __init__(self, model_nuisance, model_final, *, + def __init__(self, *, discrete_treatment, discrete_instrument, categories, n_splits, random_state, monte_carlo_iterations=None): - self._ortho_learner_model_nuisance = clone(model_nuisance, safe=False) self._models_nuisance = None - self._ortho_learner_model_final = clone(model_final, safe=False) - self._n_splits = n_splits - self._discrete_treatment = discrete_treatment - self._discrete_instrument = discrete_instrument - self._init_random_state = random_state - self._random_state = check_random_state(random_state) - self._categories = categories - self._monte_carlo_iterations = monte_carlo_iterations + self.n_splits = n_splits + self.discrete_treatment = discrete_treatment + self.discrete_instrument = discrete_instrument + self.random_state = random_state + self.categories = categories + self.monte_carlo_iterations = monte_carlo_iterations super().__init__() + @abstractmethod + def _gen_ortho_learner_model_nuisance(self): + """ Must return a fresh instance of a nuisance model + + Returns + ------- + model_nuisance: estimator + The estimator for fitting the nuisance function. Must implement + `fit` and `predict` methods that both have signatures:: + + model_nuisance.fit(Y, T, X=X, W=W, Z=Z, + sample_weight=sample_weight, sample_var=sample_var) + model_nuisance.predict(Y, T, X=X, W=W, Z=Z, + sample_weight=sample_weight, sample_var=sample_var) + + In fact we allow for the model method signatures to skip any of the keyword arguments + as long as the class is always called with the omitted keyword argument set to ``None``. + This can be enforced in child classes by re-implementing the fit and the various effect + methods. If ``discrete_treatment=True``, then the input ``T`` to both above calls will be the + one-hot encoding of the original input ``T``, excluding the first column of the one-hot. + + If the estimator also provides a score method with the same arguments as fit, it will be used to + calculate scores during training. + """ + pass + + @abstractmethod + def _gen_ortho_learner_model_final(self): + """ Must return a fresh instance of a final model + + Returns + ------- + model_final: estimator for fitting the response residuals to the features and treatment residuals + Must implement `fit` and `predict` methods that must have signatures:: + + model_final.fit(Y, T, X=X, W=W, Z=Z, nuisances=nuisances, + sample_weight=sample_weight, sample_var=sample_var) + model_final.predict(X=X) + + Predict, should just take the features X and return the constant marginal effect. In fact we allow for the + model method signatures to skip any of the keyword arguments as long as the class is always called with the + omitted keyword argument set to ``None``. Moreover, the predict function of the final model can take no + argument if the class is always called with ``X=None``. This can be enforced in child classes by + re-implementing the fit and the various effect methods. If ``discrete_treatment=True``, then the input ``T`` + to both above calls will be the one-hot encoding of the original input ``T``, excluding the first column of + the one-hot. 
+        """
+        pass
+
     def _check_input_dims(self, Y, T, X=None, W=None, Z=None, *other_arrays):
         assert shape(Y)[0] == shape(T)[0], "Dimension mis-match!"
         for arr in [X, W, Z, *other_arrays]:
@@ -495,27 +391,28 @@ def _subinds_check_none(self, var, inds):
 
     def _strata(self, Y, T, X=None, W=None, Z=None,
                 sample_weight=None, sample_var=None, groups=None,
                 cache_values=False, monte_carlo_iterations=None):
-        if self._discrete_instrument:
+        if self.discrete_instrument:
             Z = LabelEncoder().fit_transform(np.ravel(Z))
 
-        if self._discrete_treatment:
+        if self.discrete_treatment:
             enc = LabelEncoder()
             T = enc.fit_transform(np.ravel(T))
-            if self._discrete_instrument:
+            if self.discrete_instrument:
                 return T + Z * len(enc.classes_)
             else:
                 return T
-        elif self._discrete_instrument:
+        elif self.discrete_instrument:
             return Z
         else:
             return None
 
-    def _prefit(self, Y, T, *args, **kwargs):
-        categories = self._categories
-        if self._discrete_treatment:
-            if categories != 'auto':
-                categories = [categories]  # OneHotEncoder expects a 2D array with features per column
-            self._one_hot_encoder = OneHotEncoder(categories=categories, sparse=False, drop='first')
+    def _prefit(self, Y, T, *args, only_final=False, **kwargs):
+
+        # generate an instance of the final model
+        self._ortho_learner_model_final = self._gen_ortho_learner_model_final()
+        if not only_final:
+            # generate an instance of the nuisance model
+            self._ortho_learner_model_nuisance = self._gen_ortho_learner_model_nuisance()
 
         super()._prefit(Y, T, *args, **kwargs)
@@ -523,7 +420,7 @@ def _prefit(self, Y, T, *args, **kwargs):
                           "we will disallow passing X, W, and Z by position.", ['X', 'W', 'Z'])
     @BaseCateEstimator._wrap_fit
     def fit(self, Y, T, X=None, W=None, Z=None, *, sample_weight=None, sample_var=None, groups=None,
-            cache_values=False, inference=None):
+            cache_values=False, inference=None, only_final=False, check_input=True):
         """
         Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`.
 
@@ -552,44 +449,81 @@ def fit(self, Y, T, X=None, W=None, Z=None, *, sample_weight=None, sample_var=No
         inference: string, :class:`.Inference` instance, or None
             Method for performing inference.  This estimator supports 'bootstrap'
             (or an instance of :class:`.BootstrapInference`).
+        only_final: bool, default False
+            Whether to skip refitting the nuisance models and fit only the final model,
+            reusing the previously cached nuisance values
+        check_input: bool, default True
+            Whether to check if the input is valid
 
         Returns
         -------
         self : _OrthoLearner instance
         """
-        self._random_state = check_random_state(self._init_random_state)
-        Y, T, X, W, Z, sample_weight, sample_var, groups = check_input_arrays(
-            Y, T, X, W, Z, sample_weight, sample_var, groups)
-        self._check_input_dims(Y, T, X, W, Z, sample_weight, sample_var, groups)
-
-        all_nuisances = []
-        fitted_inds = None
-
-        for _ in range(self._monte_carlo_iterations or 1):
-            nuisances, new_inds = self._fit_nuisances(Y, T, X, W, Z, sample_weight=sample_weight, groups=groups)
-            all_nuisances.append(nuisances)
-            if fitted_inds is None:
-                fitted_inds = new_inds
-            elif not np.array_equal(fitted_inds, new_inds):
-                raise AttributeError("Different indices were fit by different folds, so they cannot be aggregated")
-
-        if self._monte_carlo_iterations is not None:
-            # TODO: support different ways to aggregate, like median?
- nuisances = np.mean(np.array(all_nuisances), axis=0) - - Y, T, X, W, Z, sample_weight, sample_var = (self._subinds_check_none(arr, fitted_inds) - for arr in (Y, T, X, W, Z, sample_weight, sample_var)) - nuisances = tuple([self._subinds_check_none(nuis, fitted_inds) for nuis in nuisances]) - self._cached_values = CachedValues(nuisances=nuisances, - Y=Y, T=T, X=X, W=W, Z=Z, - sample_weight=sample_weight, - sample_var=sample_var) if cache_values else None + self._random_state = check_random_state(self.random_state) + if check_input: + Y, T, X, W, Z, sample_weight, sample_var, groups = check_input_arrays( + Y, T, X, W, Z, sample_weight, sample_var, groups) + self._check_input_dims(Y, T, X, W, Z, sample_weight, sample_var, groups) + + if not only_final: + + if self.discrete_treatment: + categories = self.categories + if categories != 'auto': + categories = [categories] # OneHotEncoder expects a 2D array with features per column + self._one_hot_encoder = OneHotEncoder(categories=categories, sparse=False, drop='first') + self._one_hot_encoder.fit(reshape(T, (-1, 1))) + self._d_t = (len(self._one_hot_encoder.categories_[0]) - 1,) + self.transformer = FunctionTransformer( + func=_EncoderWrapper(self._one_hot_encoder).encode, + validate=False) + else: + self.transformer = None + + if self.discrete_instrument: + z_enc = LabelEncoder() + z_ohe = OneHotEncoder(categories='auto', sparse=False, drop='first') + z_ohe.fit(reshape(z_enc.fit_transform(Z.ravel()), (-1, 1))) + self.z_transformer = FunctionTransformer( + func=_EncoderWrapper(z_ohe, z_enc).encode, + validate=False) + else: + self.z_transformer = None + + all_nuisances = [] + fitted_inds = None + + for _ in range(self.monte_carlo_iterations or 1): + nuisances, new_inds = self._fit_nuisances(Y, T, X, W, Z, sample_weight=sample_weight, groups=groups) + all_nuisances.append(nuisances) + if fitted_inds is None: + fitted_inds = new_inds + elif not np.array_equal(fitted_inds, new_inds): + raise AttributeError("Different indices were fit by different folds, so they cannot be aggregated") + + if self.monte_carlo_iterations is not None: + # TODO: support different ways to aggregate, like median? + nuisances = np.mean(np.array(all_nuisances), axis=0) + + Y, T, X, W, Z, sample_weight, sample_var = (self._subinds_check_none(arr, fitted_inds) + for arr in (Y, T, X, W, Z, sample_weight, sample_var)) + nuisances = tuple([self._subinds_check_none(nuis, fitted_inds) for nuis in nuisances]) + self._cached_values = CachedValues(nuisances=nuisances, + Y=Y, T=T, X=X, W=W, Z=Z, + sample_weight=sample_weight, + sample_var=sample_var, + groups=groups) if cache_values else None + else: + nuisances = self._cached_values.nuisances + # _d_t is altered by fit nuisances to what prefit does. So we need to perform the same + # alteration even when we only want to fit_final. 
+ if self.transformer is not None: + self._d_t = (len(self._one_hot_encoder.categories_[0]) - 1,) + self._fit_final(Y=Y, T=T, X=X, W=W, Z=Z, nuisances=nuisances, sample_weight=sample_weight, sample_var=sample_var) - self._cache_invalid_message = None return self def refit(self, inference=None): @@ -605,74 +539,40 @@ def refit(self, inference=None): This instance """ assert self._cached_values, "Refit can only be called if values were cached during the original fit" - assert self._cache_invalid_message is None, "Can't refit: " + self._cache_invalid_message cached = self._cached_values kwargs = filter_none_kwargs( - X=cached.X, - W=cached.W, - Z=cached.Z, - sample_weight=cached.sample_weight, - sample_var=cached.sample_var + Y=cached.Y, T=cached.T, X=cached.X, W=cached.W, Z=cached.Z, + sample_weight=cached.sample_weight, sample_var=cached.sample_var, + groups=cached.groups, ) - - # in case the final model uses randomness, make sure to reset the internal random state - self._random_state = check_random_state(self._init_random_state) - - # this sequence of calls parallels that in BaseCateEstimator.wrap_fit - - inference = self._get_inference(inference) - - # not strictly necessary when refitting - self._prefit(cached.Y, cached.T, **kwargs) - if inference is not None: - # this *is* necessary even when refitting - # because we might have a new inference object - inference.prefit(self, cached.Y, cached.T, **kwargs) - - # fit only the final model - self._fit_final(cached.Y, - cached.T, - nuisances=cached.nuisances, - **kwargs) - - if inference is not None: - # NOTE: we call inference fit *after* calling the main fit method - inference.fit(self, cached.Y, cached.T, **kwargs) - self._inference = inference - + _OrthoLearner.fit(self, **kwargs, + cache_values=True, inference=inference, only_final=True, check_input=False) return self def _fit_nuisances(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): + # use a binary array to get stratified split in case of discrete treatment - stratify = self._discrete_treatment or self._discrete_instrument + stratify = self.discrete_treatment or self.discrete_instrument strata = self._strata(Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight, groups=groups) if strata is None: strata = T # always safe to pass T as second arg to split even if we're not actually stratifying - if self._discrete_treatment: - T = self._one_hot_encoder.fit_transform(reshape(T, (-1, 1))) - - if self._discrete_instrument: - z_enc = LabelEncoder() - Z = z_enc.fit_transform(Z.ravel()) - z_ohe = OneHotEncoder(categories='auto', sparse=False, drop='first') - Z = z_ohe.fit_transform(reshape(Z, (-1, 1))) - self.z_transformer = FunctionTransformer( - func=_EncoderWrapper(z_ohe, z_enc).encode, - validate=False) - else: - self.z_transformer = None + if self.discrete_treatment: + T = self.transformer.transform(reshape(T, (-1, 1))) + + if self.discrete_instrument: + Z = self.z_transformer.transform(reshape(Z, (-1, 1))) - if self._n_splits == 1: # special case, no cross validation + if self.n_splits == 1: # special case, no cross validation folds = None else: - splitter = check_cv(self._n_splits, [0], classifier=stratify) + splitter = check_cv(self.n_splits, [0], classifier=stratify) # if check_cv produced a new KFold or StratifiedKFold object, we need to set shuffle and random_state # TODO: ideally, we'd also infer whether we need a GroupKFold (if groups are passed) # however, sklearn doesn't support both stratifying and grouping (see # 
https://github.com/scikit-learn/scikit-learn/issues/13621), so for now the user needs to supply # their own object that supports grouping if they want to use groups. - if splitter != self._n_splits and isinstance(splitter, (KFold, StratifiedKFold)): + if splitter != self.n_splits and isinstance(splitter, (KFold, StratifiedKFold)): splitter.shuffle = True splitter.random_state = self._random_state @@ -687,14 +587,6 @@ def _fit_nuisances(self, Y, T, X=None, W=None, Z=None, sample_weight=None, group else: folds = splitter.split(to_split, strata) - if self._discrete_treatment: - self._d_t = shape(T)[1:] - self.transformer = FunctionTransformer( - func=_EncoderWrapper(self._one_hot_encoder).encode, - validate=False) - else: - self.transformer = None - nuisances, fitted_models, fitted_inds, scores = _crossfit(self._ortho_learner_model_nuisance, folds, Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight, groups=groups) @@ -806,76 +698,12 @@ def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None): @property def ortho_learner_model_final(self): + if not hasattr(self, '_ortho_learner_model_final'): + raise AttributeError("Model is not fitted!") return self._ortho_learner_model_final - @ortho_learner_model_final.setter - def ortho_learner_model_final(self, model_final): - self._ortho_learner_model_final = clone(model_final, safe=False) - - @property - def ortho_learner_model_nuisance(self): - return self._ortho_learner_model_nuisance - - @ortho_learner_model_nuisance.setter - def ortho_learner_model_nuisance(self, model_nuisance): - self._ortho_learner_model_nuisance = clone(model_nuisance, safe=False) - self._cache_invalid_message = "Setting the nuisance model invalidates the cached nuisances" - - @property - def n_splits(self): - return self._n_splits - - @n_splits.setter - def n_splits(self, n_splits): - self._n_splits = n_splits - self._cache_invalid_message = "Setting the number of splits invalidates the cached nuisances" - - @property - def discrete_treatment(self): - return self._discrete_treatment - - @discrete_treatment.setter - def discrete_treatment(self, discrete_treatment): - self._discrete_treatment = discrete_treatment - self._cache_invalid_message = "Setting discrete_treatment invalidates the cached nuisances" - - @property - def discrete_instrument(self): - return self._discrete_instrument - - @discrete_instrument.setter - def discrete_instrument(self, discrete_instrument): - self._discrete_instrument = discrete_instrument - self._cache_invalid_message = "Setting discrete_instrument invalidates the cached nuisances" - - @property - def random_state(self): - return self._init_random_state - - @random_state.setter - def random_state(self, random_state): - self._init_random_state = random_state - self._random_state = check_random_state(random_state) - self._cache_invalid_message = "Setting the random state invalidates the cached nuisances" - - @property - def categories(self): - return self._categories - - @categories.setter - def categories(self, categories): - self._categories = categories - self._cache_invalid_message = "Setting the categories invalidates the cached nuisances" - - @property - def monte_carlo_iterations(self): - return self._monte_carlo_iterations - - @monte_carlo_iterations.setter - def monte_carlo_iterations(self, monte_carlo_iterations): - self._monte_carlo_iterations = monte_carlo_iterations - self._cache_invalid_message = "Setting the number of monte carlo iterations invalidates the cached nuisances" - @property def models_nuisance(self): + if not 
hasattr(self, '_models_nuisance'): + raise AttributeError("Model is not fitted!") return self._models_nuisance diff --git a/econml/_rlearner.py b/econml/_rlearner.py index 640b450cb..c62e52770 100644 --- a/econml/_rlearner.py +++ b/econml/_rlearner.py @@ -25,6 +25,7 @@ The Econometrics Journal. https://arxiv.org/abs/1608.00060 """ +from abc import abstractmethod import numpy as np import copy from warnings import warn @@ -139,31 +140,6 @@ class _RLearner(_OrthoLearner): Parameters ---------- - model_y: estimator of E[Y | X, W] - The estimator for fitting the response to the features and controls. Must implement - `fit` and `predict` methods. Unlike sklearn estimators both methods must - take an extra second argument (the controls), i.e. :: - - model_y.fit(X, W, Y, sample_weight=sample_weight) - model_y.predict(X, W) - - model_t: estimator of E[T | X, W] - The estimator for fitting the treatment to the features and controls. Must implement - `fit` and `predict` methods. Unlike sklearn estimators both methods must - take an extra second argument (the controls), i.e. :: - - model_t.fit(X, W, T, sample_weight=sample_weight) - model_t.predict(X, W) - - model_final: estimator for fitting the response residuals to the features and treatment residuals - Must implement `fit` and `predict` methods. Unlike sklearn estimators the fit methods must - take an extra second argument (the treatment residuals). Predict, on the other hand, - should just take the features and return the constant marginal effect. More, concretely:: - - model_final.fit(X, T_res, Y_res, - sample_weight=sample_weight, sample_var=sample_var) - model_final.predict(X) - discrete_treatment: bool Whether the treatment values should be treated as categorical, rather than continuous, quantities @@ -197,60 +173,6 @@ class _RLearner(_OrthoLearner): monte_carlo_iterations: int, optional The number of times to rerun the first stage models to reduce the variance of the nuisances. - Examples - -------- - The example code below implements a very simple version of the double machine learning - method on top of the :class:`._RLearner` class, for expository purposes. - For a more elaborate implementation of a Double Machine Learning child class of the class - checkout :class:`.DML` and its child classes: - - .. 
testcode:: - - import numpy as np - from sklearn.linear_model import LinearRegression - from econml._rlearner import _RLearner - from sklearn.base import clone - class ModelFirst: - def __init__(self, model): - self._model = clone(model, safe=False) - def fit(self, X, W, Y, sample_weight=None): - self._model.fit(np.hstack([X, W]), Y) - return self - def predict(self, X, W): - return self._model.predict(np.hstack([X, W])) - class ModelFinal: - def fit(self, X, T_res, Y_res, sample_weight=None, sample_var=None): - self.model = LinearRegression(fit_intercept=False).fit(X * T_res.reshape(-1, 1), - Y_res) - return self - def predict(self, X): - return self.model.predict(X) - np.random.seed(123) - X = np.random.normal(size=(1000, 3)) - y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.01, size=(1000,)) - est = _RLearner(ModelFirst(LinearRegression()), - ModelFirst(LinearRegression()), - ModelFinal(), - n_splits=2, discrete_treatment=False, categories='auto', random_state=None) - est.fit(y, X[:, 0], X=np.ones((X.shape[0], 1)), W=X[:, 1:]) - - >>> est.const_marginal_effect(np.ones((1,1))) - array([0.999631...]) - >>> est.effect(np.ones((1,1)), T0=0, T1=10) - array([9.996314...]) - >>> est.score(y, X[:, 0], X=np.ones((X.shape[0], 1)), W=X[:, 1:]) - 9.73638006...e-05 - >>> est.rlearner_model_final.model - LinearRegression(fit_intercept=False) - >>> est.rlearner_model_final.model.coef_ - array([0.999631...]) - >>> est.score_ - 9.82623204...e-05 - >>> [mdl._model for mdl in est.models_y] - [LinearRegression(), LinearRegression()] - >>> [mdl._model for mdl in est.models_t] - [LinearRegression(), LinearRegression()] - Attributes ---------- models_y: list of objects of type(model_y) @@ -276,20 +198,66 @@ def predict(self, X): is multidimensional, then the average of the MSEs for each dimension of Y is returned. """ - def __init__(self, model_y, model_t, model_final, - discrete_treatment, categories, n_splits, random_state, monte_carlo_iterations=None): - self._rlearner_model_final = model_final - self._rlearner_model_y = model_y - self._rlearner_model_t = model_t - super().__init__(_ModelNuisance(clone(model_y, safe=False), clone(model_t, safe=False)), - _ModelFinal(clone(model_final, safe=False)), - discrete_treatment=discrete_treatment, + def __init__(self, *, discrete_treatment, categories, n_splits, random_state, monte_carlo_iterations=None): + super().__init__(discrete_treatment=discrete_treatment, discrete_instrument=False, # no instrument, so doesn't matter categories=categories, n_splits=n_splits, random_state=random_state, monte_carlo_iterations=monte_carlo_iterations) + @abstractmethod + def _gen_model_y(self): + """ + Returns + ------- + model_y: estimator of E[Y | X, W] + The estimator for fitting the response to the features and controls. Must implement + `fit` and `predict` methods. Unlike sklearn estimators both methods must + take an extra second argument (the controls), i.e. :: + + model_y.fit(X, W, Y, sample_weight=sample_weight) + model_y.predict(X, W) + """ + pass + + @abstractmethod + def _gen_model_t(self): + """ + Returns + ------- + model_t: estimator of E[T | X, W] + The estimator for fitting the treatment to the features and controls. Must implement + `fit` and `predict` methods. Unlike sklearn estimators both methods must + take an extra second argument (the controls), i.e. 
:: + + model_t.fit(X, W, T, sample_weight=sample_weight) + model_t.predict(X, W) + """ + pass + + @abstractmethod + def _gen_rlearner_model_final(self): + """ + Returns + ------- + model_final: estimator for fitting the response residuals to the features and treatment residuals + Must implement `fit` and `predict` methods. Unlike sklearn estimators the fit methods must + take an extra second argument (the treatment residuals). Predict, on the other hand, + should just take the features and return the constant marginal effect. More, concretely:: + + model_final.fit(X, T_res, Y_res, + sample_weight=sample_weight, sample_var=sample_var) + model_final.predict(X) + """ + pass + + def _gen_ortho_learner_model_nuisance(self): + return _ModelNuisance(self._gen_model_y(), self._gen_model_t()) + + def _gen_ortho_learner_model_final(self): + return _ModelFinal(self._gen_rlearner_model_final()) + @_deprecate_positional("X, and should be passed by keyword only. In a future release " "we will disallow passing X and W by position.", ['X', 'W']) def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, groups=None, @@ -363,35 +331,7 @@ def score(self, Y, T, X=None, W=None): def rlearner_model_final(self): # NOTE: important to get parent's wrapped copy so that # after training wrapped featurizer is also trained, etc. - return self._ortho_learner_model_final._model_final - - @rlearner_model_final.setter - def rlearner_model_final(self, model_final): - model_final = clone(model_final, safe=False) - self._rlearner_model_final = model_final - self._ortho_learner_model_final = _ModelFinal(model_final) - - @property - def rlearner_model_y(self): - return self._ortho_learner_model_nuisance._model_y - - @rlearner_model_y.setter - def rlearner_model_y(self, model_y): - model_y = clone(model_y, safe=False) - self._rlearner_model_y = model_y - self._ortho_learner_model_nuisance = _ModelNuisance(model_y, self._rlearner_model_t) - self._cache_invalid_message = "Setting the Y model invalidates cached nuisances" - - @property - def rlearner_model_t(self): - return self._ortho_learner_model_nuisance._model_t - - @rlearner_model_t.setter - def rlearner_model_t(self, model_t): - model_t = clone(model_t, safe=False) - self._rlearner_model_t = model_t - self._ortho_learner_model_nuisance = _ModelNuisance(self._rlearner_model_y, model_t) - self._cache_invalid_message = "Setting the T model invalidates cached nuisances" + return self.ortho_learner_model_final._model_final @property def models_y(self): @@ -408,17 +348,3 @@ def nuisance_scores_y(self): @property def nuisance_scores_t(self): return self.nuisance_scores_[1] - - @_OrthoLearner.ortho_learner_model_final.setter - def ortho_learner_model_final(self, model): - raise AttributeError("OrthoLearner's final model cannot be set directly on an _RLearner instance; " - "set the rlearner_model_final isntead.") - - @_OrthoLearner.ortho_learner_model_nuisance.setter - def ortho_learner_model_nuisance(self, model): - raise AttributeError("Nuisance model cannot be set directly on an _RLearner instance; " - "set the Y and T model attributes instead.") - - @_OrthoLearner.discrete_instrument.setter - def discrete_instrument(self, flag): - raise AttributeError("_RLearners don't support instruments, so discrete_instrument will always be False") diff --git a/econml/cate_estimator.py b/econml/cate_estimator.py index 31a245107..593c73300 100644 --- a/econml/cate_estimator.py +++ b/econml/cate_estimator.py @@ -511,7 +511,9 @@ def _get_inference_options(self): 
options.update(auto=LinearModelFinalInference) return options - bias_part_of_coef = False + @property + def bias_part_of_coef(self): + return False @property def coef_(self): @@ -528,7 +530,7 @@ def coef_(self): a vector and not a 2D array. For binary treatment the n_t dimension is also omitted. """ - return parse_final_model_params(self.model_final.coef_, self.model_final.intercept_, + return parse_final_model_params(self.model_final_.coef_, self.model_final_.intercept_, self._d_y, self._d_t, self._d_t_in, self.bias_part_of_coef, self.fit_cate_intercept)[0] @@ -547,7 +549,7 @@ def intercept_(self): """ if not self.fit_cate_intercept: raise AttributeError("No intercept was fitted!") - return parse_final_model_params(self.model_final.coef_, self.model_final.intercept_, + return parse_final_model_params(self.model_final_.coef_, self.model_final_.intercept_, self._d_y, self._d_t, self._d_t_in, self.bias_part_of_coef, self.fit_cate_intercept)[1] @@ -726,7 +728,7 @@ def _get_inference_options(self): @property def feature_importances_(self): - return self.model_final.feature_importances_ + return self.model_final_.feature_importances_ class LinearModelFinalCateEstimatorDiscreteMixin(BaseCateEstimator): diff --git a/econml/dml.py b/econml/dml.py index ac07393da..70ee975f1 100644 --- a/econml/dml.py +++ b/econml/dml.py @@ -52,7 +52,7 @@ ForestModelFinalCateEstimatorMixin, LinearModelFinalCateEstimatorMixin, StatsModelsCateEstimatorMixin) -from .inference import StatsModelsInference +from .inference import StatsModelsInference, GenericSingleTreatmentModelFinalInference from .sklearn_extensions.ensemble import SubsampledHonestForest from .sklearn_extensions.linear_model import (MultiOutputDebiasedLasso, StatsModelsLinearRegression, @@ -235,30 +235,17 @@ def original_featurizer(self): return self.rlearner_model_final._original_featurizer @property - def featurizer(self): + def featurizer_(self): # NOTE This is used by the inference methods and has to be the overall featurizer. intended # for internal use by the library return self.rlearner_model_final._featurizer @property - def model_final(self): + def model_final_(self): # NOTE This is used by the inference methods and is more for internal use to the library # We need to use the rlearner's copy to retain the information from fitting return self.rlearner_model_final._model - @model_final.setter - def model_final(self, model): - model = _FinalWrapper(model, - fit_cate_intercept=self.rlearner_model_final._fit_cate_intercept, - featurizer=self.rlearner_model_final._original_featurizer, - use_weight_trick=self.rlearner_model_final._use_weight_trick) - self._rlearner_model_final = model - - @_RLearner.rlearner_model_final.setter - def rlearner_model_final(self, model): - raise AttributeError("rlearner_final_model cannot be set directly on a DML instance; " - "set the model_final attributes instead.") - @property def model_cate(self): """ @@ -270,7 +257,7 @@ def model_cate(self): An instance of the model_final object that was fitted after calling fit which corresponds to the constant marginal CATE model. """ - return self._model_final + return self.rlearner_model_final._model @property def models_y(self): @@ -443,6 +430,9 @@ class takes as input the parameter `model_t`, which is an arbitrary scikit-learn Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all W, X are None, then we call `split(ones((T.shape[0], 1)), T)`. 
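Because the constructor arguments are now kept as plain attributes and the wrapped first-stage and final models are regenerated through the ``_gen_*`` methods on every fit, a cached fit can be reused to swap in a different final model, as the fit docstring below describes. An illustrative sketch on hypothetical data (not part of the patch; assumes the refit workflow added in this diff)::

    import numpy as np
    from sklearn.linear_model import Lasso, LinearRegression
    from econml.dml import DML

    np.random.seed(0)
    X = np.random.normal(size=(500, 3))
    T = X[:, 0] + np.random.normal(size=(500,))
    Y = 2 * T + X[:, 0] + np.random.normal(size=(500,))

    est = DML(model_y=LinearRegression(), model_t=LinearRegression(),
              model_final=LinearRegression(fit_intercept=False))
    est.fit(Y, T, X=X, cache_values=True, inference=None)     # first-stage results are cached
    est.model_final = Lasso(alpha=0.01, fit_intercept=False)  # now a plain attribute, so it can be replaced
    est.refit(inference=None)                                 # refits only the final stage from the cache
    print(est.const_marginal_effect(X[:3]))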
+ monte_carlo_iterations: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances. + random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; @@ -461,53 +451,55 @@ def __init__(self, discrete_treatment=False, categories='auto', n_splits=2, - random_state=None, - monte_carlo_iterations=None): + monte_carlo_iterations=None, + random_state=None): # set random_state and discrete_treatment now even though they're set by super's init # so that they can be used to initialize models - self._random_state = check_random_state(random_state) - self._discrete_treatment = discrete_treatment + self.random_state = random_state + self.discrete_treatment = discrete_treatment # TODO: consider whether we need more care around stateful featurizers, # since we clone it and fit separate copies - self._fit_cate_intercept = fit_cate_intercept - self._linear_first_stages = linear_first_stages - self._featurizer = clone(featurizer, safe=False) - super().__init__(model_y=self._prepare_model_y(clone(model_y, safe=False)), - model_t=self._prepare_model_t(clone(model_t, safe=False)), - model_final=self._prepare_final_model(model_final), - discrete_treatment=discrete_treatment, + self.fit_cate_intercept = fit_cate_intercept + self.linear_first_stages = linear_first_stages + self.featurizer = clone(featurizer, safe=False) + self.model_y = clone(model_y, safe=False) + self.model_t = clone(model_t, safe=False) + self.model_final = clone(model_final, safe=False) + super().__init__(discrete_treatment=discrete_treatment, categories=categories, n_splits=n_splits, - random_state=random_state, - monte_carlo_iterations=monte_carlo_iterations) - - def _prepare_model_y(self, model_y): - self._model_y = model_y - if model_y == 'auto': - model_y = WeightedLassoCVWrapper(random_state=self._random_state) - return _FirstStageWrapper(model_y, True, self._featurizer, self._linear_first_stages, self._discrete_treatment) - - def _prepare_model_t(self, model_t): - self._model_t = model_t - if model_t == 'auto': - if self._discrete_treatment: - model_t = LogisticRegressionCV(cv=WeightedStratifiedKFold(random_state=self._random_state), - random_state=self._random_state) + monte_carlo_iterations=monte_carlo_iterations, + random_state=random_state) + + def _gen_model_y(self): + if self.model_y == 'auto': + model_y = WeightedLassoCVWrapper(random_state=self.random_state) + else: + model_y = clone(self.model_y, safe=False) + return _FirstStageWrapper(model_y, True, self.featurizer, self.linear_first_stages, self.discrete_treatment) + + def _gen_model_t(self): + if self.model_t == 'auto': + if self.discrete_treatment: + model_t = LogisticRegressionCV(cv=WeightedStratifiedKFold(random_state=self.random_state), + random_state=self.random_state) else: - model_t = WeightedLassoCVWrapper(random_state=self._random_state) - return _FirstStageWrapper(model_t, False, self._featurizer, - self._linear_first_stages, self._discrete_treatment) + model_t = WeightedLassoCVWrapper(random_state=self.random_state) + else: + model_t = clone(self.model_t, safe=False) + return _FirstStageWrapper(model_t, False, self.featurizer, + self.linear_first_stages, self.discrete_treatment) + + def _gen_featurizer(self): + return clone(self.featurizer, safe=False) - def _prepare_final_model(self, model): 
- self._model_final = model - return _FinalWrapper(self._model_final, self._fit_cate_intercept, self._featurizer, False) + def _gen_model_final(self): + return clone(self.model_final, safe=False) - def _update_models(self): - self._rlearner_model_y = self._prepare_model_y(self.model_y) - self._rlearner_model_t = self._prepare_model_t(self.model_t) - self._rlearner_model_final = self._prepare_model_final(self.model_final) + def _gen_rlearner_model_final(self): + return _FinalWrapper(self._gen_model_final(), self.fit_cate_intercept, self.featurizer, False) # override only so that we can update the docstring to indicate support for `StatsModelsInference` @_deprecate_positional("X and W should be passed by keyword only. In a future release " @@ -535,7 +527,6 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou must support a 'groups' argument to its split method. cache_values: bool, default False Whether to cache inputs and first stage results, which will allow refitting a different final model - inference: string, :class:`.Inference` instance, or None Method for performing inference. This estimator supports 'bootstrap' (or an instance of :class:`.BootstrapInference`) and 'auto' @@ -549,90 +540,12 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou cache_values=cache_values, inference=inference) - @property - def linear_first_stages(self): - return self._linear_first_stages - - @linear_first_stages.setter - def linear_first_stages(self, linear_first_stages): - self._linear_first_stages = linear_first_stages - self._update_models() - # updating the models will set a misleading invalidation message, so overwrite it - self._cache_invalid_message = "Changing linear_first_stages invalidates stored nuisance results" - - @_BaseDML.featurizer.setter - def featurizer(self, featurizer): - self._featurizer = clone(featurizer, safe=False) - if self._linear_first_stages: - self._update_models() - # updating the models will set a misleading invalidation message, so overwrite it - self._cache_invalid_message = ("Changing the featurizer when linear_first_stages is True " - "invalidates stored nuisance results") - else: - # only the final model needs to change - self._rlearner_model_final = self._prepare_final_model(self._model_final) - - @property - def fit_cate_intercept(self): - return self._fit_cate_intercept - - @fit_cate_intercept.setter - def fit_cate_intercept(self, fit_cate_intercept): - self._fit_cate_intercept = fit_cate_intercept - # only the final model needs to change - self._rlearner_model_final = self._prepare_final_model(self._model_final) + def refit(self, *, inference='auto'): + super().refit(inference=inference) @property def bias_part_of_coef(self): - return self.fit_cate_intercept - - @_OrthoLearner.discrete_treatment.setter - def discrete_treatment(self, discrete_treatement): - # super().discrete_treatment = discrete_treatment - super(DML, DML).discrete_treatment.__set__(self, discrete_treatement) - # need to modify first-stage models in response, although unless model_t is 'auto' - # the treatment model probably also needs to be updated from a classifier to a regressor or vice-versa... 
- self._update_models() - - @property - def model_t(self): - return self._model_t - - @model_t.setter - def model_t(self, model_t): - model_t = clone(model_t, safe=False) - self._rlearner_model_t = self._prepare_model_y(model_t) - - @property - def model_y(self): - return self._model_y - - @model_y.setter - def model_y(self, model_y): - model_y = clone(model_y, safe=False) - self._rlearner_model_y = self._prepare_model_y(model_y) - - @_RLearner.rlearner_model_y.setter - def rlearner_model_y(self, model): - raise AttributeError("rlearner_model_y cannot be set directly on a DML instance; " - "set the model_y attribute instead.") - - @_RLearner.rlearner_model_t.setter - def rlearner_model_t(self, model): - raise AttributeError("rlearner_model_t cannot be set directly on a DML instance; " - "set the model_t attribute instead.") - - @_RLearner.rlearner_model_final.setter - def rlearner_model_final(self, model): - raise AttributeError("rlearner_model_final cannot be set directly on a DML instance; " - "set the model_final attribute instead.") - - # Setting the random_state affects Y and T nuisances models if they are auto - @_OrthoLearner.random_state.setter - def random_state(self, random_state): - # super().random_state = random_state - super(DML, DML).random_state.__set__(self, random_state) - self._update_models() + return self.rlearner_model_final._fit_cate_intercept class LinearDML(StatsModelsCateEstimatorMixin, DML): @@ -688,6 +601,9 @@ class LinearDML(StatsModelsCateEstimatorMixin, DML): Unless an iterable is used, we call `split(X,T)` to generate the splits. + monte_carlo_iterations: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances. + random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; @@ -706,19 +622,22 @@ def __init__(self, discrete_treatment=False, categories='auto', n_splits=2, - random_state=None, - monte_carlo_iterations=None): + monte_carlo_iterations=None, + random_state=None): super().__init__(model_y=model_y, model_t=model_t, - model_final=StatsModelsLinearRegression(fit_intercept=False), + model_final=None, featurizer=featurizer, fit_cate_intercept=fit_cate_intercept, linear_first_stages=linear_first_stages, discrete_treatment=discrete_treatment, categories=categories, n_splits=n_splits, - random_state=random_state, - monte_carlo_iterations=monte_carlo_iterations) + monte_carlo_iterations=monte_carlo_iterations, + random_state=random_state,) + + def _gen_model_final(self): + return StatsModelsLinearRegression(fit_intercept=False) # override only so that we can update the docstring to indicate support for `StatsModelsInference` @_deprecate_positional("X and W should be passed by keyword only. In a future release " @@ -748,7 +667,6 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou must support a 'groups' argument to its split method. cache_values: bool, default False Whether to cache inputs and first stage results, which will allow refitting a different final model - inference: string, :class:`.Inference` instance, or None Method for performing inference. 
This estimator supports 'bootstrap' (or an instance of :class:`.BootstrapInference`) and 'statsmodels' @@ -763,10 +681,14 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou cache_values=cache_values, inference=inference) - @DML.model_final.setter + @property + def model_final(self): + return self._gen_model_final() + + @model_final.setter def model_final(self, model): - raise AttributeError("LinearDML final model can't be chnaged from " - "StatsModelsLinearRegression(fit_intercept=False)") + if model is not None: + raise ValueError("Parameter `model_final` cannot be altered for this estimator!") class SparseLinearDML(DebiasedLassoCateEstimatorMixin, DML): @@ -844,6 +766,9 @@ class SparseLinearDML(DebiasedLassoCateEstimatorMixin, DML): Unless an iterable is used, we call `split(X,T)` to generate the splits. + monte_carlo_iterations: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances. + random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; @@ -865,28 +790,29 @@ def __init__(self, discrete_treatment=False, categories='auto', n_splits=2, - random_state=None, - monte_carlo_iterations=None): - self._alpha = alpha - self._max_iter = max_iter - self._tol = tol - model_final = MultiOutputDebiasedLasso( - alpha=alpha, - fit_intercept=False, - max_iter=max_iter, - tol=tol, - random_state=random_state) + monte_carlo_iterations=None, + random_state=None): + self.alpha = alpha + self.max_iter = max_iter + self.tol = tol super().__init__(model_y=model_y, model_t=model_t, - model_final=model_final, + model_final=None, featurizer=featurizer, fit_cate_intercept=fit_cate_intercept, linear_first_stages=linear_first_stages, discrete_treatment=discrete_treatment, categories=categories, n_splits=n_splits, - random_state=random_state, - monte_carlo_iterations=monte_carlo_iterations) + monte_carlo_iterations=monte_carlo_iterations, + random_state=random_state) + + def _gen_model_final(self): + return MultiOutputDebiasedLasso(alpha=self.alpha, + fit_intercept=False, + max_iter=self.max_iter, + tol=self.tol, + random_state=self.random_state) @_deprecate_positional("X and W should be passed by keyword only. 
In a future release " "we will disallow passing X and W by position.", ['X', 'W']) @@ -937,79 +863,31 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou sample_weight=sample_weight, sample_var=None, groups=groups, cache_values=cache_values, inference=inference) - @DML.model_final.setter - def model_final(self, model): - raise AttributeError("SparseLinearDML final model can't be set directly;" - "instead set alpha, tol, or max_iter to change the debiased lasso settings") - - @property - def alpha(self): - return self._alpha - - @alpha.setter - def alpha(self, alpha): - self._alpha = alpha - self._model_final = MultiOutputDebiasedLasso( - alpha=self.alpha, - fit_intercept=False, - max_iter=self.max_iter, - tol=self.tol, - random_state=self.random_state) - @property - def max_iter(self): - return self._max_iter - - @max_iter.setter - def max_iter(self, max_iter): - self.max_iter = max_iter - self._model_final = MultiOutputDebiasedLasso( - alpha=self.alpha, - fit_intercept=False, - max_iter=self.max_iter, - tol=self.tol, - random_state=self.random_state) + def model_final(self): + return self._gen_model_final() - @property - def tol(self): - return self._tol - - @tol.setter - def tol(self, tol): - self._tol = tol - self._model_final = MultiOutputDebiasedLasso( - alpha=self.alpha, - fit_intercept=False, - max_iter=self.max_iter, - tol=self.tol, - random_state=self.random_state) - - @_OrthoLearner.random_state.setter - def random_state(self, random_state): - # super().random_state = random_state - super(SparseLinearDML, SparseLinearDML).random_state.__set__(self, random_state) - self._model_final = MultiOutputDebiasedLasso( - alpha=self.alpha, - fit_intercept=False, - max_iter=self.max_iter, - tol=self.tol, - random_state=self.random_state) + @model_final.setter + def model_final(self, model): + if model is not None: + raise ValueError("Parameter `model_final` cannot be altered for this estimator!") class _RandomFeatures(TransformerMixin): def __init__(self, dim, bw, random_state): - self._dim = dim - self._bw = bw - self._random_state = random_state + self.dim = dim + self.bw = bw + self.random_state = random_state def fit(self, X): - random_state = check_random_state(self._random_state) - self.omegas = random_state.normal(0, 1 / self._bw, size=(shape(X)[1], self._dim)) - self.biases = random_state.uniform(0, 2 * np.pi, size=(1, self._dim)) + random_state = check_random_state(self.random_state) + self.omegas_ = random_state.normal(0, 1 / self.bw, size=(shape(X)[1], self.dim)) + self.biases_ = random_state.uniform(0, 2 * np.pi, size=(1, self.dim)) + self.dim_ = self.dim return self def transform(self, X): - return np.sqrt(2 / self._dim) * np.cos(np.matmul(X, self.omegas) + self.biases) + return np.sqrt(2 / self.dim_) * np.cos(np.matmul(X, self.omegas_) + self.biases_) class KernelDML(DML): @@ -1064,6 +942,9 @@ class KernelDML(DML): Unless an iterable is used, we call `split(X,T)` to generate the splits. + monte_carlo_iterations: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances. 
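The refactored ``_RandomFeatures`` transformer above follows the sklearn convention of keeping only hyperparameters in ``__init__`` and storing fitted state in trailing-underscore attributes; it computes the random Fourier feature map ``sqrt(2 / dim) * cos(X @ omegas_ + biases_)``. A small illustrative check (not part of the patch)::

    import numpy as np
    from econml.dml import _RandomFeatures

    X = np.random.normal(size=(10, 4))
    rf = _RandomFeatures(dim=20, bw=1.0, random_state=0).fit(X)
    print(rf.transform(X).shape)   # (10, 20)
    print(rf.omegas_.shape)        # (4, 20), entries drawn with standard deviation 1 / bw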
+ random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; @@ -1075,53 +956,44 @@ class KernelDML(DML): """ def __init__(self, model_y='auto', model_t='auto', fit_cate_intercept=True, - dim=20, bw=1.0, discrete_treatment=False, categories='auto', n_splits=2, random_state=None, - monte_carlo_iterations=None): - super().__init__(model_y=model_y, model_t=model_t, - model_final=ElasticNetCV(fit_intercept=False, random_state=random_state), - featurizer=_RandomFeatures(dim, bw, random_state), + dim=20, bw=1.0, discrete_treatment=False, categories='auto', n_splits=2, + monte_carlo_iterations=None, random_state=None): + self.dim = dim + self.bw = bw + super().__init__(model_y=model_y, + model_t=model_t, + model_final=None, + featurizer=None, fit_cate_intercept=fit_cate_intercept, discrete_treatment=discrete_treatment, categories=categories, - n_splits=n_splits, random_state=random_state, - monte_carlo_iterations=monte_carlo_iterations) + n_splits=n_splits, + monte_carlo_iterations=monte_carlo_iterations, + random_state=random_state) - @property - def bw(self): - return self._bw + def _gen_model_final(self): + return ElasticNetCV(fit_intercept=False, random_state=self.random_state) - @bw.setter - def bw(self, bw): - self._bw = bw - # super().featurizer = _RandomFeatures(self._dim, self._bw, self.random_state) - super(KernelDML, KernelDML).featurizer.__set__(self, _RandomFeatures(self._dim, self._bw, self.random_state)) + def _gen_featurizer(self): + return _RandomFeatures(self.dim, self.bw, self.random_state) @property - def dim(self): - return self._dim + def featurizer(self): + return self._gen_featurizer() - @dim.setter - def dim(self, dim): - self._dim = dim - # super().featurizer = _RandomFeatures(self._dim, self._bw, self.random_state) - super(KernelDML, KernelDML).featurizer.__set__(self, _RandomFeatures(self._dim, self._bw, self.random_state)) + @featurizer.setter + def featurizer(self, value): + if value is not None: + raise ValueError("Parameter `featurizer` cannot be altered for this estimator!") - @_BaseDML.featurizer.setter - def featurizer(self, dim): - raise AttributeError("KernelDML featurizer can't be set directly; " - "instead set the bw and dim attributes to modify the kernel") + @property + def model_final(self): + return self._gen_model_final() - @DML.model_final.setter + @model_final.setter def model_final(self, model): - raise AttributeError("KernelDML final model can't be changed from ElasticNetCV") - - @_OrthoLearner.random_state.setter - def random_state(self, random_state): - # super().random_state = random_state - super(KernelDML, KernelDML).random_state.__set__(self, random_state) - # super().model_final = ElasticNetCV(fit_intercept=False, random_state=random_state) - self._model_final = ElasticNetCV(fit_intercept=False, - random_state=random_state) + if model is not None: + raise ValueError("Parameter `model_final` cannot be altered for this estimator!") class NonParamDML(_BaseDML): @@ -1174,6 +1046,9 @@ class NonParamDML(_BaseDML): Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all W, X are None, then we call `split(ones((T.shape[0], 1)), T)`. + monte_carlo_iterations: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances. 
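``NonParamDML`` likewise keeps its models as constructor attributes, wraps them lazily through the new ``_gen_*`` methods, and maps 'auto' inference to ``GenericSingleTreatmentModelFinalInference`` (added below). A usage sketch on hypothetical data (illustrative only)::

    import numpy as np
    from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
    from econml.dml import NonParamDML

    np.random.seed(0)
    X = np.random.normal(size=(1000, 5))
    T = np.random.binomial(1, 0.5, size=(1000,))
    Y = (1 + X[:, 0]) * T + X[:, 1] + np.random.normal(size=(1000,))

    est = NonParamDML(model_y=GradientBoostingRegressor(),
                      model_t=GradientBoostingClassifier(),
                      model_final=GradientBoostingRegressor(),
                      discrete_treatment=True)
    est.fit(Y, T, X=X, cache_values=True, inference=None)
    print(est.effect(X[:5]))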
+ random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; @@ -1190,60 +1065,46 @@ def __init__(self, discrete_treatment=False, categories='auto', n_splits=2, - random_state=None, - monte_carlo_iterations=None): + monte_carlo_iterations=None, + random_state=None): # TODO: consider whether we need more care around stateful featurizers, # since we clone it and fit separate copies - model_y = clone(model_y, safe=False) - model_t = clone(model_t, safe=False) - self._model_y = model_y - self._model_t = model_t - self._featurizer = clone(featurizer, safe=False) - - super().__init__(model_y=_FirstStageWrapper(model_y, True, - featurizer, False, discrete_treatment), - model_t=_FirstStageWrapper(model_t, False, - featurizer, False, discrete_treatment), - model_final=_FinalWrapper(model_final, False, featurizer, True), - discrete_treatment=discrete_treatment, + self.model_y = clone(model_y, safe=False) + self.model_t = clone(model_t, safe=False) + self.featurizer = clone(featurizer, safe=False) + self.model_final = clone(model_final, safe=False) + super().__init__(discrete_treatment=discrete_treatment, categories=categories, n_splits=n_splits, - random_state=random_state, - monte_carlo_iterations=monte_carlo_iterations) + monte_carlo_iterations=monte_carlo_iterations, + random_state=random_state) - @_BaseDML.featurizer.setter - def featurizer(self, featurizer): - self._featurizer = clone(featurizer, safe=False) - # featurizer isn't actually used by first stage models since linear_first_stages is False, - # so we only need to update model_final - # _BaseDML's final model setter reuses its old featurizer - # so we need to pass _BaseDML, not DML to the super() call - # super(_BasaeDML).model_final = _FinalWrapper(...) 
- super(_BaseDML, _BaseDML).model_final.__set__(self, _FinalWrapper( - self.model_final, False, self._featurizer, True)) + def _get_inference_options(self): + # add blb to parent's options + options = super()._get_inference_options() + options.update(auto=GenericSingleTreatmentModelFinalInference) + return options - @property - def model_t(self): - return self._model_t + def _gen_model_y(self): + return _FirstStageWrapper(clone(self.model_y, safe=False), True, + self.featurizer, False, self.discrete_treatment) - @model_t.setter - def model_t(self, model_t): - model_t = clone(model_t, safe=False) - self._model_t = model_t - self._rlearner_model_t = _FirstStageWrapper(model_t, False, - self.featurizer, False, self.discrete_treatment) + def _gen_model_t(self): + return _FirstStageWrapper(clone(self.model_t, safe=False), False, + self.featurizer, False, self.discrete_treatment) - @property - def model_y(self): - return self._model_y + def _gen_featurizer(self): + return clone(self.featurizer, safe=False) - @model_y.setter - def model_y(self, model_y): - model_y = clone(model_y, safe=False) - self._model_y = model_y - self._rlearner_model_y = _FirstStageWrapper(model_y, False, - self.featurizer, False, self.discrete_treatment) + def _gen_model_final(self): + return clone(self.model_final, safe=False) + + def _gen_rlearner_model_final(self): + return _FinalWrapper(self._gen_model_final(), False, self.featurizer, True) + + def refit(self, *, inference='auto'): + super().refit(inference=inference) class ForestDML(ForestModelFinalCateEstimatorMixin, NonParamDML): @@ -1268,7 +1129,7 @@ class ForestDML(ForestModelFinalCateEstimatorMixin, NonParamDML): The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values). The first category will be treated as the control treatment. - n_crossfit_splits: int, cross-validation generator or an iterable, optional (Default=2) + n_splits: int, cross-validation generator or an iterable, optional (Default=2) Determines the cross-validation splitting strategy. Possible inputs for cv are: @@ -1285,6 +1146,9 @@ class ForestDML(ForestModelFinalCateEstimatorMixin, NonParamDML): Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all W, X are None, then we call `split(ones((T.shape[0], 1)), T)`. + monte_carlo_iterations: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances. + n_estimators : integer, optional (default=100) The total number of trees in the forest. 
The forest consists of a forest of sqrt(n_estimators) sub-forests, where each sub-forest @@ -1404,9 +1268,11 @@ class ForestDML(ForestModelFinalCateEstimatorMixin, NonParamDML): def __init__(self, model_y, model_t, + featurizer=None, discrete_treatment=False, categories='auto', - n_crossfit_splits=2, + n_splits=2, + monte_carlo_iterations=None, n_estimators=100, criterion="mse", max_depth=None, @@ -1420,41 +1286,45 @@ def __init__(self, honest=True, n_jobs=None, verbose=0, - random_state=None, - monte_carlo_iterations=None): - self._n_estimators = n_estimators - self._criterion = criterion - self._max_depth = max_depth - self._min_samples_split = min_samples_split - self._min_samples_leaf = min_samples_leaf - self._min_weight_fraction_leaf = min_weight_fraction_leaf - self._max_features = max_features - self._max_leaf_nodes = max_leaf_nodes - self._min_impurity_decrease = min_impurity_decrease - self._subsample_fr = subsample_fr - self._honest = honest - self._n_jobs = n_jobs - self._verbose = verbose - model_final = SubsampledHonestForest(n_estimators=n_estimators, - criterion=criterion, - max_depth=max_depth, - min_samples_split=min_samples_split, - min_samples_leaf=min_samples_leaf, - min_weight_fraction_leaf=min_weight_fraction_leaf, - max_features=max_features, - max_leaf_nodes=max_leaf_nodes, - min_impurity_decrease=min_impurity_decrease, - subsample_fr=subsample_fr, - honest=honest, - n_jobs=n_jobs, - random_state=random_state, - verbose=verbose) - super().__init__(model_y=model_y, model_t=model_t, - model_final=model_final, featurizer=None, + random_state=None): + self.n_estimators = n_estimators + self.criterion = criterion + self.max_depth = max_depth + self.min_samples_split = min_samples_split + self.min_samples_leaf = min_samples_leaf + self.min_weight_fraction_leaf = min_weight_fraction_leaf + self.max_features = max_features + self.max_leaf_nodes = max_leaf_nodes + self.min_impurity_decrease = min_impurity_decrease + self.subsample_fr = subsample_fr + self.honest = honest + self.n_jobs = n_jobs + self.verbose = verbose + super().__init__(model_y=model_y, + model_t=model_t, + model_final=None, + featurizer=featurizer, discrete_treatment=discrete_treatment, categories=categories, - n_splits=n_crossfit_splits, random_state=random_state, - monte_carlo_iterations=monte_carlo_iterations) + n_splits=n_splits, + monte_carlo_iterations=monte_carlo_iterations, + random_state=random_state) + + def _gen_model_final(self): + return SubsampledHonestForest(n_estimators=self.n_estimators, + criterion=self.criterion, + max_depth=self.max_depth, + min_samples_split=self.min_samples_split, + min_samples_leaf=self.min_samples_leaf, + min_weight_fraction_leaf=self.min_weight_fraction_leaf, + max_features=self.max_features, + max_leaf_nodes=self.max_leaf_nodes, + min_impurity_decrease=self.min_impurity_decrease, + subsample_fr=self.subsample_fr, + honest=self.honest, + n_jobs=self.n_jobs, + random_state=self.random_state, + verbose=self.verbose) @_deprecate_positional("X and W should be passed by keyword only. In a future release " "we will disallow passing X and W by position.", ['X', 'W']) @@ -1484,7 +1354,7 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou must support a 'groups' argument to its split method. 
cache_values: bool, default False Whether to cache inputs and first stage results, which will allow refitting a different final model - inference: string, `Inference` instance, or None + inference: string, `Inference` instance, or None Method for performing inference. This estimator supports 'bootstrap' (or an instance of :class:`.BootstrapInference`) and 'blb' (for Bootstrap-of-Little-Bags based inference) @@ -1498,143 +1368,14 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou cache_values=cache_values, inference=inference) - @DML.model_final.setter - def model_final(self, model): - raise AttributeError("ForestDML final model can't be set directly;" - "instead set individual attributes to modify the SubsampledHonestForest settings") - - def _set_model_final(self): - self._rlearner_model_final = SubsampledHonestForest(n_estimators=self._n_estimators, - criterion=self._criterion, - max_depth=self._max_depth, - min_samples_split=self._min_samples_split, - min_samples_leaf=self._min_samples_leaf, - min_weight_fraction_leaf=self._min_weight_fraction_leaf, - max_features=self._max_features, - max_leaf_nodes=self._max_leaf_nodes, - min_impurity_decrease=self._min_impurity_decrease, - subsample_fr=self._subsample_fr, - honest=self._honest, - n_jobs=self._n_jobs, - random_state=self._random_state, - verbose=self._verbose) - - @property - def n_estimators(self): - return self._n_estimators - - @n_estimators.setter - def n_estimators(self, n_estimators): - self._n_estimators = n_estimators - self._set_model_final() - - @property - def criterion(s): - return self._criterion - - @criterion.setter - def criterion(self, criterion): - self._criterion = criterion - self._set_model_final() - @property - def max_depth(s): - return self._max_depth - - @max_depth.setter - def max_depth(self, max_depth): - self._max_depth = max_depth - self._set_model_final() - - @property - def min_samples_split(self, min): - return self._min_samples_split - - @min_samples_split.setter - def min_samples_split(self, min_samples_split): - self._min_samples_split = min_samples_split - self._set_model_final() - - @property - def min_samples_leaf(self, mi): - return self._min_samples_leaf - - @min_samples_leaf.setter - def min_samples_leaf(self, min_samples_leaf): - self._min_samples_leaf = min_samples_leaf - self._set_model_final() - - @property - def min_weight_fraction_leaf(self, min_weight): - return self._min_weight_fraction_leaf - - @min_weight_fraction_leaf.setter - def min_weight_fraction_leaf(self, min_weight_fraction_leaf): - self._min_weight_fraction_leaf = min_weight_fraction_leaf - self._set_model_final() - - @property - def max_features(self): - return self._max_features - - @max_features.setter - def max_features(self, max_features): - self._max_features = max_features - self._set_model_final() - - @property - def max_leaf_nodes(self, ): - return self._max_leaf_nodes - - @max_leaf_nodes.setter - def max_leaf_nodes(self, max_leaf_nodes): - self._max_leaf_nodes = max_leaf_nodes - self._set_model_final() - - @property - def min_impurity_decrease(self, min_imp): - return self._min_impurity_decrease - - @min_impurity_decrease.setter - def min_impurity_decrease(self, min_impurity_decrease): - self._min_impurity_decrease = min_impurity_decrease - self._set_model_final() - - @property - def subsample_fr(self): - return self._subsample_fr - - @subsample_fr.setter - def subsample_fr(self, subsample_fr): - self._subsample_fr = subsample_fr - self._set_model_final() - - @property - def 
honest(self): - return self._honest - - @honest.setter - def honest(self, honest): - self._honest = honest - self._set_model_final() - - @property - def n_jobs(self): - return self._n_jobs - - @n_jobs.setter - def n_jobs(self, n_jobs): - self._n_jobs = n_jobs - self._set_model_final() - - @property - def verbose(self): - return self._verbose + def model_final(self): + return self._gen_model_final() - @verbose.setter - def verbose(self, verbose): - self._verbose = verbose - self._set_model_final() + @model_final.setter + def model_final(self, model): + if model is not None: + raise ValueError("Parameter `model_final` cannot be altered for this estimator!") @deprecated("The DMLCateEstimator class has been renamed to DML; " diff --git a/econml/drlearner.py b/econml/drlearner.py index b98e15326..7c5668d05 100644 --- a/econml/drlearner.py +++ b/econml/drlearner.py @@ -258,6 +258,9 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all W, X are None, then we call `split(ones((T.shape[0], 1)), T)`. + monte_carlo_iterations: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances. + random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None If int, random_state is the seed used by the random number generator; If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; @@ -367,23 +370,48 @@ def __init__(self, model_propensity='auto', min_propensity=1e-6, categories='auto', n_splits=2, + monte_carlo_iterations=None, random_state=None): - if model_propensity == 'auto': - model_propensity = LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto', - random_state=random_state) - if model_regression == 'auto': - model_regression = WeightedLassoCVWrapper(cv=3, random_state=random_state) - self._multitask_model_final = multitask_model_final - super().__init__(_ModelNuisance(model_propensity, model_regression, min_propensity), - _ModelFinal(model_final, featurizer, multitask_model_final), - n_splits=n_splits, discrete_treatment=True, + self.model_propensity = clone(model_propensity, safe=False) + self.model_regression = clone(model_regression, safe=False) + self.model_final = clone(model_final, safe=False) + self.multitask_model_final = multitask_model_final + self.featurizer = clone(featurizer, safe=False) + self.min_propensity = min_propensity + super().__init__(n_splits=n_splits, + monte_carlo_iterations=monte_carlo_iterations, + discrete_treatment=True, discrete_instrument=False, # no instrument, so doesn't matter categories=categories, random_state=random_state) + def _gen_ortho_learner_model_nuisance(self): + if self.model_propensity == 'auto': + model_propensity = LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto', + random_state=self.random_state) + else: + model_propensity = clone(self.model_propensity, safe=False) + + if self.model_regression == 'auto': + model_regression = WeightedLassoCVWrapper(cv=3, random_state=self.random_state) + else: + model_regression = clone(self.model_regression, safe=False) + + return _ModelNuisance(model_propensity, model_regression, self.min_propensity) + + def _gen_featurizer(self): + return clone(self.featurizer, safe=False) + + def _gen_model_final(self): + return clone(self.model_final, safe=False) + + def _gen_ortho_learner_model_final(self): + return _ModelFinal(self._gen_model_final(), 
self._gen_featurizer(), self.multitask_model_final) + @_deprecate_positional("X and W should be passed by keyword only. In a future release " "we will disallow passing X and W by position.", ['X', 'W']) - def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, groups=None, inference=None): + def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, groups=None, + cache_values=False, inference=None): """ Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`. @@ -405,6 +433,8 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou All rows corresponding to the same group will be kept together during splitting. If groups is not None, the n_splits argument passed to this class's initializer must support a 'groups' argument to its split method. + cache_values: bool, default False + Whether to cache inputs and first stage results, which will allow refitting a different final model inference: string, :class:`.Inference` instance, or None Method for performing inference. This estimator supports 'bootstrap' (or an instance of :class:`.BootstrapInference`). @@ -416,7 +446,7 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou # Replacing fit from _OrthoLearner, to enforce Z=None and improve the docstring return super().fit(Y, T, X=X, W=W, sample_weight=sample_weight, sample_var=sample_var, groups=groups, - inference=inference) + cache_values=cache_values, inference=inference) def score(self, Y, T, X=None, W=None): """ @@ -458,9 +488,9 @@ def multitask_model_cate(self): vector of outcomes correspond to the CATE model for each treatment, compared to baseline. Available only when multitask_model_final=True. """ - if not self._multitask_model_final: + if not self.ortho_learner_model_final._multitask_model_final: raise AttributeError("Separate CATE models were fitted for each treatment! Use model_cate.") - return super().model_final.model_cate + return self.ortho_learner_model_final.model_cate def model_cate(self, T=1): """ @@ -477,12 +507,12 @@ def model_cate(self, T=1): An instance of the model_final object that was fitted after calling fit which corresponds to the CATE model for treatment T=t, compared to baseline. Available when multitask_model_final=False. """ - if self._multitask_model_final: + if self.ortho_learner_model_final._multitask_model_final: raise AttributeError("A single multitask model was fitted for all treatments! Use multitask_model_cate.") _, T = self._expand_treatments(None, T) ind = inverse_onehot(T).item() - 1 assert ind >= 0, "No model was fitted for the control" - return super().model_final.models_cate[ind] + return self.ortho_learner_model_final.models_cate[ind] @property def models_propensity(self): @@ -521,7 +551,7 @@ def nuisance_scores_regression(self): return self.nuisance_scores_[1] @property - def featurizer(self): + def featurizer_(self): """ Get the fitted featurizer. @@ -531,7 +561,7 @@ def featurizer(self): An instance of the fitted featurizer that was used to preprocess X in the final CATE model training. Available only when featurizer is not None and X is not None. 
""" - return super().model_final._featurizer + return self.ortho_learner_model_final._featurizer def cate_feature_names(self, feature_names=None): """ @@ -556,14 +586,22 @@ def cate_feature_names(self, feature_names=None): return None if feature_names is None: feature_names = self._input_names["feature_names"] - if self.featurizer is None: + if self.featurizer_ is None: return feature_names - elif hasattr(self.featurizer, 'get_feature_names'): + elif hasattr(self.featurizer_, 'get_feature_names'): # This fails if X=None and featurizer is not None, but that case is handled above - return self.featurizer.get_feature_names(feature_names) + return self.featurizer_.get_feature_names(feature_names) else: raise AttributeError("Featurizer does not have a method: get_feature_names!") + @property + def model_final_(self): + return self.ortho_learner_model_final._model_final + + @property + def fitted_models_final(self): + return self.ortho_learner_model_final.models_cate + class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner): """ @@ -642,6 +680,9 @@ class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner): Unless an iterable is used, we call `split(X,T)` to generate the splits. + monte_carlo_iterations: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances. + random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None If int, random_state is the seed used by the random number generator; If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; @@ -699,21 +740,31 @@ def __init__(self, fit_cate_intercept=True, min_propensity=1e-6, categories='auto', - n_splits=2, random_state=None): + n_splits=2, + monte_carlo_iterations=None, + random_state=None): self.fit_cate_intercept = fit_cate_intercept super().__init__(model_propensity=model_propensity, model_regression=model_regression, - model_final=StatsModelsLinearRegression(fit_intercept=fit_cate_intercept), + model_final=None, featurizer=featurizer, multitask_model_final=False, min_propensity=min_propensity, categories=categories, n_splits=n_splits, + monte_carlo_iterations=monte_carlo_iterations, random_state=random_state) + def _gen_model_final(self): + return StatsModelsLinearRegression(fit_intercept=self.fit_cate_intercept) + + def _gen_ortho_learner_model_final(self): + return _ModelFinal(self._gen_model_final(), self._gen_featurizer(), False) + @_deprecate_positional("X and W should be passed by keyword only. In a future release " "we will disallow passing X and W by position.", ['X', 'W']) - def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, groups=None, inference='auto'): + def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, groups=None, + cache_values=False, inference='auto'): """ Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`. @@ -735,6 +786,8 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou All rows corresponding to the same group will be kept together during splitting. If groups is not None, the n_splits argument passed to this class's initializer must support a 'groups' argument to its split method. + cache_values: bool, default False + Whether to cache inputs and first stage results, which will allow refitting a different final model inference: string, :class:`.Inference` instance, or None Method for performing inference. 
This estimator supports ``'bootstrap'`` (or an instance of :class:`.BootstrapInference`) and ``'statsmodels'`` @@ -747,7 +800,10 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou # Replacing fit from DRLearner, to add statsmodels inference in docstring return super().fit(Y, T, X=X, W=W, sample_weight=sample_weight, sample_var=sample_var, groups=groups, - inference=inference) + cache_values=cache_values, inference=inference) + + def refit(self, *, inference='auto'): + super().refit(inference=inference) @property def multitask_model_cate(self): @@ -756,12 +812,22 @@ def multitask_model_cate(self): return super().multitask_model_cate @property - def model_final(self): - return super().model_final._model_final + def multitask_model_final(self): + return False + + @multitask_model_final.setter + def multitask_model_final(self, value): + if value: + raise ValueError("Parameter `multitask_model_final` cannot change from `False` for this estimator!") @property - def fitted_models_final(self): - return super().model_final.models_cate + def model_final(self): + return self._gen_model_final() + + @model_final.setter + def model_final(self, model): + if model is not None: + raise ValueError("Parameter `model_final` cannot be altered for this estimator!") class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner): @@ -853,6 +919,9 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner): Unless an iterable is used, we call `split(X,T)` to generate the splits. + monte_carlo_iterations: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances. + random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None If int, random_state is the seed used by the random number generator; If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; @@ -913,26 +982,34 @@ def __init__(self, tol=1e-4, min_propensity=1e-6, categories='auto', - n_splits=2, random_state=None): + n_splits=2, + monte_carlo_iterations=None, + random_state=None): self.fit_cate_intercept = fit_cate_intercept - model_final = DebiasedLasso( - alpha=alpha, - fit_intercept=fit_cate_intercept, - max_iter=max_iter, - tol=tol) super().__init__(model_propensity=model_propensity, model_regression=model_regression, - model_final=model_final, + model_final=None, featurizer=featurizer, multitask_model_final=False, min_propensity=min_propensity, categories=categories, n_splits=n_splits, + monte_carlo_iterations=monte_carlo_iterations, random_state=random_state) + def _gen_model_final(self): + return DebiasedLasso(alpha=self.alpha, + fit_intercept=self.fit_cate_intercept, + max_iter=self.max_iter, + tol=self.tol) + + def _gen_ortho_learner_model_final(self): + return _ModelFinal(self._gen_model_final(), self._gen_featurizer(), False) + @_deprecate_positional("X and W should be passed by keyword only. In a future release " "we will disallow passing X and W by position.", ['X', 'W']) - def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, groups=None, inference='auto'): + def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, groups=None, + cache_values=False, inference='auto'): """ Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`. 
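Note on the pattern above: the preceding hunks for LinearDRLearner and SparseLinearDRLearner stop constructing the final model eagerly in `__init__`; instead `_gen_model_final` builds it on demand, `fit` gains a `cache_values` flag, and `refit` re-runs only the final stage. A minimal sketch of the intended workflow under those semantics (data, seed and hyperparameter choices are illustrative)::

    import numpy as np
    from econml.drlearner import LinearDRLearner

    # toy data with a binary treatment
    np.random.seed(0)
    X = np.random.normal(size=(500, 3))
    T = np.random.binomial(1, 0.5, size=500)
    y = T * (1 + X[:, 0]) + np.random.normal(size=500)

    est = LinearDRLearner(n_splits=2)
    # cache_values=True keeps the first-stage nuisance estimates around
    est.fit(y, T, X=X, cache_values=True)

    # change a final-stage setting on the estimator ...
    est.fit_cate_intercept = False
    # ... and re-fit only the final stage, reusing the cached nuisances
    est.refit(inference='auto')
    print(est.effect(X[:5]))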
@@ -954,6 +1031,8 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou All rows corresponding to the same group will be kept together during splitting. If groups is not None, the n_splits argument passed to this class's initializer must support a 'groups' argument to its split method. + cache_values: bool, default False + Whether to cache inputs and first stage results, which will allow refitting a different final model inference: string, :class:`.Inference` instance, or None Method for performing inference. This estimator supports ``'bootstrap'`` (or an instance of :class:`.BootstrapInference`) and ``'debiasedlasso'`` @@ -969,26 +1048,33 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou warn("This estimator does not yet support sample variances and inference does not take " "sample variances into account. This feature will be supported in a future release.") check_high_dimensional(X, T, threshold=5, featurizer=self.featurizer, - discrete_treatment=self._discrete_treatment, + discrete_treatment=self.discrete_treatment, msg="The number of features in the final model (< 5) is too small for a sparse model. " "We recommend using the LinearDRLearner for this low-dimensional setting.") return super().fit(Y, T, X=X, W=W, sample_weight=sample_weight, sample_var=None, groups=groups, - inference=inference) + cache_values=cache_values, inference=inference) + + def refit(self, *, inference='auto'): + super().refit(inference=inference) @property - def multitask_model_cate(self): - # Replacing this method which is invalid for this class, so that we make the - # dosctring empty and not appear in the docs. - return super().multitask_model_cate + def multitask_model_final(self): + return False + + @multitask_model_final.setter + def multitask_model_final(self, value): + if value: + raise ValueError("Parameter `multitask_model_final` cannot change from `False` for this estimator!") @property def model_final(self): - return super().model_final._model_final + return self._gen_model_final() - @property - def fitted_models_final(self): - return super().model_final.models_cate + @model_final.setter + def model_final(self, model): + if model is not None: + raise ValueError("Parameter `model_final` cannot be altered for this estimator!") class ForestDRLearner(ForestModelFinalCateEstimatorDiscreteMixin, DRLearner): @@ -1015,7 +1101,7 @@ class ForestDRLearner(ForestModelFinalCateEstimatorDiscreteMixin, DRLearner): The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values). The first category will be treated as the control treatment. - n_crossfit_splits: int, cross-validation generator or an iterable, optional (Default=2) + n_splits: int, cross-validation generator or an iterable, optional (Default=2) Determines the cross-validation splitting strategy. Possible inputs for cv are: @@ -1032,6 +1118,9 @@ class ForestDRLearner(ForestModelFinalCateEstimatorDiscreteMixin, DRLearner): Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all W, X are None, then we call `split(ones((T.shape[0], 1)), T)`. + monte_carlo_iterations: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances. + n_estimators : integer, optional (default=100) The total number of trees in the forest. 
The forest consists of a forest of sqrt(n_estimators) sub-forests, where each sub-forest @@ -1148,9 +1237,11 @@ class ForestDRLearner(ForestModelFinalCateEstimatorDiscreteMixin, DRLearner): def __init__(self, model_regression, model_propensity, + featurizer=None, min_propensity=1e-6, categories='auto', - n_crossfit_splits=2, + n_splits=2, + monte_carlo_iterations=None, n_estimators=1000, criterion="mse", max_depth=None, @@ -1165,30 +1256,53 @@ def __init__(self, n_jobs=None, verbose=0, random_state=None): - model_final = SubsampledHonestForest(n_estimators=n_estimators, - criterion=criterion, - max_depth=max_depth, - min_samples_split=min_samples_split, - min_samples_leaf=min_samples_leaf, - min_weight_fraction_leaf=min_weight_fraction_leaf, - max_features=max_features, - max_leaf_nodes=max_leaf_nodes, - min_impurity_decrease=min_impurity_decrease, - subsample_fr=subsample_fr, - honest=honest, - n_jobs=n_jobs, - random_state=random_state, - verbose=verbose) - super().__init__(model_regression=model_regression, model_propensity=model_propensity, - model_final=model_final, featurizer=None, + self.n_estimators = n_estimators + self.criterion = criterion + self.max_depth = max_depth + self.min_samples_split = min_samples_split + self.min_samples_leaf = min_samples_leaf + self.min_weight_fraction_leaf = min_weight_fraction_leaf + self.max_features = max_features + self.max_leaf_nodes = max_leaf_nodes + self.min_impurity_decrease = min_impurity_decrease + self.subsample_fr = subsample_fr + self.honest = honest + self.n_jobs = n_jobs + self.verbose = verbose + super().__init__(model_regression=model_regression, + model_propensity=model_propensity, + model_final=None, + featurizer=featurizer, multitask_model_final=False, min_propensity=min_propensity, categories=categories, - n_splits=n_crossfit_splits, random_state=random_state) + n_splits=n_splits, + monte_carlo_iterations=monte_carlo_iterations, + random_state=random_state) + + def _gen_model_final(self): + return SubsampledHonestForest(n_estimators=self.n_estimators, + criterion=self.criterion, + max_depth=self.max_depth, + min_samples_split=self.min_samples_split, + min_samples_leaf=self.min_samples_leaf, + min_weight_fraction_leaf=self.min_weight_fraction_leaf, + max_features=self.max_features, + max_leaf_nodes=self.max_leaf_nodes, + min_impurity_decrease=self.min_impurity_decrease, + subsample_fr=self.subsample_fr, + honest=self.honest, + n_jobs=self.n_jobs, + random_state=self.random_state, + verbose=self.verbose) + + def _gen_ortho_learner_model_final(self): + return _ModelFinal(self._gen_model_final(), self._gen_featurizer(), False) @_deprecate_positional("X and W should be passed by keyword only. In a future release " "we will disallow passing X and W by position.", ['X', 'W']) - def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, groups=None, inference='auto'): + def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, groups=None, + cache_values=False, inference='auto'): """ Estimate the counterfactual model from data, i.e. estimates functions τ(·,·,·), ∂τ(·,·). @@ -1211,6 +1325,8 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou All rows corresponding to the same group will be kept together during splitting. If groups is not None, the n_splits argument passed to this class's initializer must support a 'groups' argument to its split method. 
+ cache_values: bool, default False + Whether to cache inputs and first stage results, which will allow refitting a different final model inference: string, `Inference` instance, or None Method for performing inference. This estimator supports 'bootstrap' (or an instance of :class:`.BootstrapInference`) and 'blb' @@ -1222,16 +1338,29 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou """ return super().fit(Y, T, X=X, W=W, sample_weight=sample_weight, sample_var=None, groups=groups, - inference=inference) + cache_values=cache_values, inference=inference) + + def refit(self, *, inference='auto'): + super().refit(inference=inference) def multitask_model_cate(self): # Replacing to remove docstring super().multitask_model_cate() @property - def model_final(self): - return super().model_final._model_final + def multitask_model_final(self): + return False + + @multitask_model_final.setter + def multitask_model_final(self, value): + if value: + raise ValueError("Parameter `multitask_model_final` cannot change from `False` for this estimator!") @property - def fitted_models_final(self): - return super().model_final.models_cate + def model_final(self): + return self._gen_model_final() + + @model_final.setter + def model_final(self, model): + if model is not None: + raise ValueError("Parameter `model_final` cannot be altered for this estimator!") diff --git a/econml/inference.py b/econml/inference.py index 1e696561f..0ba29f20a 100644 --- a/econml/inference.py +++ b/econml/inference.py @@ -95,8 +95,8 @@ class has a model_final method, whose predict(cross_product(X, [0, ..., 1, ..., """ def prefit(self, estimator, *args, **kwargs): - self.model_final = estimator.model_final - self.featurizer = estimator.featurizer if hasattr(estimator, 'featurizer') else None + self.model_final = estimator.model_final_ + self.featurizer = estimator.featurizer_ if hasattr(estimator, 'featurizer_') else None def fit(self, estimator, *args, **kwargs): # once the estimator has been fit, it's kosher to store d_t here @@ -326,8 +326,8 @@ class GenericModelFinalInferenceDiscrete(Inference): """ def prefit(self, estimator, *args, **kwargs): - self.model_final = estimator.model_final - self.featurizer = estimator.featurizer if hasattr(estimator, 'featurizer') else None + self.model_final = estimator.model_final_ + self.featurizer = estimator.featurizer_ if hasattr(estimator, 'featurizer_') else None def fit(self, estimator, *args, **kwargs): # once the estimator has been fit, it's kosher to store d_t here diff --git a/econml/ortho_iv.py b/econml/ortho_iv.py index 0ad0134e5..021cd15f0 100644 --- a/econml/ortho_iv.py +++ b/econml/ortho_iv.py @@ -136,19 +136,26 @@ def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None class _BaseDMLATEIV(_OrthoLearner): - def __init__(self, model_nuisance, - discrete_instrument=False, discrete_treatment=False, + def __init__(self, discrete_instrument=False, + discrete_treatment=False, categories='auto', - n_splits=2, random_state=None): - - super().__init__(model_nuisance, _BaseDMLATEIVModelFinal(), - discrete_treatment=discrete_treatment, discrete_instrument=discrete_instrument, + n_splits=2, + monte_carlo_iterations=None, + random_state=None): + super().__init__(discrete_treatment=discrete_treatment, + discrete_instrument=discrete_instrument, categories=categories, - n_splits=n_splits, random_state=random_state) + n_splits=n_splits, + monte_carlo_iterations=monte_carlo_iterations, + random_state=random_state) + + def 
_gen_ortho_learner_model_final(self): + return _BaseDMLATEIVModelFinal() @_deprecate_positional("W and Z should be passed by keyword only. In a future release " "we will disallow passing W and Z by position.", ['W', 'Z']) - def fit(self, Y, T, Z, W=None, *, sample_weight=None, sample_var=None, groups=None, inference=None): + def fit(self, Y, T, Z, W=None, *, sample_weight=None, sample_var=None, groups=None, + cache_values=False, inference=None): """ Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`. @@ -170,6 +177,8 @@ def fit(self, Y, T, Z, W=None, *, sample_weight=None, sample_var=None, groups=No All rows corresponding to the same group will be kept together during splitting. If groups is not None, the n_splits argument passed to this class's initializer must support a 'groups' argument to its split method. + cache_values: bool, default False + Whether to cache inputs and first stage results, which will allow refitting a different final model inference: string,:class:`.Inference` instance, or None Method for performing inference. This estimator supports 'bootstrap' (or an instance of:class:`.BootstrapInference`). @@ -181,7 +190,7 @@ def fit(self, Y, T, Z, W=None, *, sample_weight=None, sample_var=None, groups=No # Replacing fit from _OrthoLearner, to enforce W=None and improve the docstring return super().fit(Y, T, W=W, Z=Z, sample_weight=sample_weight, sample_var=sample_var, groups=groups, - inference=inference) + cache_values=cache_values, inference=inference) def score(self, Y, T, Z, W=None): """ @@ -271,22 +280,32 @@ class DMLATEIV(_BaseDMLATEIV): a biased ATE. """ - def __init__(self, model_Y_W, model_T_W, model_Z_W, - discrete_treatment=False, discrete_instrument=False, + def __init__(self, *, model_Y_W, model_T_W, model_Z_W, + discrete_treatment=False, + discrete_instrument=False, categories='auto', - n_splits=2, random_state=None): - - super().__init__(_DMLATEIVModelNuisance(model_Y_W=_FirstStageWrapper(model_Y_W, discrete_target=False), - model_T_W=_FirstStageWrapper( - model_T_W, discrete_target=discrete_treatment), - model_Z_W=_FirstStageWrapper( - model_Z_W, discrete_target=discrete_instrument)), - discrete_instrument=discrete_instrument, discrete_treatment=discrete_treatment, + n_splits=2, + monte_carlo_iterations=None, + random_state=None): + self.model_Y_W = clone(model_Y_W, safe=False) + self.model_T_W = clone(model_T_W, safe=False) + self.model_Z_W = clone(model_Z_W, safe=False) + super().__init__(discrete_instrument=discrete_instrument, + discrete_treatment=discrete_treatment, categories=categories, - n_splits=n_splits, random_state=random_state) + n_splits=n_splits, + monte_carlo_iterations=monte_carlo_iterations, + random_state=random_state) + + def _gen_ortho_learner_model_nuisance(self): + return _DMLATEIVModelNuisance( + model_Y_W=_FirstStageWrapper(clone(self.model_Y_W, safe=False), discrete_target=False), + model_T_W=_FirstStageWrapper(clone(self.model_T_W, safe=False), discrete_target=self.discrete_treatment), + model_Z_W=_FirstStageWrapper(clone(self.model_Z_W, safe=False), discrete_target=self.discrete_instrument)) class _ProjectedDMLATEIVModelNuisance: + def __init__(self, model_Y_W, model_T_W, model_T_WZ): self._model_Y_W = clone(model_Y_W, safe=False) self._model_T_W = clone(model_T_W, safe=False) @@ -330,21 +349,30 @@ def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None): class ProjectedDMLATEIV(_BaseDMLATEIV): - def __init__(self, model_Y_W, model_T_W, model_T_WZ, - discrete_treatment=False, 
discrete_instrument=False, + + def __init__(self, *, model_Y_W, model_T_W, model_T_WZ, + discrete_treatment=False, + discrete_instrument=False, categories='auto', - n_splits=2, random_state=None): + n_splits=2, + monte_carlo_iterations=None, + random_state=None): + self.model_Y_W = clone(model_Y_W, safe=False) + self.model_T_W = clone(model_T_W, safe=False) + self.model_T_WZ = clone(model_T_WZ, safe=False) + super().__init__(discrete_instrument=discrete_instrument, + discrete_treatment=discrete_treatment, + categories=categories, + n_splits=n_splits, + monte_carlo_iterations=monte_carlo_iterations, + random_state=random_state) - super().__init__(_ProjectedDMLATEIVModelNuisance( - model_Y_W=_FirstStageWrapper( - model_Y_W, discrete_target=False), - model_T_W=_FirstStageWrapper( - model_T_W, discrete_target=discrete_treatment), - model_T_WZ=_FirstStageWrapper( - model_T_WZ, discrete_target=discrete_treatment)), - discrete_treatment=discrete_treatment, discrete_instrument=discrete_instrument, - categories=categories, - n_splits=n_splits, random_state=random_state) + def _gen_ortho_learner_model_nuisance(self): + return _ProjectedDMLATEIVModelNuisance( + model_Y_W=_FirstStageWrapper(clone(self.model_Y_W, safe=False), discrete_target=False), + model_T_W=_FirstStageWrapper(clone(self.model_T_W, safe=False), discrete_target=self.discrete_treatment), + model_T_WZ=_FirstStageWrapper(clone(self.model_T_WZ, safe=False), + discrete_target=self.discrete_instrument)) class _BaseDMLIVModelNuisance: @@ -494,6 +522,9 @@ class _BaseDMLIV(_OrthoLearner): Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all W, X are None, then we call `split(ones((T.shape[0], 1)), T)`. + monte_carlo_iterations: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances. + random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; @@ -501,18 +532,19 @@ class _BaseDMLIV(_OrthoLearner): by :mod:`np.random`. """ - def __init__(self, model_Y_X, model_T_X, model_T_XZ, model_final, - discrete_instrument=False, discrete_treatment=False, categories='auto', - n_splits=2, random_state=None): - super().__init__(_BaseDMLIVModelNuisance(model_Y_X, model_T_X, model_T_XZ), - _BaseDMLIVModelFinal(model_final), - discrete_treatment=discrete_treatment, discrete_instrument=discrete_instrument, + def __init__(self, discrete_instrument=False, discrete_treatment=False, categories='auto', + n_splits=2, monte_carlo_iterations=None, random_state=None): + super().__init__(discrete_treatment=discrete_treatment, + discrete_instrument=discrete_instrument, categories=categories, - n_splits=n_splits, random_state=random_state) + n_splits=n_splits, + monte_carlo_iterations=monte_carlo_iterations, + random_state=random_state) @_deprecate_positional("Z and X should be passed by keyword only. In a future release " "we will disallow passing Z and X by position.", ['X', 'Z']) - def fit(self, Y, T, Z, X=None, *, sample_weight=None, sample_var=None, groups=None, inference=None): + def fit(self, Y, T, Z, X=None, *, sample_weight=None, sample_var=None, groups=None, + cache_values=False, inference=None): """ Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`. 
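As a usage note for the hunks above: DMLATEIV and ProjectedDMLATEIV now take their first-stage models as keyword-only arguments, store them unmodified, and only wrap them in `_FirstStageWrapper` inside `_gen_ortho_learner_model_nuisance` when fitting. A small sketch under that assumption (model choices are illustrative)::

    from sklearn.linear_model import LinearRegression, LogisticRegression
    from econml.ortho_iv import DMLATEIV

    # after this change the models must be passed by keyword
    est = DMLATEIV(model_Y_W=LinearRegression(),
                   model_T_W=LogisticRegression(),
                   model_Z_W=LogisticRegression(),
                   discrete_treatment=True,
                   discrete_instrument=True,
                   n_splits=2)

    # the stored model is the bare estimator, so tweaking it before fitting
    # is picked up when the nuisance wrappers are generated at fit time
    est.model_T_W.set_params(C=0.1)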
@@ -534,6 +566,8 @@ def fit(self, Y, T, Z, X=None, *, sample_weight=None, sample_var=None, groups=No All rows corresponding to the same group will be kept together during splitting. If groups is not None, the n_splits argument passed to this class's initializer must support a 'groups' argument to its split method. + cache_values: bool, default False + Whether to cache inputs and first stage results, which will allow refitting a different final model inference: string,:class:`.Inference` instance, or None Method for performing inference. This estimator supports 'bootstrap' (or an instance of:class:`.BootstrapInference`). @@ -545,7 +579,7 @@ def fit(self, Y, T, Z, X=None, *, sample_weight=None, sample_var=None, groups=No # Replacing fit from _OrthoLearner, to enforce W=None and improve the docstring return super().fit(Y, T, X=X, Z=Z, sample_weight=sample_weight, sample_var=sample_var, groups=groups, - inference=inference) + cache_values=cache_values, inference=inference) def score(self, Y, T, Z, X=None): """ @@ -578,18 +612,18 @@ def score(self, Y, T, Z, X=None): @property def original_featurizer(self): - return super().model_final._model_final._original_featurizer + return self.ortho_learner_model_final._model_final._original_featurizer @property - def featurizer(self): + def featurizer_(self): # NOTE This is used by the inference methods and has to be the overall featurizer. intended # for internal use by the library - return super().model_final._model_final._featurizer + return self.ortho_learner_model_final._model_final._featurizer @property - def model_final(self): + def model_final_(self): # NOTE This is used by the inference methods and is more for internal use to the library - return super().model_final._model_final._model + return self.ortho_learner_model_final._model_final._model @property def model_cate(self): @@ -602,7 +636,7 @@ def model_cate(self): An instance of the model_final object that was fitted after calling fit which corresponds to the constant marginal CATE model. """ - return super().model_final._model_final._model + return self.ortho_learner_model_final._model_final._model @property def models_Y_X(self): @@ -615,7 +649,7 @@ def models_Y_X(self): A list of instances of the `model_Y_X` object. Each element corresponds to a crossfitting fold and is the model instance that was fitted for that training fold. """ - return [mdl._model for mdl in super().models_Y_X] + return [mdl._model_Y_X._model for mdl in super().models_nuisance] @property def models_T_X(self): @@ -628,7 +662,7 @@ def models_T_X(self): A list of instances of the `model_T_X` object. Each element corresponds to a crossfitting fold and is the model instance that was fitted for that training fold. """ - return [mdl._model for mdl in super().models_T_X] + return [mdl._model_T_X._model for mdl in super().models_nuisance] @property def models_T_XZ(self): @@ -641,7 +675,7 @@ def models_T_XZ(self): A list of instances of the `model_T_XZ` object. Each element corresponds to a crossfitting fold and is the model instance that was fitted for that training fold. """ - return [mdl._model for mdl in super().models_T_XZ] + return [mdl._model_T_XZ._model for mdl in super().models_nuisance] @property def nuisance_scores_Y_X(self): @@ -746,6 +780,9 @@ class DMLIV(_BaseDMLIV): Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all W, X are None, then we call `split(ones((T.shape[0], 1)), T)`. 
+ monte_carlo_iterations: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances. + + discrete_instrument: bool, optional, default False Whether the instrument values should be treated as categorical, rather than continuous, quantities @@ -763,25 +800,41 @@ class DMLIV(_BaseDMLIV): by :mod:`np.random`. """ - def __init__(self, model_Y_X, model_T_X, model_T_XZ, model_final, featurizer=None, + def __init__(self, *, model_Y_X, model_T_X, model_T_XZ, model_final, + featurizer=None, fit_cate_intercept=True, - n_splits=2, discrete_instrument=False, discrete_treatment=False, + n_splits=2, + monte_carlo_iterations=None, + discrete_instrument=False, discrete_treatment=False, categories='auto', random_state=None): - self.bias_part_of_coef = fit_cate_intercept + self.model_Y_X = clone(model_Y_X, safe=False) + self.model_T_X = clone(model_T_X, safe=False) + self.model_T_XZ = clone(model_T_XZ, safe=False) + self.model_final = clone(model_final, safe=False) + self.featurizer = clone(featurizer, safe=False) self.fit_cate_intercept = fit_cate_intercept - super().__init__(_FirstStageWrapper(model_Y_X, False), - _FirstStageWrapper(model_T_X, discrete_treatment), - _FirstStageWrapper(model_T_XZ, discrete_treatment), - _FinalWrapper(model_final, - fit_cate_intercept=fit_cate_intercept, - featurizer=featurizer, - use_weight_trick=False), - n_splits=n_splits, + super().__init__(n_splits=n_splits, + monte_carlo_iterations=monte_carlo_iterations, discrete_instrument=discrete_instrument, discrete_treatment=discrete_treatment, categories=categories, random_state=random_state) + def _gen_ortho_learner_model_nuisance(self): + return _BaseDMLIVModelNuisance(_FirstStageWrapper(clone(self.model_Y_X, safe=False), False), + _FirstStageWrapper(clone(self.model_T_X, safe=False), self.discrete_treatment), + _FirstStageWrapper(clone(self.model_T_XZ, safe=False), self.discrete_treatment)) + + def _gen_ortho_learner_model_final(self): + return _BaseDMLIVModelFinal(_FinalWrapper(clone(self.model_final, safe=False), + fit_cate_intercept=self.fit_cate_intercept, + featurizer=clone(self.featurizer, safe=False), + use_weight_trick=False)) + + @property + def bias_part_of_coef(self): + return self.ortho_learner_model_final._model_final._fit_cate_intercept + class NonParamDMLIV(_BaseDMLIV): """ @@ -841,6 +894,9 @@ class NonParamDMLIV(_BaseDMLIV): Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all W, X are None, then we call `split(ones((T.shape[0], 1)), T)`. + monte_carlo_iterations: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances.
+ discrete_instrument: bool, optional, default False Whether the instrument values should be treated as categorical, rather than continuous, quantities @@ -859,23 +915,38 @@ """ - def __init__(self, model_Y_X, model_T_X, model_T_XZ, model_final, + def __init__(self, *, model_Y_X, model_T_X, model_T_XZ, model_final, featurizer=None, fit_cate_intercept=True, - n_splits=2, discrete_instrument=False, discrete_treatment=False, categories='auto', + n_splits=2, + monte_carlo_iterations=None, + discrete_instrument=False, + discrete_treatment=False, + categories='auto', random_state=None): - super().__init__(_FirstStageWrapper(model_Y_X, False), - _FirstStageWrapper(model_T_X, discrete_treatment), - _FirstStageWrapper(model_T_XZ, discrete_treatment), - _FinalWrapper(model_final, - fit_cate_intercept=fit_cate_intercept, - featurizer=featurizer, - use_weight_trick=True), - n_splits=n_splits, + self.model_Y_X = clone(model_Y_X, safe=False) + self.model_T_X = clone(model_T_X, safe=False) + self.model_T_XZ = clone(model_T_XZ, safe=False) + self.model_final = clone(model_final, safe=False) + self.featurizer = clone(featurizer, safe=False) + self.fit_cate_intercept = fit_cate_intercept + super().__init__(n_splits=n_splits, + monte_carlo_iterations=monte_carlo_iterations, discrete_instrument=discrete_instrument, discrete_treatment=discrete_treatment, categories=categories, random_state=random_state) + def _gen_ortho_learner_model_nuisance(self): + return _BaseDMLIVModelNuisance(_FirstStageWrapper(clone(self.model_Y_X, safe=False), False), + _FirstStageWrapper(clone(self.model_T_X, safe=False), self.discrete_treatment), + _FirstStageWrapper(clone(self.model_T_XZ, safe=False), self.discrete_treatment)) + + def _gen_ortho_learner_model_final(self): + return _BaseDMLIVModelFinal(_FinalWrapper(clone(self.model_final, safe=False), + fit_cate_intercept=self.fit_cate_intercept, + featurizer=clone(self.featurizer, safe=False), + use_weight_trick=True)) + class _BaseDRIVModelFinal: """ @@ -1036,6 +1107,9 @@ class _BaseDRIV(_OrthoLearner): Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all W, X are None, then we call `split(ones((T.shape[0], 1)), T)`. + monte_carlo_iterations: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances.
+ random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; @@ -1044,33 +1118,42 @@ class _BaseDRIV(_OrthoLearner): """ def __init__(self, - nuisance_models, model_final, featurizer=None, fit_cate_intercept=True, - cov_clip=0.1, opt_reweighted=False, - discrete_instrument=False, discrete_treatment=False, + cov_clip=0.1, + opt_reweighted=False, + discrete_instrument=False, + discrete_treatment=False, categories='auto', - n_splits=2, random_state=None): - + n_splits=2, + monte_carlo_iterations=None, + random_state=None): + self.model_final = clone(model_final, safe=False) + self.featurizer = clone(featurizer, safe=False) self.fit_cate_intercept = fit_cate_intercept - self.bias_part_of_coef = fit_cate_intercept - self.cov_clip = cov_clip self.opt_reweighted = opt_reweighted - super().__init__(nuisance_models, _BaseDRIVModelFinal(model_final, - featurizer, - discrete_treatment, - discrete_instrument, - fit_cate_intercept, - cov_clip, - opt_reweighted), - discrete_instrument=discrete_instrument, discrete_treatment=discrete_treatment, - categories=categories, n_splits=n_splits, random_state=random_state) + super().__init__(discrete_instrument=discrete_instrument, discrete_treatment=discrete_treatment, + categories=categories, n_splits=n_splits, + monte_carlo_iterations=monte_carlo_iterations, random_state=random_state) + + def _gen_model_final(self): + return clone(self.model_final, safe=False) + + def _gen_ortho_learner_model_final(self): + return _BaseDRIVModelFinal(self._gen_model_final(), + clone(self.featurizer, safe=False), + self.discrete_treatment, + self.discrete_instrument, + self.fit_cate_intercept, + self.cov_clip, + self.opt_reweighted) @_deprecate_positional("X, W, and Z should be passed by keyword only. In a future release " "we will disallow passing X, W, and Z by position.", ['X', 'W', 'Z']) - def fit(self, Y, T, Z, X=None, W=None, *, sample_weight=None, sample_var=None, groups=None, inference=None): + def fit(self, Y, T, Z, X=None, W=None, *, sample_weight=None, sample_var=None, groups=None, + cache_values=False, inference=None): """ Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`. @@ -1094,6 +1177,8 @@ def fit(self, Y, T, Z, X=None, W=None, *, sample_weight=None, sample_var=None, g All rows corresponding to the same group will be kept together during splitting. If groups is not None, the n_splits argument passed to this class's initializer must support a 'groups' argument to its split method. + cache_values: bool, default False + Whether to cache inputs and first stage results, which will allow refitting a different final model inference: string,:class:`.Inference` instance, or None Method for performing inference. This estimator supports 'bootstrap' (or an instance of:class:`.BootstrapInference`). 
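The recurring refactor in these hunks replaces eager construction of wrapped final models with small `_gen_*` factory methods that subclasses override, so the wrapped object is only built when needed and always reflects the current hyperparameters. A self-contained sketch of that pattern, with hypothetical class names standing in for the real hierarchy::

    from abc import ABC, abstractmethod

    import numpy as np
    from sklearn.base import clone
    from sklearn.linear_model import LinearRegression

    class _BaseLearner(ABC):
        @abstractmethod
        def _gen_model_final(self):
            """Return a fresh, unfitted final model."""

        def fit(self, X, y):
            # the final model is generated at fit time, so hyperparameters
            # changed on the estimator between fits are picked up automatically
            self.model_final_ = self._gen_model_final().fit(X, y)
            return self

    class _MyLearner(_BaseLearner):
        def __init__(self, model_final):
            self.model_final = clone(model_final, safe=False)

        def _gen_model_final(self):
            # clone so that repeated fits never share fitted state
            return clone(self.model_final, safe=False)

    X, y = np.random.normal(size=(50, 2)), np.random.normal(size=50)
    est = _MyLearner(LinearRegression()).fit(X, y)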
@@ -1105,7 +1190,7 @@ def fit(self, Y, T, Z, X=None, W=None, *, sample_weight=None, sample_var=None, g # Replacing fit from _OrthoLearner, to reorder arguments and improve the docstring return super().fit(Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight, sample_var=sample_var, groups=groups, - inference=inference) + cache_values=cache_values, inference=inference) def score(self, Y, T, Z, X=None, W=None, sample_weight=None): """ @@ -1143,18 +1228,18 @@ def score(self, Y, T, Z, X=None, W=None, sample_weight=None): @property def original_featurizer(self): - return super().model_final._original_featurizer + return self.ortho_learner_model_final._original_featurizer @property - def featurizer(self): + def featurizer_(self): # NOTE This is used by the inference methods and has to be the overall featurizer. intended # for internal use by the library - return super().model_final._featurizer + return self.ortho_learner_model_final._featurizer @property - def model_final(self): + def model_final_(self): # NOTE This is used by the inference methods and is more for internal use to the library - return super().model_final._model_final + return self.ortho_learner_model_final._model_final def cate_feature_names(self, feature_names=None): """ @@ -1243,31 +1328,46 @@ class _IntentToTreatDRIV(_BaseDRIV): Helper class for the DRIV algorithm for the intent-to-treat A/B test setting """ - def __init__(self, model_Y_X, model_T_XZ, + def __init__(self, *, model_Y_X, + model_T_XZ, prel_model_effect, - model_effect, + model_final, featurizer=None, fit_cate_intercept=True, cov_clip=.1, n_splits=3, + monte_carlo_iterations=None, opt_reweighted=False, categories='auto', random_state=None): """ """ - + self.model_Y_X = clone(model_Y_X, safe=False) + self.model_T_XZ = clone(model_T_XZ, safe=False) + self.prel_model_effect = clone(prel_model_effect, safe=False) # TODO: check that Y, T, Z do not have multiple columns - super().__init__(_IntentToTreatDRIVModelNuisance(model_Y_X, model_T_XZ, prel_model_effect), - model_effect, + super().__init__(model_final, featurizer=featurizer, fit_cate_intercept=fit_cate_intercept, cov_clip=cov_clip, n_splits=n_splits, - discrete_instrument=True, discrete_treatment=True, + monte_carlo_iterations=monte_carlo_iterations, + discrete_instrument=True, + discrete_treatment=True, categories=categories, opt_reweighted=opt_reweighted, random_state=random_state) + def _gen_prel_model_effect(self): + return clone(self.prel_model_effect, safe=False) + + def _gen_ortho_learner_model_nuisance(self): + return _IntentToTreatDRIVModelNuisance(_FirstStageWrapper(clone(self.model_Y_X, safe=False), + discrete_target=False), + _FirstStageWrapper(clone(self.model_T_XZ, safe=False), + discrete_target=True), + self._gen_prel_model_effect()) + class _DummyCATE: """ @@ -1277,7 +1377,7 @@ class _DummyCATE: def __init__(self): return - def fit(self, y, T, *, Z, X, W=None, sample_weight=None, groups=None): + def fit(self, y, T, *, Z, X, W=None, sample_weight=None, groups=None, **kwargs): return self def effect(self, X): @@ -1332,6 +1432,9 @@ class IntentToTreatDRIV(_IntentToTreatDRIV): Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all W, X are None, then we call `split(ones((T.shape[0], 1)), T)`. + monte_carlo_iterations: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances. + opt_reweighted : bool, optional, default False Whether to reweight the samples to minimize variance. 
If True then final_model_effect.fit must accept sample_weight as a kw argument (WeightWrapper from @@ -1351,37 +1454,46 @@ class IntentToTreatDRIV(_IntentToTreatDRIV): by :mod:`np.random`. """ - def __init__(self, model_Y_X, model_T_XZ, + def __init__(self, *, model_Y_X, model_T_XZ, flexible_model_effect, - final_model_effect=None, + model_final=None, featurizer=None, fit_cate_intercept=True, cov_clip=.1, n_splits=3, + monte_carlo_iterations=None, opt_reweighted=False, categories='auto', random_state=None): - model_Y_X = _FirstStageWrapper(model_Y_X, discrete_target=False) - model_T_XZ = _FirstStageWrapper(model_T_XZ, discrete_target=True) - prel_model_effect = _IntentToTreatDRIV(model_Y_X, - model_T_XZ, - _DummyCATE(), - flexible_model_effect, - cov_clip=1e-7, n_splits=1, - opt_reweighted=True, - random_state=random_state) - if final_model_effect is None: - final_model_effect = flexible_model_effect - super().__init__(model_Y_X, model_T_XZ, prel_model_effect, - final_model_effect, + self.flexible_model_effect = flexible_model_effect + super().__init__(model_Y_X=model_Y_X, + model_T_XZ=model_T_XZ, + prel_model_effect=None, + model_final=model_final, featurizer=featurizer, fit_cate_intercept=fit_cate_intercept, cov_clip=cov_clip, n_splits=n_splits, + monte_carlo_iterations=monte_carlo_iterations, opt_reweighted=opt_reweighted, categories=categories, random_state=random_state) + def _gen_model_final(self): + if self.model_final is None: + return clone(self.flexible_model_effect, safe=False) + return clone(self.model_final, safe=False) + + def _gen_prel_model_effect(self): + return _IntentToTreatDRIV(model_Y_X=clone(self.model_Y_X, safe=False), + model_T_XZ=clone(self.model_T_XZ, safe=False), + prel_model_effect=_DummyCATE(), + model_final=clone(self.flexible_model_effect, safe=False), + cov_clip=1e-7, + n_splits=1, + opt_reweighted=True, + random_state=self.random_state) + @property def models_Y_X(self): return [mdl._model_Y_X._model for mdl in super().models_nuisance] @@ -1402,6 +1514,15 @@ def nuisance_scores_T_XZ(self): def nuisance_scores_effect(self): return self.nuisance_scores_[2] + @property + def prel_model_effect(self): + return self._gen_prel_model_effect() + + @prel_model_effect.setter + def prel_model_effect(self, value): + if value is not None: + raise ValueError("Parameter `prel_model_effect` cannot be altered for this estimator.") + class LinearIntentToTreatDRIV(StatsModelsCateEstimatorMixin, IntentToTreatDRIV): """ @@ -1446,6 +1567,9 @@ class LinearIntentToTreatDRIV(StatsModelsCateEstimatorMixin, IntentToTreatDRIV): Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all W, X are None, then we call `split(ones((T.shape[0], 1)), T)`. + monte_carlo_iterations: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances. + categories: 'auto' or list, default 'auto' The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values). The first category will be treated as the control treatment. @@ -1457,26 +1581,36 @@ class LinearIntentToTreatDRIV(StatsModelsCateEstimatorMixin, IntentToTreatDRIV): by :mod:`np.random`. 
""" - def __init__(self, model_Y_X, model_T_XZ, + def __init__(self, *, model_Y_X, + model_T_XZ, flexible_model_effect, featurizer=None, fit_cate_intercept=True, cov_clip=.1, n_splits=3, + monte_carlo_iterations=None, categories='auto', random_state=None): - super().__init__(model_Y_X, model_T_XZ, + super().__init__(model_Y_X=model_Y_X, + model_T_XZ=model_T_XZ, flexible_model_effect=flexible_model_effect, featurizer=featurizer, fit_cate_intercept=fit_cate_intercept, - final_model_effect=StatsModelsLinearRegression(fit_intercept=False), - cov_clip=cov_clip, n_splits=n_splits, opt_reweighted=False, + model_final=None, + cov_clip=cov_clip, + n_splits=n_splits, + monte_carlo_iterations=monte_carlo_iterations, + opt_reweighted=False, categories=categories, random_state=random_state) + def _gen_model_final(self): + return StatsModelsLinearRegression(fit_intercept=False) + # override only so that we can update the docstring to indicate support for `StatsModelsInference` @_deprecate_positional("X, W, and Z should be passed by keyword only. In a future release " "we will disallow passing X, W, and Z by position.", ['X', 'W', 'Z']) - def fit(self, Y, T, Z, X=None, W=None, *, sample_weight=None, sample_var=None, groups=None, inference='auto'): + def fit(self, Y, T, Z, X=None, W=None, *, sample_weight=None, sample_var=None, groups=None, + cache_values=False, inference='auto'): """ Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`. @@ -1500,6 +1634,8 @@ def fit(self, Y, T, Z, X=None, W=None, *, sample_weight=None, sample_var=None, g All rows corresponding to the same group will be kept together during splitting. If groups is not None, the n_splits argument passed to this class's initializer must support a 'groups' argument to its split method. + cache_values: bool, default False + Whether to cache inputs and first stage results, which will allow refitting a different final model inference: string,:class:`.Inference` instance, or None Method for performing inference. This estimator supports 'bootstrap' (or an instance of:class:`.BootstrapInference`) and 'statsmodels' @@ -1511,4 +1647,29 @@ def fit(self, Y, T, Z, X=None, W=None, *, sample_weight=None, sample_var=None, g """ return super().fit(Y, T, Z=Z, X=X, W=W, sample_weight=sample_weight, sample_var=sample_var, groups=groups, - inference=inference) + cache_values=cache_values, inference=inference) + + def refit(self, inference='auto'): + return super().refit(inference=inference) + + @property + def bias_part_of_coef(self): + return self.ortho_learner_model_final._fit_cate_intercept + + @property + def model_final(self): + return self._gen_model_final() + + @model_final.setter + def model_final(self, value): + if value is not None: + raise ValueError("Parameter `model_final` cannot be altered for this estimator.") + + @property + def opt_reweighted(self): + return False + + @opt_reweighted.setter + def opt_reweighted(self, value): + if not (value == False): + raise ValueError("Parameter `value` cannot be altered from `False` for this estimator.")