
Fix a few 0.9.0 bugs (#422)
* Relax sklearn requirement `>=0.24.0` --> `>0.22.0`

* Make `cate_feature_names` more robust to different featurizers

* Address PR comments

* Update name featurization in shap
Miruna Oprescu authored Mar 3, 2021
1 parent 5e31584 commit 2cc9f62
Showing 9 changed files with 74 additions and 39 deletions.
econml/_cate_estimator.py (2 changes: 1 addition & 1 deletion)

@@ -77,7 +77,7 @@ def _prefit(self, Y, T, *args, **kwargs):
         self._d_t = np.shape(T)[1:]
         # This works only if X is passed as a kwarg
         # We plan to enforce X as kwarg only in future releases
-        if not hasattr(self, "_input_names_set"):
+        if not hasattr(self, "_input_names_set") or not self._input_names_set:
             # This checks if names have been set in a child class
             # If names were set in a child class, don't do it again
             X = kwargs.get('X')
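The guard change above is subtle: the old check skipped name inference whenever the attribute merely existed, even if a subclass had left it falsy. A minimal sketch of the case it fixes (not part of the commit; the class here is hypothetical):

    class Child:
        _input_names_set = False  # attribute exists, but names were never set

    est = Child()
    # Old condition: hasattr(...) is True, so name inference was skipped entirely
    old_skips = hasattr(est, "_input_names_set")
    # New condition: also fires when the flag is falsy, so names are still inferred from X
    new_infers = not hasattr(est, "_input_names_set") or not est._input_names_set
    assert old_skips and new_infers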
econml/_shap.py (7 changes: 2 additions & 5 deletions)

@@ -15,7 +15,7 @@
 import shap
 from collections import defaultdict
 import numpy as np
-from .utilities import broadcast_unit_treatments, cross_product
+from .utilities import broadcast_unit_treatments, cross_product, get_feature_names_or_default


 def _shap_explain_cme(cme_model, X, d_t, d_y,

@@ -392,9 +392,6 @@ def _define_names(d_t, d_y, treatment_names, output_names, feature_names, input_
     feature_names = input_names['feature_names']
     if featurizer is None:
         transformed_feature_names = feature_names
-    elif featurizer is not None and hasattr(featurizer, 'get_feature_names'):
-        transformed_feature_names = featurizer.get_feature_names(feature_names)
     else:
-        transformed_feature_names = None
-
+        transformed_feature_names = get_feature_names_or_default(featurizer, feature_names)
     return (d_t, d_y, treatment_names, output_names, feature_names, transformed_feature_names)
econml/dml/dml.py (9 changes: 3 additions & 6 deletions)

@@ -31,7 +31,8 @@
                          broadcast_unit_treatments, check_high_dimensional,
                          cross_product, deprecated, fit_with_groups,
                          hstack, inverse_onehot, ndim, reshape,
-                         reshape_treatmentwise_effects, shape, transpose)
+                         reshape_treatmentwise_effects, shape, transpose,
+                         get_feature_names_or_default)
 from .._shap import _shap_explain_model_cate


@@ -281,11 +282,7 @@ def cate_feature_names(self, feature_names=None):
         feature_names = self._input_names["feature_names"]
         if self.original_featurizer is None:
             return feature_names
-        elif hasattr(self.original_featurizer, 'get_feature_names'):
-            # This fails if X=None and featurizer is not None, but that case is handled above
-            return self.original_featurizer.get_feature_names(feature_names)
-        else:
-            raise AttributeError("Featurizer does not have a method: get_feature_names!")
+        return get_feature_names_or_default(self.original_featurizer, feature_names)


 class DML(LinearModelFinalCateEstimatorMixin, _BaseDML):
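In practice the consolidation changes what users see from cate_feature_names. A hedged sketch of the effect (assuming econml's LinearDML with default nuisance models; not part of the commit):

    import numpy as np
    import pandas as pd
    from sklearn.preprocessing import FunctionTransformer
    from econml.dml import LinearDML

    X = pd.DataFrame(np.random.normal(size=(200, 2)), columns=["price", "season"])
    Y = np.random.normal(size=200)
    T = np.random.binomial(1, 0.5, size=200)

    est = LinearDML(featurizer=FunctionTransformer())  # has no get_feature_names
    est.fit(Y, T, X=X)
    # Before this commit: AttributeError("Featurizer does not have a method: get_feature_names!")
    # After: generic names are generated from the transformed output
    print(est.cate_feature_names())  # e.g. ['feat(X)0', 'feat(X)1']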
econml/dr/_drlearner.py (8 changes: 2 additions & 6 deletions)

@@ -52,7 +52,7 @@
 from ..sklearn_extensions.linear_model import (
     DebiasedLasso, StatsModelsLinearRegression, WeightedLassoCVWrapper)
 from ..utilities import (_deprecate_positional, check_high_dimensional,
-                         filter_none_kwargs, fit_with_groups, inverse_onehot)
+                         filter_none_kwargs, fit_with_groups, inverse_onehot, get_feature_names_or_default)
 from .._shap import _shap_explain_multitask_model_cate, _shap_explain_model_cate


@@ -631,11 +631,7 @@ def cate_feature_names(self, feature_names=None):
         feature_names = self._input_names["feature_names"]
         if self.featurizer_ is None:
             return feature_names
-        elif hasattr(self.featurizer_, 'get_feature_names'):
-            # This fails if X=None and featurizer is not None, but that case is handled above
-            return self.featurizer_.get_feature_names(feature_names)
-        else:
-            raise AttributeError("Featurizer does not have a method: get_feature_names!")
+        return get_feature_names_or_default(self.featurizer_, feature_names)

     @property
     def model_final_(self):
econml/iv/dml/_dml.py (7 changes: 2 additions & 5 deletions)

@@ -23,7 +23,7 @@
 from ..._cate_estimator import LinearModelFinalCateEstimatorMixin, StatsModelsCateEstimatorMixin
 from ...inference import StatsModelsInference
 from ...sklearn_extensions.linear_model import StatsModelsLinearRegression
-from ...utilities import _deprecate_positional
+from ...utilities import _deprecate_positional, get_feature_names_or_default
 from .._nuisance_wrappers import _FirstStageWrapper, _FinalWrapper


@@ -676,10 +676,7 @@ def cate_feature_names(self, feature_names=None):
         feature_names = self._input_names["feature_names"]
         if self.original_featurizer is None:
             return feature_names
-        elif hasattr(self.original_featurizer, 'get_feature_names'):
-            return self.original_featurizer.get_feature_names(feature_names)
-        else:
-            raise AttributeError("Featurizer does not have a method: get_feature_names!")
+        return get_feature_names_or_default(self.original_featurizer, feature_names)


 class DMLIV(LinearModelFinalCateEstimatorMixin, _BaseDMLIV):
econml/iv/dr/_dr.py (7 changes: 2 additions & 5 deletions)

@@ -24,7 +24,7 @@
 from ...inference import StatsModelsInference
 from ...sklearn_extensions.linear_model import StatsModelsLinearRegression
 from ...utilities import (_deprecate_positional, add_intercept, filter_none_kwargs,
-                          inverse_onehot)
+                          inverse_onehot, get_feature_names_or_default)
 from .._nuisance_wrappers import _FirstStageWrapper, _FinalWrapper


@@ -354,10 +354,7 @@ def cate_feature_names(self, feature_names=None):
         feature_names = self._input_names["feature_names"]
         if self.original_featurizer is None:
             return feature_names
-        elif hasattr(self.original_featurizer, 'get_feature_names'):
-            return self.original_featurizer.get_feature_names(feature_names)
-        else:
-            raise AttributeError("Featurizer does not have a method: get_feature_names!")
+        return get_feature_names_or_default(self.original_featurizer, feature_names)


 class _IntentToTreatDRIVModelNuisance:
econml/tests/test_integration.py (22 changes: 20 additions & 2 deletions)

@@ -12,8 +12,10 @@
 from econml.ortho_forest import DMLOrthoForest, DROrthoForest
 from econml.sklearn_extensions.linear_model import WeightedLasso
 from econml.metalearners import XLearner, SLearner, TLearner
+from sklearn.compose import ColumnTransformer
 from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
 from sklearn.linear_model import LinearRegression, MultiTaskLasso, LassoCV
+from sklearn.preprocessing import PolynomialFeatures, FunctionTransformer
 from econml.ortho_iv import LinearIntentToTreatDRIV
 from econml.deepiv import DeepIVEstimator

@@ -64,7 +66,23 @@ def test_dml(self):
         treatment_effects = est.effect(X)
         lb, ub = est.effect_interval(X, alpha=0.05)
         self._check_input_names(est.summary())  # Check that names propagate as expected
-        # Test re-fit
+        # |--> Test featurizers
+        est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
+        est.fit(Y, T, X=X, W=W, inference='statsmodels')
+        self._check_input_names(
+            est.summary(),
+            feat_comp=est.original_featurizer.get_feature_names(X.columns))
+        est.featurizer = FunctionTransformer()
+        est.fit(Y, T, X=X, W=W, inference='statsmodels')
+        self._check_input_names(
+            est.summary(),
+            feat_comp=[f"feat(X){i}" for i in range(TestPandasIntegration.n_features)])
+        est.featurizer = ColumnTransformer([('passthrough', 'passthrough', [0])])
+        est.fit(Y, T, X=X, W=W, inference='statsmodels')
+        # ColumnTransformer doesn't propagate column names
+        self._check_input_names(est.summary(), feat_comp=["x0"])
+        # |--> Test re-fit
+        est.featurizer = None
         X1 = X.rename(columns={c: "{}_1".format(c) for c in X.columns})
         est.fit(Y, T, X=X1, W=W, inference='statsmodels')
         self._check_input_names(est.summary(), feat_comp=X1.columns)
@@ -74,7 +92,7 @@ def test_dml(self):
         treatment_effects = est.effect(X)
         lb, ub = est.effect_interval(X, alpha=0.05)
         self._check_input_names(est.summary())  # Check that names propagate as expected
-        # ForestDML
+        # Test ForestDML
         est = ForestDML(model_y=GradientBoostingRegressor(), model_t=GradientBoostingRegressor())
         est.fit(Y, T, X=X, W=W, inference='blb')
         treatment_effects = est.effect(X)
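The ColumnTransformer expectation above is sklearn behavior rather than anything econml adds. A sketch (assuming sklearn 0.23 or 0.24, where get_feature_names supports passthrough columns) of why the name degrades to "x0":

    import numpy as np
    from sklearn.compose import ColumnTransformer

    ct = ColumnTransformer([('passthrough', 'passthrough', [0])])
    ct.fit(np.ones((5, 3)))  # fitted on a plain ndarray, as econml does internally
    # With no DataFrame column names available, passthrough columns get
    # positional names, so the original input names are lost.
    print(ct.get_feature_names())  # ['x0']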
econml/utilities.py (49 changes: 41 additions & 8 deletions)

@@ -8,13 +8,15 @@
 import scipy.sparse
 import sparse as sp
 import itertools
+import inspect
 from operator import getitem
 from collections import defaultdict, Counter
 from sklearn import clone
 from sklearn.base import TransformerMixin, BaseEstimator
 from sklearn.linear_model import LassoCV, MultiTaskLassoCV, Lasso, MultiTaskLasso
 from functools import reduce, wraps
 from sklearn.utils import check_array, check_X_y
+from sklearn.utils.validation import assert_all_finite
 import warnings
 from warnings import warn
 from sklearn.model_selection import KFold, StratifiedKFold, GroupKFold
@@ -512,7 +514,7 @@ def check_inputs(Y, T, X, W=None, multi_output_T=True, multi_output_Y=True):
     return Y, T, X, W


-def check_input_arrays(*args, validate_len=True):
+def check_input_arrays(*args, validate_len=True, force_all_finite=True):
     """Cast input sequences into numpy arrays.

     Only inputs that are sequence-like will be converted, all other inputs will be left as is.
@@ -526,23 +528,35 @@ def check_input_arrays(*args, validate_len=True):
     validate_len : bool (default=True)
         Whether to check if the input arrays have the same length.
+    force_all_finite : bool (default=True)
+        Whether to allow inf and nan in input arrays.

     Returns
     -------
     args: array-like
         List of inputs where sequence-like objects have been cast to numpy arrays.
     """
-    args = [check_array(arg, dtype=None, ensure_2d=False, accept_sparse=True)
-            if np.ndim(arg) > 0 else arg for arg in args]
-    if validate_len:
-        n = None
-        for arg in args:
-            if np.ndim(arg) > 0:
-                m = arg.shape[0]
+    n = None
+    args = list(args)
+    for i, arg in enumerate(args):
+        if np.ndim(arg) > 0:
+            new_arg = check_array(arg, dtype=None, ensure_2d=False, accept_sparse=True,
+                                  force_all_finite=force_all_finite)
+            if not force_all_finite:
+                # For when checking input values is disabled
+                try:
+                    assert_all_finite(new_arg)
+                except ValueError:
+                    warnings.warn("Input contains NaN, infinity or a value too large for dtype('float64') "
+                                  "but input check is disabled. Check the inputs before proceeding.")
+            if validate_len:
+                m = new_arg.shape[0]
                 if n is None:
                     n = m
                 else:
                     assert (m == n), "Input arrays have incompatible lengths: {} and {}".format(n, m)
+            args[i] = new_arg
     return args
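A short usage sketch for the new flag (not part of the commit), assuming check_input_arrays is imported from econml.utilities:

    import numpy as np
    from econml.utilities import check_input_arrays

    Y = np.array([1.0, np.nan, 3.0])
    T = np.array([0, 1, 0])

    # Default (force_all_finite=True): check_array raises ValueError on the NaN.
    # With the check disabled, the arrays pass through and a warning is issued
    # so the caller can handle missing values deliberately.
    Y_chk, T_chk = check_input_arrays(Y, T, force_all_finite=False)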


@@ -582,6 +596,25 @@ def get_input_columns(X, prefix="X"):
return [f"{prefix}{i}" for i in range(len_X)]


def get_feature_names_or_default(featurizer, feature_names):
if hasattr(featurizer, 'get_feature_names'):
# Get number of arguments, some sklearn featurizer don't accept feature_names
arg_no = len(inspect.getfullargspec(featurizer.get_feature_names).args)
if arg_no == 1:
return featurizer.get_feature_names()
elif arg_no == 2:
return featurizer.get_feature_names(feature_names)
# Featurizer doesn't have 'get_feature_names' or has atypical 'get_feature_names'
try:
# Get feature names using featurizer
dummy_X = np.ones((1, len(feature_names)))
return get_input_columns(featurizer.transform(dummy_X), prefix="feat(X)")
except Exception:
# All attempts at retrieving transformed feature names have failed
# Delegate handling to downstream logic
return None


def check_models(models, n):
"""
Input validation for metalearner models.
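How the new helper resolves names for common featurizers (a sketch, not part of the commit; the printed outputs assume sklearn 0.22-0.24, whose featurizers still expose get_feature_names):

    import numpy as np
    from sklearn.preprocessing import PolynomialFeatures, FunctionTransformer
    from econml.utilities import get_feature_names_or_default

    names = ["x0", "x1"]

    poly = PolynomialFeatures(degree=2, include_bias=False).fit(np.ones((2, 2)))
    # get_feature_names accepts input names (two args including self), so they propagate
    print(get_feature_names_or_default(poly, names))
    # ['x0', 'x1', 'x0^2', 'x0 x1', 'x1^2']

    ident = FunctionTransformer().fit(np.ones((2, 2)))
    # No get_feature_names: transform a dummy row and fall back to generic names
    print(get_feature_names_or_default(ident, names))
    # ['feat(X)0', 'feat(X)1']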
setup.cfg (2 changes: 1 addition & 1 deletion)

@@ -33,7 +33,7 @@ packages = find_namespace:
 install_requires =
     numpy
     scipy > 1.4.0
-    scikit-learn >= 0.24
+    scikit-learn > 0.22.0
     sparse
     joblib >= 0.13.0
     numba != 0.42.1