From dd01d3fd06b4709c0fd525b085fbbf75074e9b7d Mon Sep 17 00:00:00 2001 From: Adithya Balaji Date: Wed, 7 Aug 2019 17:25:50 -0400 Subject: [PATCH 01/37] Temp --- foreshadow/smart/smart.py | 35 ++++++++------- foreshadow/steps/cleaner.py | 16 ------- testing.py | 89 +++++++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 31 deletions(-) create mode 100644 testing.py diff --git a/foreshadow/smart/smart.py b/foreshadow/smart/smart.py index b82a773..ffe7e01 100644 --- a/foreshadow/smart/smart.py +++ b/foreshadow/smart/smart.py @@ -62,11 +62,11 @@ def __init__( name=None, keep_columns=False, check_wrapped=True, - **kwargs, + **transformer_kwargs, ): self.name = name self.keep_columns = keep_columns - self.kwargs = kwargs + self.transformer_kwargs = transformer_kwargs self.column_sharer = column_sharer # TODO will need to add the above when this is no longer wrapped self.y_var = y_var @@ -107,12 +107,13 @@ def transformer(self, value): """ value = deepcopy(value) if isinstance(value, str): - value = get_transformer(value)(**self.kwargs) + # This pathway is used at init time of the object + value = get_transformer(value)(**self.transformer_kwargs) self.unset_resolve() elif isinstance(value, dict): class_name = value.pop("class_name") - self.kwargs.update(value) - value = get_transformer(class_name)(**self.kwargs) + self.transformer_kwargs.update(value) + value = get_transformer(class_name)(**self.transformer_kwargs) self.unset_resolve() # Check transformer type is_trans = is_transformer(value) @@ -179,20 +180,24 @@ def set_params(self, **params): transformer_params = params.pop("transformer", self.transformer) super().set_params(**params) - # Calls to override auto set the transformer instance + # Validate new instantiation of transformer object + transformer_name = None if ( isinstance(transformer_params, dict) and "class_name" in transformer_params - ): # instantiate a - # new - # self.transformer - self.transformer = transformer_params + ): + # instantiate a new self.transformer + transformer_name = transformer_params.pop("class_name") + self.transformer_kwargs = transformer_params + elif isinstance(transformer_params, str): + transformer_name = transformer_params + self.transformer_kwargs = {} + + # Calls to override auto set the transformer instance + if transformer_name is not None: + # instantiate a new self.transformer + self.transformer = transformer_name elif self.transformer is not None: - # valid_params = { - # k.partition("__")[2]: v - # for k, v in params.items() - # if k.split("__")[0] == "transformer" - # } self.transformer.set_params(**transformer_params) self.transformer.set_extra_params( name=type(self.transformer).__name__, diff --git a/foreshadow/steps/cleaner.py b/foreshadow/steps/cleaner.py index d3ce524..9dadf9a 100644 --- a/foreshadow/steps/cleaner.py +++ b/foreshadow/steps/cleaner.py @@ -36,19 +36,3 @@ def get_mapping(self, X): ], cols=X.columns, ) - - # def __repr__(self): - # """Return string representation of this object with parent params. - # - # Returns: - # See above. 
- # - # """ - # r = super().__repr__() - # preparer_params = self._preparer_params() - # preparer_params = {p: getattr(self, p, None) - # for p in preparer_params} - # preparer_print = ", ".join( - # ["{}={}".format(k, v) for k, v in preparer_params.items()] - # ) - # return r[:-1] + preparer_print + ")" diff --git a/testing.py b/testing.py new file mode 100644 index 0000000..38f44bd --- /dev/null +++ b/testing.py @@ -0,0 +1,89 @@ +from foreshadow.preparer import DataPreparer +from foreshadow.steps import CleanerMapper +from foreshadow.steps import IntentMapper +from foreshadow.steps import Preprocessor +from foreshadow.columnsharer import ColumnSharer +import pandas as pd + +from sklearn.model_selection import RandomizedSearchCV +from sklearn.pipeline import Pipeline +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import accuracy_score + +import sklearn.datasets as dt + +from foreshadow.smart import Scaler + +from foreshadow.utils.testing import debug; debug() + +from hyperopt import hp +import hyperopt.pyll.stochastic + +data = dt.load_iris() + +X_data = pd.DataFrame(data.data, columns=data.feature_names).iloc[:, 0] +y_data = pd.DataFrame(data.target, columns=['target']) + +# cs = ColumnSharer() +# p = Preprocessor(column_sharer=cs) +s = Scaler() +lr = LogisticRegression() + +pipe = Pipeline([('s', s), ('lr', lr)]) + +pipe.fit(X_data, y_data) + +param_distributions = hp.choice( + 's__transformer', + [ + { + 'class_name': 'StandardScaler', + 'with_mean': hp.choice('wm', [False, True]), + }, + { + 'class_name': 'MinMaxScaler', + 'feature_range': hp.choice('fr', [(0, 1), (0, 0.5)]) + } +]) + +import pdb; pdb.set_trace() + +# combinations.yaml +""" +combinations: + X_preparer.cleaner.CHAS: + Cleaner: + - date: + - p1 + - p2 + - financial + IntentMapper: + - Something + + X_preparer.cleaner.CHAS.CleanerMapper: + -Something + + X_preparer.cleaner.CHAS.IntentMapper: + -Something + + + X_preparer: + cleaner: + CHAS: + Cleaner: + date: + -p1 + -p2 + +""" + +rscv = RandomizedSearchCV(pipe, param_distributions, iid=True, n_iter=2, scoring='accuracy') + +# print("Train Accuracy: {}".format(accuracy_score(y_data, pipe.predict(X_data)))) + +rscv.fit(X_data, y_data) +results = pd.DataFrame(rscv.cv_results_) +results = results[[c for c in results.columns if all(s not in c for s in ['time', 'params'])]] + + +import pdb; pdb.set_trace() \ No newline at end of file From 059f423f6b86cd4f7aa94c0d88f2d169fec21cd4 Mon Sep 17 00:00:00 2001 From: Adithya Balaji Date: Fri, 9 Aug 2019 15:51:25 -0400 Subject: [PATCH 02/37] MVP for param search --- searcher_script.py | 140 +++++++++++++++++++++++++++++++++++++++++++++ testing.py | 89 ---------------------------- 2 files changed, 140 insertions(+), 89 deletions(-) create mode 100644 searcher_script.py delete mode 100644 testing.py diff --git a/searcher_script.py b/searcher_script.py new file mode 100644 index 0000000..8745ee6 --- /dev/null +++ b/searcher_script.py @@ -0,0 +1,140 @@ +from foreshadow.preparer import DataPreparer +from foreshadow.steps import CleanerMapper +from foreshadow.steps import IntentMapper +from foreshadow.steps import Preprocessor +from foreshadow.columnsharer import ColumnSharer +import pandas as pd + +from sklearn.model_selection import RandomizedSearchCV +from sklearn.pipeline import Pipeline +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import accuracy_score + +import sklearn.datasets as dt + +from foreshadow.smart import Scaler + +from foreshadow.utils.testing import debug; debug() + 
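+# A minimal standalone illustration of the hyperopt space API used below
+# (hp.choice builds a discrete search node; stoch.sample draws from it):
+#     space = hp.choice("scaler", ["StandardScaler", "MinMaxScaler"])
+#     print(stoch.sample(space))  # e.g. "MinMaxScaler"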
+from hyperopt import hp +import hyperopt.pyll.stochastic as stoch + +from sklearn.model_selection import ParameterSampler +from sklearn.model_selection._search import BaseSearchCV +from sklearn.utils.fixes import _Mapping as Mapping, _Sequence as Sequence +import six +import numpy as np + +data = dt.load_iris() + +X_data = pd.DataFrame(data.data, columns=data.feature_names).iloc[:, 0] +y_data = pd.DataFrame(data.target, columns=['target']) + +# cs = ColumnSharer() +# p = Preprocessor(column_sharer=cs) +s = Scaler() +lr = LogisticRegression() + +pipe = Pipeline([('s', s), ('lr', lr)]) + +pipe.fit(X_data, y_data) + +param_distributions = { + 's__transformer': hp.choice( + 's__transformer', + [ + { + 'class_name': 'StandardScaler', + 'with_mean': hp.choice('with_mean', [False, True]), + }, + { + 'class_name': 'MinMaxScaler', + 'feature_range': hp.choice('feature_range', [(0, 1), (0, 0.5)]) + } + ] + ) +} + +from sklearn.utils import check_random_state +class HyperOptSampler(object): + def __init__(self, param_distributions, n_iter, random_state=None): + self.param_distributions = param_distributions + self.n_iter = n_iter + self.random_state = random_state + + def __iter__(self): + # check if all distributions are given as lists + # in this case we want to sample without replacement + rng = check_random_state(self.random_state) + for _ in six.moves.range(self.n_iter): + import pdb; pdb.set_trace() + yield stoch.sample(self.param_distributions, rng=rng) + + def __len__(self): + """Number of points that will be sampled.""" + return self.n_iter + + +class ShadowSearchCV(BaseSearchCV): + def __init__(self, estimator, param_distributions, n_iter=10, scoring=None, + fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, + verbose=0, pre_dispatch='2*n_jobs', random_state=None, + error_score='raise', return_train_score="warn"): + self.param_distributions = param_distributions + self.n_iter = n_iter + self.random_state = random_state + super().__init__( + estimator=estimator, scoring=scoring, fit_params=fit_params, + n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, + pre_dispatch=pre_dispatch, error_score=error_score, + return_train_score=return_train_score) + + def _get_param_iterator(self): + """Return ParameterSampler instance for the given distributions""" + return HyperOptSampler( + self.param_distributions, self.n_iter, + random_state=self.random_state) + +from hpsklearn import HyperoptEstimator, extra_trees +from hyperopt import tpe + + +# combinations.yaml +""" +combinations: + X_preparer.cleaner.CHAS: + Cleaner: + - date: + - p1 + - p2 + - financial + IntentMapper: + - Something + + X_preparer.cleaner.CHAS.CleanerMapper: + -Something + + X_preparer.cleaner.CHAS.IntentMapper: + -Something + + + X_preparer: + cleaner: + CHAS: + Cleaner: + date: + -p1 + -p2 + +""" + +rscv = ShadowSearchCV(pipe, param_distributions, iid=True, scoring='accuracy', n_iter=2) + +# print("Train Accuracy: {}".format(accuracy_score(y_data, pipe.predict(X_data)))) + +rscv.fit(X_data, y_data) +results = pd.DataFrame(rscv.cv_results_) +results = results[[c for c in results.columns if all(s not in c for s in ['time', 'params'])]] + + +import pdb; pdb.set_trace() \ No newline at end of file diff --git a/testing.py b/testing.py deleted file mode 100644 index 38f44bd..0000000 --- a/testing.py +++ /dev/null @@ -1,89 +0,0 @@ -from foreshadow.preparer import DataPreparer -from foreshadow.steps import CleanerMapper -from foreshadow.steps import IntentMapper -from foreshadow.steps import Preprocessor -from 
foreshadow.columnsharer import ColumnSharer -import pandas as pd - -from sklearn.model_selection import RandomizedSearchCV -from sklearn.pipeline import Pipeline -from sklearn.linear_model import LogisticRegression -from sklearn.metrics import accuracy_score - -import sklearn.datasets as dt - -from foreshadow.smart import Scaler - -from foreshadow.utils.testing import debug; debug() - -from hyperopt import hp -import hyperopt.pyll.stochastic - -data = dt.load_iris() - -X_data = pd.DataFrame(data.data, columns=data.feature_names).iloc[:, 0] -y_data = pd.DataFrame(data.target, columns=['target']) - -# cs = ColumnSharer() -# p = Preprocessor(column_sharer=cs) -s = Scaler() -lr = LogisticRegression() - -pipe = Pipeline([('s', s), ('lr', lr)]) - -pipe.fit(X_data, y_data) - -param_distributions = hp.choice( - 's__transformer', - [ - { - 'class_name': 'StandardScaler', - 'with_mean': hp.choice('wm', [False, True]), - }, - { - 'class_name': 'MinMaxScaler', - 'feature_range': hp.choice('fr', [(0, 1), (0, 0.5)]) - } -]) - -import pdb; pdb.set_trace() - -# combinations.yaml -""" -combinations: - X_preparer.cleaner.CHAS: - Cleaner: - - date: - - p1 - - p2 - - financial - IntentMapper: - - Something - - X_preparer.cleaner.CHAS.CleanerMapper: - -Something - - X_preparer.cleaner.CHAS.IntentMapper: - -Something - - - X_preparer: - cleaner: - CHAS: - Cleaner: - date: - -p1 - -p2 - -""" - -rscv = RandomizedSearchCV(pipe, param_distributions, iid=True, n_iter=2, scoring='accuracy') - -# print("Train Accuracy: {}".format(accuracy_score(y_data, pipe.predict(X_data)))) - -rscv.fit(X_data, y_data) -results = pd.DataFrame(rscv.cv_results_) -results = results[[c for c in results.columns if all(s not in c for s in ['time', 'params'])]] - - -import pdb; pdb.set_trace() \ No newline at end of file From da9c1a18d5ea485eec01d69c4e8d2dff9be312b9 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Mon, 12 Aug 2019 15:22:45 -0400 Subject: [PATCH 03/37] Fixing get_params in three ways: 1. extending the _get_param_names and fixing the class to be parents in the wrapper. 2. Fixing PreparerStep to traverse __mro__. 3. Double checking major classes to ensure get_params returns required values. Tests included. --- foreshadow/concrete/internals/fancyimpute.py | 13 +- foreshadow/concrete/internals/labelencoder.py | 27 ++++ foreshadow/estimators/auto.py | 26 ++++ foreshadow/foreshadow.py | 24 ++++ foreshadow/smart/smart.py | 48 +------ foreshadow/steps/__init__.py | 2 + foreshadow/steps/preparerstep.py | 37 +++--- foreshadow/tests/test_core/test_wrapper.py | 3 +- foreshadow/tests/test_estimators/test_meta.py | 29 +++++ foreshadow/tests/test_foreshadow.py | 26 ++++ .../test_internals/test_internal.py | 50 ++++++- .../test_transformers/test_transformers.py | 32 ++--- foreshadow/wrapper.py | 123 ++++++++---------- 13 files changed, 277 insertions(+), 163 deletions(-) diff --git a/foreshadow/concrete/internals/fancyimpute.py b/foreshadow/concrete/internals/fancyimpute.py index b563a85..13b5f5c 100644 --- a/foreshadow/concrete/internals/fancyimpute.py +++ b/foreshadow/concrete/internals/fancyimpute.py @@ -43,7 +43,7 @@ def get_params(self, deep=True): dict: Parameter names mapped to their values. """ - return {"method": self.method, "impute_kwargs": self.impute_kwargs} + return super().get_params(deep=deep) def set_params(self, **params): """Set the parameters of this estimator. 
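
The hunk below reworks set_params to delegate to super().set_params and then
re-resolve the concrete fancyimpute class from its string name. In isolation,
that dynamic lookup is the following sketch (hedged: only importlib and the
error message visible in the diff are assumed):

from importlib import import_module

def resolve_imputer_class(method):
    # Map a string such as "KNN" onto the fancyimpute class of that name.
    try:
        return getattr(import_module("fancyimpute"), method)
    except (ImportError, AttributeError):
        raise ValueError(
            "Invalid method. Possible values are BiScaler, KNN, "
            "NuclearNormMinimization and SoftImpute"
        )

# An instance is then built as resolve_imputer_class(method)(**impute_kwargs).
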
@@ -57,25 +57,22 @@ def set_params(self, **params): ValueError: If method is invalid """ - impute_kwargs = params.pop("impute_kwargs", {}) - method = params.pop("method", self.method) - - self.kwargs = params - self.method = method + out = super().set_params(**params) # Auto import and initialize fancyimpute class defined by method try: from importlib import import_module module = import_module("fancyimpute") - self.cls = getattr(module, method) + self.cls = getattr(module, self.method) except Exception: raise ValueError( "Invalid method. Possible values are BiScaler, KNN, " "NuclearNormMinimization and SoftImpute" ) - self.imputer = self.cls(**impute_kwargs) + self.imputer = self.cls(self.impute_kwargs) + return out def fit(self, X, y=None): """Empty function. diff --git a/foreshadow/concrete/internals/labelencoder.py b/foreshadow/concrete/internals/labelencoder.py index 0d726c7..a79e099 100644 --- a/foreshadow/concrete/internals/labelencoder.py +++ b/foreshadow/concrete/internals/labelencoder.py @@ -62,3 +62,30 @@ def inverse_transform(self, X): """ return self.encoder.inverse_transform(X) + + def get_params(self, deep=True): + """Get parameters for this estimator. See super. + + Args: + deep: deep to super get_params + + Returns: + Params for this estimator. See super. + + """ + params = super().get_params(deep=deep) + if not deep: + params['encoder'] = self.encoder + else: + params['encoder'] = self.encoder.get_params(deep=deep) + return params + + def set_params(self, **params): + """Set parameters for this estimator. See super. + + Args: + **params: params to set on this estimator. + + """ + self.encoder = params.pop('encoder') + super().set_params(**params) diff --git a/foreshadow/estimators/auto.py b/foreshadow/estimators/auto.py index 8ecd181..04b5b6f 100644 --- a/foreshadow/estimators/auto.py +++ b/foreshadow/estimators/auto.py @@ -315,6 +315,32 @@ def score(self, X, y, sample_weight=None): y = check_df(y) return self.estimator.score(X, y) + def get_params(self, deep=True): + """Get params for this object. See super. + + Args: + deep: True to recursively call get_params, False to not. + + Returns: + params for this object. + + """ + params = super().get_params(deep=deep) + params.update({'estimator': self.estimator, + 'estimator_class': self.estimator_class}) + return params + + def set_params(self, **params): + """Set params for this object. See super. + + Args: + **params: params to set. + + """ + self.estimator = params.pop('estimator', None) + self.estimator_class = params.pop('estimator_class', None) + return super().set_params(**params) + def determine_problem_type(y): """Determine modeling problem type. diff --git a/foreshadow/foreshadow.py b/foreshadow/foreshadow.py index 963b9ba..65176d5 100644 --- a/foreshadow/foreshadow.py +++ b/foreshadow/foreshadow.py @@ -281,3 +281,27 @@ def score(self, data_df, y_df=None, sample_weight=None): y_df = check_df(y_df) self._prepare_predict(data_df.columns) return self.pipeline.score(data_df, y_df, sample_weight) + + def get_params(self, deep=True): + """Get params for this object. See super. + + Args: + deep: True to recursively call get_params, False to not. + + Returns: + params for this object. + + """ + params = super().get_params(deep=deep) + params['data_columns'] = self.data_columns + return params + + def set_params(self, **params): + """Set params for this object. See super. + + Args: + **params: params to set. 
+ + """ + self.data_columns = params.pop('data_columns', None) + return super().set_params(**params) diff --git a/foreshadow/smart/smart.py b/foreshadow/smart/smart.py index b82a773..4d895bc 100644 --- a/foreshadow/smart/smart.py +++ b/foreshadow/smart/smart.py @@ -105,15 +105,9 @@ def transformer(self, value): nor None. """ - value = deepcopy(value) if isinstance(value, str): value = get_transformer(value)(**self.kwargs) self.unset_resolve() - elif isinstance(value, dict): - class_name = value.pop("class_name") - self.kwargs.update(value) - value = get_transformer(class_name)(**self.kwargs) - self.unset_resolve() # Check transformer type is_trans = is_transformer(value) trans_wrapped = ( @@ -150,20 +144,6 @@ def get_params(self, deep=True): """ params = super().get_params(deep=deep) - transformer_params = {} - if self.transformer is not None: - transformer_params = { - "transformer": self.transformer.get_params(deep=deep) - } - transformer_params["transformer"].update( - {"class_name": type(self.transformer).__name__} - ) - params.update(transformer_params) - params = { - key: val - for key, val in params.items() - if key.find("transformer__") == -1 - } return params def set_params(self, **params): @@ -175,29 +155,11 @@ def set_params(self, **params): **params (dict): any valid parameter of this estimator """ - params = deepcopy(params) - transformer_params = params.pop("transformer", self.transformer) - super().set_params(**params) - - # Calls to override auto set the transformer instance - if ( - isinstance(transformer_params, dict) - and "class_name" in transformer_params - ): # instantiate a - # new - # self.transformer - self.transformer = transformer_params - elif self.transformer is not None: - # valid_params = { - # k.partition("__")[2]: v - # for k, v in params.items() - # if k.split("__")[0] == "transformer" - # } - self.transformer.set_params(**transformer_params) - self.transformer.set_extra_params( - name=type(self.transformer).__name__, - keep_columns=self.keep_columns, - ) + if 'transformer' in params: # required as set_params assumes + # self.transformer will already be the object housed here. We + # have it set to None as it may be anything at runtime. 
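+            # Concretely: BaseEstimator.set_params snapshots
+            # get_params(deep=True) before assigning anything, so nested
+            # "transformer__*" keys would otherwise be routed to the old
+            # value of self.transformer (None) and fail.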
+ self.transformer = params['transformer'] + return super().set_params(**params) @abstractmethod def pick_transformer(self, X, y=None, **fit_params): diff --git a/foreshadow/steps/__init__.py b/foreshadow/steps/__init__.py index 653f6e8..3bdf42f 100644 --- a/foreshadow/steps/__init__.py +++ b/foreshadow/steps/__init__.py @@ -5,6 +5,7 @@ from .feature_reducer import FeatureReducerMapper from .mapper import IntentMapper from .preprocessor import Preprocessor +from .preparerstep import PreparerStep __all__ = [ @@ -13,4 +14,5 @@ "Preprocessor", "FeatureEngineererMapper", "FeatureReducerMapper", + "PreparerStep" ] diff --git a/foreshadow/steps/preparerstep.py b/foreshadow/steps/preparerstep.py index 0a450be..ce7e0ca 100644 --- a/foreshadow/steps/preparerstep.py +++ b/foreshadow/steps/preparerstep.py @@ -523,25 +523,27 @@ def inverse_transform(self, X, *args, **kwargs): self.check_process(X) return self._parallel_process.inverse_transform(X, *args, **kwargs) - @staticmethod - def _preparer_params(): - init = getattr( - PreparerStep.__init__, "deprecated_original", PreparerStep.__init__ - ) - init_signature = signature(init) - # Consider the constructor parameters excluding 'self' - parameters = [ - p - for p in init_signature.parameters.values() - if p.name != "self" and p.kind != p.VAR_KEYWORD - ] - return [p.name for p in parameters] + @classmethod + def _get_param_names(cls): + """Iteratively get __init__ params for all classes until PreparerStep. + + Returns: + params for all parents up to and including PreparerStep. + Includes the calling classes params. + + """ + params = super()._get_param_names() + while cls.__name__ != PreparerStep.__name__: + cls = cls.__mro__[1] + params += cls._get_param_names() + return params def get_params(self, deep=True): """See super. Overridden to add this parent classes' params to children and to - include _parallel_process + include _parallel_process. _get_param_names holds the logic for + getting all parent params. Args: deep: See super. @@ -551,10 +553,6 @@ def get_params(self, deep=True): """ params = super().get_params(deep=deep) - _preparer_params = self._preparer_params() - params.update( - {key: getattr(self, key, None) for key in _preparer_params} - ) params.update( {"_parallel_process": getattr(self, "_parallel_process", None)} ) @@ -564,7 +562,8 @@ def set_params(self, **params): """See super. Overridden to afld this parent classes' params to children and to - include _parallel_process + include _parallel_process. _get_param_names holds the logic for + getting all parent params. Args: **params: see super. 
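
The MRO walk above is this patch's central device, repeated for
SmartTransformer in patch 07: climb from the calling subclass up to the base
step so that sklearn's introspection sees every ancestor's __init__ params. A
self-contained sketch of the pattern (illustrative class names, not
foreshadow's; the patch returns the raw list where this sketch deduplicates):

from sklearn.base import BaseEstimator

class Step(BaseEstimator):  # stand-in for PreparerStep
    def __init__(self, column_sharer=None, **kwargs):
        self.column_sharer = column_sharer
        super().__init__(**kwargs)

    @classmethod
    def _get_param_names(cls):
        # Collect __init__ params from the calling class up to Step itself.
        params = super()._get_param_names()
        while cls.__name__ != Step.__name__:
            cls = cls.__mro__[1]
            params += cls._get_param_names()
        return sorted(set(params))

class CleanerLike(Step):  # stand-in for a concrete step
    def __init__(self, threshold=0.5, **kwargs):
        self.threshold = threshold
        super().__init__(**kwargs)

# Params from both levels are now visible to get_params/set_params:
assert set(CleanerLike().get_params()) >= {"threshold", "column_sharer"}
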
diff --git a/foreshadow/tests/test_core/test_wrapper.py b/foreshadow/tests/test_core/test_wrapper.py index a34fbf7..5b44032 100644 --- a/foreshadow/tests/test_core/test_wrapper.py +++ b/foreshadow/tests/test_core/test_wrapper.py @@ -7,8 +7,7 @@ def test_transformer_wrapper_init(): from foreshadow.concrete import StandardScaler - scaler = StandardScaler() - scaler.set_extra_params(name="test-scaler", keep_columns=True) + scaler = StandardScaler(name="test-scaler", keep_columns=True) assert scaler.name == "test-scaler" assert scaler.keep_columns is True diff --git a/foreshadow/tests/test_estimators/test_meta.py b/foreshadow/tests/test_estimators/test_meta.py index 553d816..d6ba9b0 100644 --- a/foreshadow/tests/test_estimators/test_meta.py +++ b/foreshadow/tests/test_estimators/test_meta.py @@ -1,3 +1,7 @@ +"""Tests for foreshadow/estimators/meta.py.""" +import pytest + + def test_metaestimator_predict(): import numpy as np @@ -72,3 +76,28 @@ def test_metaestimator_score(): assert np.allclose( me.score(X_test, y_test), est.score(X_test, scaler.transform(y_test)) ) + + +@pytest.mark.parametrize( + 'deep', + [True, False] +) +def test_meta_estimator_get_params_keys(deep): + """Test that the desired keys show up for the MetaEstimator object. + + Args: + deep: deep param to get_params + + """ + from foreshadow.estimators.meta import MetaEstimator + from sklearn.preprocessing import StandardScaler + from sklearn.linear_model import LinearRegression + me = MetaEstimator(LinearRegression(), StandardScaler()) + params = me.get_params(deep=deep) + + desired_keys = [ + 'estimator', + 'preprocessor' + ] + for key in desired_keys: + assert key in params \ No newline at end of file diff --git a/foreshadow/tests/test_foreshadow.py b/foreshadow/tests/test_foreshadow.py index 2b65f18..08a3e36 100644 --- a/foreshadow/tests/test_foreshadow.py +++ b/foreshadow/tests/test_foreshadow.py @@ -633,3 +633,29 @@ def test_core_foreshadow_example_classification(): model.fit(X_train, y_train) score = f1_score(y_test, model.predict(X_test), average="weighted") print("Iris score: %f" % score) + + +@pytest.mark.parametrize( + 'deep', + [True, False] +) +def test_foreshadow_get_params_keys(deep): + """Test that the desired keys show up for the Foreshadow object. + + Args: + deep: deep param to get_params + + """ + from foreshadow.foreshadow import Foreshadow + fs = Foreshadow() + params = fs.get_params(deep=deep) + + desired_keys = [ + 'X_preparer', + 'estimator', + 'y_preparer', + 'optimizer', + 'data_columns' + ] + for key in desired_keys: + assert key in params \ No newline at end of file diff --git a/foreshadow/tests/test_transformers/test_concrete/test_internals/test_internal.py b/foreshadow/tests/test_transformers/test_concrete/test_internals/test_internal.py index 57c80f2..9d01df0 100644 --- a/foreshadow/tests/test_transformers/test_concrete/test_internals/test_internal.py +++ b/foreshadow/tests/test_transformers/test_concrete/test_internals/test_internal.py @@ -20,6 +20,30 @@ def test_dummy_encoder(): assert check.equals(df) +@pytest.mark.parametrize( + 'deep', + [True, False] +) +def test_dummy_encoder_get_params_keys(deep): + """Test that the desired keys show up for the DummyEncoder object. 
+ + Args: + deep: deep param to get_params + + """ + from foreshadow.concrete import DummyEncoder + de = DummyEncoder() + params = de.get_params(deep=deep) + + desired_keys = [ + 'delimeter', + 'other_cutoff', + 'other_name', + ] + for key in desired_keys: + assert key in params + + def test_dummy_encoder_other(): import pandas as pd @@ -60,6 +84,28 @@ def test_box_cox(): ) +@pytest.mark.parametrize( + 'deep', + [True, False] +) +def test_label_encoder_get_params_keys(deep): + """Test that the desired keys show up for the LabelEncoder object. + + Args: + deep: deep param to get_params + + """ + from foreshadow.concrete import FixedLabelEncoder + fle = FixedLabelEncoder() + params = fle.get_params(deep=deep) + + desired_keys = [ + 'encoder' + ] + for key in desired_keys: + assert key in params + + def test_transformer_fancy_impute_set_params(): import numpy as np import pandas as pd @@ -139,8 +185,8 @@ def test_transformer_onehotencoder_fit_transform_keep_cols(): df = pd.DataFrame( {"neat": ["apple", "apple", "orange", "apple", "orange"]} ) - ohe = OneHotEncoder(use_cat_names=True, handle_unknown="ignore") - ohe.set_extra_params(name="encoder", keep_columns=True) + ohe = OneHotEncoder(use_cat_names=True, handle_unknown="ignore", + name="encoder", keep_columns=True) assert ohe.fit(df) == ohe assert list(ohe.transform(df)) == ["neat", "neat_apple", "neat_orange"] diff --git a/foreshadow/tests/test_transformers/test_transformers.py b/foreshadow/tests/test_transformers/test_transformers.py index 8b01a70..451d629 100644 --- a/foreshadow/tests/test_transformers/test_transformers.py +++ b/foreshadow/tests/test_transformers/test_transformers.py @@ -12,8 +12,7 @@ def test_transformer_keep_cols(): df = pd.read_csv(boston_path) - custom = CustomScaler() - custom.set_extra_params(keep_columns=True) + custom = CustomScaler(keep_columns=True) custom_tf = custom.fit_transform(df[["crim"]]) assert custom_tf.shape[1] == 2 @@ -27,8 +26,7 @@ def test_transformer_naming_override(): df = pd.read_csv(boston_path) - scaler = StandardScaler() - scaler.set_extra_params(name="test", keep_columns=False) + scaler = StandardScaler(name="test", keep_columns=False) out = scaler.fit_transform(df[["crim"]]) assert out.iloc[:, 0].name == "crim" @@ -342,8 +340,7 @@ def test_smarttransformer_function_override(smart_child): # assert smart.transformer.name == "impute" # not relevant anymore. 
- std = Imputer() - std.set_extra_params(name="impute") + std = Imputer(name="impute") std_data = std.fit_transform(df[["crim"]]) assert smart_data.equals(std_data) @@ -386,7 +383,7 @@ def test_smarttransformer_set_params_override(smart_child): from foreshadow.concrete import StandardScaler smart = smart_child(transformer="Imputer") - smart.set_params(**{"transformer": {"class_name": "StandardScaler"}}) + smart.set_params(**{"transformer": "StandardScaler"}) assert isinstance(smart.transformer, StandardScaler) @@ -413,10 +410,11 @@ def test_smarttransformer_set_params_default(smart_child): """ smart = smart_child() smart.fit([1, 2, 3]) + before = smart.__dict__ + params = smart.get_params() + smart = smart_child().set_params(**params) - smart.set_params(**{"transformer": {"with_mean": False}}) - - assert not smart.transformer.with_mean + assert smart.__dict__ == before def test_smarttransformer_get_params(smart_child): @@ -434,14 +432,7 @@ def test_smarttransformer_get_params(smart_child): params = smart.get_params() print(params) assert params == { - "transformer": { - "class_name": "Imputer", - "missing_values": "NaN", - "strategy": "mean", - "copy": True, - "axis": 0, - "verbose": 0, - }, + "transformer": smart.transformer, "name": None, "keep_columns": False, "y_var": False, @@ -449,6 +440,11 @@ def test_smarttransformer_get_params(smart_child): "should_resolve": False, "column_sharer": None, "check_wrapped": True, + "transformer__copy": True, + "transformer__missing_values": 'NaN', + "transformer__strategy": 'mean', + "transformer__verbose": 0, + 'transformer__axis': 0, } diff --git a/foreshadow/wrapper.py b/foreshadow/wrapper.py index 10a4bdc..38f503b 100644 --- a/foreshadow/wrapper.py +++ b/foreshadow/wrapper.py @@ -5,6 +5,7 @@ import numpy as np import pandas as pd import scipy +from types import MethodType from sklearn.base import BaseEstimator from sklearn.utils.fixes import signature @@ -31,6 +32,7 @@ def pandas_wrap(transformer): # noqa: C901 # MRO metaclass issues in DFTransformer if we try to choose the base class # for our metaclass that is not the same one for the transformer we are # also extending. + class DFTransformerMeta(type(transformer)): """Metaclass for DFTransformer to appear as parent Transformer.""" @@ -72,7 +74,7 @@ class DFTransformer( ): """Wrapper to Enable parent transformer to handle DataFrames.""" - def __init__(self, *args, **kwargs): + def __init__(self, *args, name=None, keep_columns=False, **kwargs): """Initialize parent Transformer. Args: @@ -86,20 +88,20 @@ def __init__(self, *args, **kwargs): ..# noqa: I402 """ - if "name" in kwargs: - self.name = kwargs.pop("name") - logging.warning( - "name is a deprecated kwarg. Please remove " - "it from the kwargs and instead set it " - "after instantiation." - ) - if "keep_columns" in kwargs: - self.keep_column = kwargs.pop("keep_columns") - logging.warning( - "keep_columns is a deprecated kwarg. Please " - "remove it from the kwargs and instead set " - "it after instantiation." - ) + self.name = name + self.keep_columns = keep_columns + # self.name = kwargs.pop("name", None) + # logging.warning( + # "name is a deprecated kwarg. Please remove " + # "it from the kwargs and instead set it " + # "after instantiation." + # ) + # self.keep_column = kwargs.pop("keep_columns", False) + # logging.warning( + # "keep_columns is a deprecated kwarg. Please " + # "remove it from the kwargs and instead set " + # "it after instantiation." 
+ # ) try: super(DFTransformer, self).__init__(*args, **kwargs) except TypeError as e: @@ -118,7 +120,8 @@ def get_params(self, deep=True): a parent transformer (by OOP), we use the parent's init statement and then this DFTransformer's additional arguments. We must override of BaseEstimator will complain about our - nonstandard usage. + nonstandard usage. _get_param_names override holds the change to + the parent __init__. Args: deep (bool): If True, will return the parameters for this @@ -129,57 +132,29 @@ def get_params(self, deep=True): DFTransformer wrapper. """ - parent_params = BaseEstimator.get_params(transformer, deep=deep) - # will contain any init arguments that are not variable keyword - # arguments. By default, this means that any new transformer - # cannot have variable keyword arguments in its init less the - # transformer designer is okay with it not getting picked up here. - # The transformer class passed will not contain the current values, - # so we set them with the values on the object instance, below. - try: - self_params = super().get_params(deep=deep) - except RuntimeError: - # TODO, Chris explain why we copy scikit-learn's internal - # get_params. - self_params = dict() # the output - init = getattr( - self.__init__, "deprecated_original", self.__init__ - ) - if init is object.__init__: - return self_params - # explicit constructor to introspect - # introspect the constructor arguments to find the model - # parameters to represent - init_signature = signature(init) - # Consider the constructor parameters excluding 'self' - self_sig = [ - p - for p in init_signature.parameters.values() - if p.name != "self" - and p.kind != p.VAR_KEYWORD - and p.kind != p.VAR_POSITIONAL - ] - self_sig = sorted([p.name for p in self_sig]) - for key in self_sig + list(parent_params.keys()): - warnings.simplefilter("always", DeprecationWarning) - try: - with warnings.catch_warnings(record=True) as w: - value = getattr(self, key, None) - if len(w) and w[0].category == DeprecationWarning: - # if the parameter is deprecated, don't show it - continue - finally: - warnings.filters.pop(0) - - # XXX: should we rather test if instance of estimator? - if deep and hasattr(value, "get_params"): - deep_items = value.get_params().items() - self_params.update( - (key + "__" + k, val) for k, val in deep_items - ) - self_params[key] = value - - return self_params + params = super().get_params(deep=deep) + # params['keep_column'] = self.keep_column + # params['name'] = self.name + return params + + def set_params(self, **params): + """Override standard set_params to handle nonstandard init. + + BaseEstimator for sklearn gets and sets parameters based on the + init statement for that class. Since this class is used to wrap + a parent transformer (by OOP), we use the parent's init + statement and then this DFTransformer's additional arguments. + We must override of BaseEstimator will complain about our + nonstandard usage. _get_param_names override holds the change to + the parent __init__. + + Args: + **params: params to init. + + """ + # self.keep_column = params.pop("keep_column", False) + # self.name = params.pop("name", None) + return super().set_params(**params) def fit(self, X, *args, **kwargs): """Fit the estimator or transformer, pandas enabled. 
@@ -383,11 +358,17 @@ def fit_transform(self, X, *args, **kwargs): return out def __repr__(self): - return "DFTransformer: {}".format(self.__class__.__name__) + return "DF{}".format(self.__class__.__name__) + + @classmethod + def _get_param_names(cls): + """Shadow the parent __init__ method. - def set_extra_params(self, name=None, keep_columns=False): - setattr(self, "name", name) - setattr(self, "keep_columns", keep_columns) + Returns: + _param_names for the parent class (and therefore the __init__). + + """ + return transformer._get_param_names() return DFTransformer From 5e032ce142c91b784286f350ac83299039b4bccb Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Mon, 12 Aug 2019 15:23:22 -0400 Subject: [PATCH 04/37] isort, black --- foreshadow/concrete/internals/labelencoder.py | 6 ++-- foreshadow/estimators/auto.py | 12 +++++--- foreshadow/foreshadow.py | 4 +-- foreshadow/smart/smart.py | 4 +-- foreshadow/steps/__init__.py | 4 +-- foreshadow/tests/test_estimators/test_meta.py | 13 +++----- foreshadow/tests/test_foreshadow.py | 18 +++++------ .../test_internals/test_internal.py | 30 ++++++++----------- .../test_transformers/test_transformers.py | 6 ++-- foreshadow/wrapper.py | 12 ++++---- 10 files changed, 50 insertions(+), 59 deletions(-) diff --git a/foreshadow/concrete/internals/labelencoder.py b/foreshadow/concrete/internals/labelencoder.py index a79e099..77cb83b 100644 --- a/foreshadow/concrete/internals/labelencoder.py +++ b/foreshadow/concrete/internals/labelencoder.py @@ -75,9 +75,9 @@ def get_params(self, deep=True): """ params = super().get_params(deep=deep) if not deep: - params['encoder'] = self.encoder + params["encoder"] = self.encoder else: - params['encoder'] = self.encoder.get_params(deep=deep) + params["encoder"] = self.encoder.get_params(deep=deep) return params def set_params(self, **params): @@ -87,5 +87,5 @@ def set_params(self, **params): **params: params to set on this estimator. """ - self.encoder = params.pop('encoder') + self.encoder = params.pop("encoder") super().set_params(**params) diff --git a/foreshadow/estimators/auto.py b/foreshadow/estimators/auto.py index 04b5b6f..9c28da6 100644 --- a/foreshadow/estimators/auto.py +++ b/foreshadow/estimators/auto.py @@ -326,8 +326,12 @@ def get_params(self, deep=True): """ params = super().get_params(deep=deep) - params.update({'estimator': self.estimator, - 'estimator_class': self.estimator_class}) + params.update( + { + "estimator": self.estimator, + "estimator_class": self.estimator_class, + } + ) return params def set_params(self, **params): @@ -337,8 +341,8 @@ def set_params(self, **params): **params: params to set. """ - self.estimator = params.pop('estimator', None) - self.estimator_class = params.pop('estimator_class', None) + self.estimator = params.pop("estimator", None) + self.estimator_class = params.pop("estimator_class", None) return super().set_params(**params) diff --git a/foreshadow/foreshadow.py b/foreshadow/foreshadow.py index 65176d5..6006303 100644 --- a/foreshadow/foreshadow.py +++ b/foreshadow/foreshadow.py @@ -293,7 +293,7 @@ def get_params(self, deep=True): """ params = super().get_params(deep=deep) - params['data_columns'] = self.data_columns + params["data_columns"] = self.data_columns return params def set_params(self, **params): @@ -303,5 +303,5 @@ def set_params(self, **params): **params: params to set. 
""" - self.data_columns = params.pop('data_columns', None) + self.data_columns = params.pop("data_columns", None) return super().set_params(**params) diff --git a/foreshadow/smart/smart.py b/foreshadow/smart/smart.py index 4d895bc..8adab78 100644 --- a/foreshadow/smart/smart.py +++ b/foreshadow/smart/smart.py @@ -155,10 +155,10 @@ def set_params(self, **params): **params (dict): any valid parameter of this estimator """ - if 'transformer' in params: # required as set_params assumes + if "transformer" in params: # required as set_params assumes # self.transformer will already be the object housed here. We # have it set to None as it may be anything at runtime. - self.transformer = params['transformer'] + self.transformer = params["transformer"] return super().set_params(**params) @abstractmethod diff --git a/foreshadow/steps/__init__.py b/foreshadow/steps/__init__.py index 3bdf42f..0430a8d 100644 --- a/foreshadow/steps/__init__.py +++ b/foreshadow/steps/__init__.py @@ -4,8 +4,8 @@ from .feature_engineerer import FeatureEngineererMapper from .feature_reducer import FeatureReducerMapper from .mapper import IntentMapper -from .preprocessor import Preprocessor from .preparerstep import PreparerStep +from .preprocessor import Preprocessor __all__ = [ @@ -14,5 +14,5 @@ "Preprocessor", "FeatureEngineererMapper", "FeatureReducerMapper", - "PreparerStep" + "PreparerStep", ] diff --git a/foreshadow/tests/test_estimators/test_meta.py b/foreshadow/tests/test_estimators/test_meta.py index d6ba9b0..ff1ded1 100644 --- a/foreshadow/tests/test_estimators/test_meta.py +++ b/foreshadow/tests/test_estimators/test_meta.py @@ -78,10 +78,7 @@ def test_metaestimator_score(): ) -@pytest.mark.parametrize( - 'deep', - [True, False] -) +@pytest.mark.parametrize("deep", [True, False]) def test_meta_estimator_get_params_keys(deep): """Test that the desired keys show up for the MetaEstimator object. @@ -92,12 +89,10 @@ def test_meta_estimator_get_params_keys(deep): from foreshadow.estimators.meta import MetaEstimator from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LinearRegression + me = MetaEstimator(LinearRegression(), StandardScaler()) params = me.get_params(deep=deep) - desired_keys = [ - 'estimator', - 'preprocessor' - ] + desired_keys = ["estimator", "preprocessor"] for key in desired_keys: - assert key in params \ No newline at end of file + assert key in params diff --git a/foreshadow/tests/test_foreshadow.py b/foreshadow/tests/test_foreshadow.py index 08a3e36..b2403ec 100644 --- a/foreshadow/tests/test_foreshadow.py +++ b/foreshadow/tests/test_foreshadow.py @@ -635,10 +635,7 @@ def test_core_foreshadow_example_classification(): print("Iris score: %f" % score) -@pytest.mark.parametrize( - 'deep', - [True, False] -) +@pytest.mark.parametrize("deep", [True, False]) def test_foreshadow_get_params_keys(deep): """Test that the desired keys show up for the Foreshadow object. 
@@ -647,15 +644,16 @@ def test_foreshadow_get_params_keys(deep): """ from foreshadow.foreshadow import Foreshadow + fs = Foreshadow() params = fs.get_params(deep=deep) desired_keys = [ - 'X_preparer', - 'estimator', - 'y_preparer', - 'optimizer', - 'data_columns' + "X_preparer", + "estimator", + "y_preparer", + "optimizer", + "data_columns", ] for key in desired_keys: - assert key in params \ No newline at end of file + assert key in params diff --git a/foreshadow/tests/test_transformers/test_concrete/test_internals/test_internal.py b/foreshadow/tests/test_transformers/test_concrete/test_internals/test_internal.py index 9d01df0..f0368f1 100644 --- a/foreshadow/tests/test_transformers/test_concrete/test_internals/test_internal.py +++ b/foreshadow/tests/test_transformers/test_concrete/test_internals/test_internal.py @@ -20,10 +20,7 @@ def test_dummy_encoder(): assert check.equals(df) -@pytest.mark.parametrize( - 'deep', - [True, False] -) +@pytest.mark.parametrize("deep", [True, False]) def test_dummy_encoder_get_params_keys(deep): """Test that the desired keys show up for the DummyEncoder object. @@ -32,14 +29,11 @@ def test_dummy_encoder_get_params_keys(deep): """ from foreshadow.concrete import DummyEncoder + de = DummyEncoder() params = de.get_params(deep=deep) - desired_keys = [ - 'delimeter', - 'other_cutoff', - 'other_name', - ] + desired_keys = ["delimeter", "other_cutoff", "other_name"] for key in desired_keys: assert key in params @@ -84,10 +78,7 @@ def test_box_cox(): ) -@pytest.mark.parametrize( - 'deep', - [True, False] -) +@pytest.mark.parametrize("deep", [True, False]) def test_label_encoder_get_params_keys(deep): """Test that the desired keys show up for the LabelEncoder object. @@ -96,12 +87,11 @@ def test_label_encoder_get_params_keys(deep): """ from foreshadow.concrete import FixedLabelEncoder + fle = FixedLabelEncoder() params = fle.get_params(deep=deep) - desired_keys = [ - 'encoder' - ] + desired_keys = ["encoder"] for key in desired_keys: assert key in params @@ -185,8 +175,12 @@ def test_transformer_onehotencoder_fit_transform_keep_cols(): df = pd.DataFrame( {"neat": ["apple", "apple", "orange", "apple", "orange"]} ) - ohe = OneHotEncoder(use_cat_names=True, handle_unknown="ignore", - name="encoder", keep_columns=True) + ohe = OneHotEncoder( + use_cat_names=True, + handle_unknown="ignore", + name="encoder", + keep_columns=True, + ) assert ohe.fit(df) == ohe assert list(ohe.transform(df)) == ["neat", "neat_apple", "neat_orange"] diff --git a/foreshadow/tests/test_transformers/test_transformers.py b/foreshadow/tests/test_transformers/test_transformers.py index 451d629..e770e95 100644 --- a/foreshadow/tests/test_transformers/test_transformers.py +++ b/foreshadow/tests/test_transformers/test_transformers.py @@ -441,10 +441,10 @@ def test_smarttransformer_get_params(smart_child): "column_sharer": None, "check_wrapped": True, "transformer__copy": True, - "transformer__missing_values": 'NaN', - "transformer__strategy": 'mean', + "transformer__missing_values": "NaN", + "transformer__strategy": "mean", "transformer__verbose": 0, - 'transformer__axis': 0, + "transformer__axis": 0, } diff --git a/foreshadow/wrapper.py b/foreshadow/wrapper.py index 38f503b..94990c2 100644 --- a/foreshadow/wrapper.py +++ b/foreshadow/wrapper.py @@ -1,11 +1,11 @@ """Transformer wrapping utility classes and functions.""" import warnings +from types import MethodType import numpy as np import pandas as pd import scipy -from types import MethodType from sklearn.base import BaseEstimator from 
sklearn.utils.fixes import signature @@ -97,11 +97,11 @@ def __init__(self, *args, name=None, keep_columns=False, **kwargs): # "after instantiation." # ) # self.keep_column = kwargs.pop("keep_columns", False) - # logging.warning( - # "keep_columns is a deprecated kwarg. Please " - # "remove it from the kwargs and instead set " - # "it after instantiation." - # ) + # logging.warning( + # "keep_columns is a deprecated kwarg. Please " + # "remove it from the kwargs and instead set " + # "it after instantiation." + # ) try: super(DFTransformer, self).__init__(*args, **kwargs) except TypeError as e: From 9e1e6786f55b47844413d25dd218e8e81dd090ba Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Mon, 12 Aug 2019 15:26:44 -0400 Subject: [PATCH 05/37] Flaked. --- foreshadow/concrete/internals/fancyimpute.py | 3 +++ foreshadow/estimators/auto.py | 3 +++ foreshadow/foreshadow.py | 3 +++ foreshadow/smart/smart.py | 4 +++- foreshadow/steps/preparerstep.py | 3 +-- foreshadow/wrapper.py | 5 ----- 6 files changed, 13 insertions(+), 8 deletions(-) diff --git a/foreshadow/concrete/internals/fancyimpute.py b/foreshadow/concrete/internals/fancyimpute.py index 13b5f5c..c172ba9 100644 --- a/foreshadow/concrete/internals/fancyimpute.py +++ b/foreshadow/concrete/internals/fancyimpute.py @@ -53,6 +53,9 @@ def set_params(self, **params): Args: **params: params to set + Returns: + see super. + Raises: ValueError: If method is invalid diff --git a/foreshadow/estimators/auto.py b/foreshadow/estimators/auto.py index 9c28da6..8b17988 100644 --- a/foreshadow/estimators/auto.py +++ b/foreshadow/estimators/auto.py @@ -340,6 +340,9 @@ def set_params(self, **params): Args: **params: params to set. + Returns: + See super. + """ self.estimator = params.pop("estimator", None) self.estimator_class = params.pop("estimator_class", None) diff --git a/foreshadow/foreshadow.py b/foreshadow/foreshadow.py index 6006303..9140f1e 100644 --- a/foreshadow/foreshadow.py +++ b/foreshadow/foreshadow.py @@ -302,6 +302,9 @@ def set_params(self, **params): Args: **params: params to set. + Returns: + See super. + """ self.data_columns = params.pop("data_columns", None) return super().set_params(**params) diff --git a/foreshadow/smart/smart.py b/foreshadow/smart/smart.py index 8adab78..cb36e80 100644 --- a/foreshadow/smart/smart.py +++ b/foreshadow/smart/smart.py @@ -1,7 +1,6 @@ """Smart Transformer and its helper methods.""" from abc import ABCMeta, abstractmethod -from copy import deepcopy from sklearn.base import BaseEstimator, TransformerMixin @@ -154,6 +153,9 @@ def set_params(self, **params): Args: **params (dict): any valid parameter of this estimator + Returns: + see super. + """ if "transformer" in params: # required as set_params assumes # self.transformer will already be the object housed here. We diff --git a/foreshadow/steps/preparerstep.py b/foreshadow/steps/preparerstep.py index ce7e0ca..33055a8 100644 --- a/foreshadow/steps/preparerstep.py +++ b/foreshadow/steps/preparerstep.py @@ -1,6 +1,5 @@ """General base classes used across Foreshadow.""" from collections import MutableMapping, defaultdict, namedtuple -from inspect import signature from sklearn.base import BaseEstimator, TransformerMixin @@ -525,7 +524,7 @@ def inverse_transform(self, X, *args, **kwargs): @classmethod def _get_param_names(cls): - """Iteratively get __init__ params for all classes until PreparerStep. + """Get iteratively __init__ params for all classes until PreparerStep. Returns: params for all parents up to and including PreparerStep. 
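
All of the get_params/set_params work above converges on sklearn's estimator
contract: whatever get_params returns must be consumable by set_params on a
fresh instance, which is exactly what the reworked
test_smarttransformer_set_params_default asserts. In plain sklearn terms (a
minimal illustration, no foreshadow machinery involved):

from sklearn.linear_model import LogisticRegression

a = LogisticRegression(C=0.5)
b = LogisticRegression().set_params(**a.get_params(deep=False))
assert a.get_params(deep=False) == b.get_params(deep=False)
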
diff --git a/foreshadow/wrapper.py b/foreshadow/wrapper.py
index 94990c2..ea9256d 100644
--- a/foreshadow/wrapper.py
+++ b/foreshadow/wrapper.py
@@ -1,13 +1,8 @@
 """Transformer wrapping utility classes and functions."""
 
-import warnings
-from types import MethodType
-
 import numpy as np
 import pandas as pd
 import scipy
-from sklearn.base import BaseEstimator
-from sklearn.utils.fixes import signature
 
 from foreshadow.logging import logging
 from foreshadow.serializers import ConcreteSerializerMixin

From 261788a94f0fa01a853b892622afd31512084378 Mon Sep 17 00:00:00 2001
From: Christopher Choquette Choo
Date: Tue, 13 Aug 2019 10:58:37 -0400
Subject: [PATCH 06/37] CI Flake.

---
 foreshadow/wrapper.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/foreshadow/wrapper.py b/foreshadow/wrapper.py
index ea9256d..8f09ee3 100644
--- a/foreshadow/wrapper.py
+++ b/foreshadow/wrapper.py
@@ -74,6 +74,8 @@ def __init__(self, *args, name=None, keep_columns=False, **kwargs):
 
         Args:
             *args: args to the parent constructor (shadowed transformer)
+            name: name of the transformer.
+            keep_columns: keep original column names in the graph.
             **kwargs: kwargs to the parent constructor
 
         Raises:

From 6c793d1c8d53d252cef12de76df4e093ab6823e4 Mon Sep 17 00:00:00 2001
From: Christopher Choquette Choo
Date: Tue, 13 Aug 2019 11:28:38 -0400
Subject: [PATCH 07/37] smart patch to mimic preparerstep

---
 foreshadow/smart/smart.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/foreshadow/smart/smart.py b/foreshadow/smart/smart.py
index cb36e80..5885c43 100644
--- a/foreshadow/smart/smart.py
+++ b/foreshadow/smart/smart.py
@@ -259,3 +259,18 @@ def inverse_transform(self, X):
         X = check_df(X)
         self.resolve(X)
         return self.transformer.inverse_transform(X)
+
+    @classmethod
+    def _get_param_names(cls):
+        """Get iteratively __init__ params for all classes until SmartTransformer.
+
+        Returns:
+            params for all parents up to and including SmartTransformer.
+            Includes the calling class's params.
+
+        """
+        params = super()._get_param_names()
+        while cls.__name__ != SmartTransformer.__name__:
+            cls = cls.__mro__[1]
+            params += cls._get_param_names()
+        return params

From a2f3027cf37a92007cbbacfdeee33fa15a0cef77 Mon Sep 17 00:00:00 2001
From: Christopher Choquette Choo
Date: Tue, 13 Aug 2019 11:49:08 -0400
Subject: [PATCH 08/37] Fixing tests.

--- .../test_feature_engineerers/test_feature_engineer.py | 4 ++-- .../test_feature_reducer/test_feature_reducer.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/foreshadow/tests/test_transformers/test_concrete/test_feature_engineerers/test_feature_engineer.py b/foreshadow/tests/test_transformers/test_concrete/test_feature_engineerers/test_feature_engineer.py index 8c6ebdb..26ae9d2 100644 --- a/foreshadow/tests/test_transformers/test_concrete/test_feature_engineerers/test_feature_engineer.py +++ b/foreshadow/tests/test_transformers/test_concrete/test_feature_engineerers/test_feature_engineer.py @@ -62,8 +62,8 @@ def test_feature_engineerer_get_mapping(): column_mapping = fem.get_mapping(data) check_pm = PreparerMapping() - check_pm.add(["age", "weights"], [FeatureEngineerer()]) - check_pm.add(["financials"], [FeatureEngineerer()]) + check_pm.add(["age", "weights"], [FeatureEngineerer(column_sharer=cs)]) + check_pm.add(["financials"], [FeatureEngineerer(column_sharer=cs)]) for key in column_mapping.store: assert key in check_pm.store diff --git a/foreshadow/tests/test_transformers/test_concrete/test_feature_reducer/test_feature_reducer.py b/foreshadow/tests/test_transformers/test_concrete/test_feature_reducer/test_feature_reducer.py index e004a64..c5dcd52 100644 --- a/foreshadow/tests/test_transformers/test_concrete/test_feature_reducer/test_feature_reducer.py +++ b/foreshadow/tests/test_transformers/test_concrete/test_feature_reducer/test_feature_reducer.py @@ -57,8 +57,8 @@ def test_feature_reducer_get_mapping_by_intent(): column_mapping = fr.get_mapping(data) check = PreparerMapping() - check.add(["age", "weights"], [FeatureReducer()]) - check.add(["occupation"], [FeatureReducer()]) + check.add(["age", "weights"], [FeatureReducer(column_sharer=cs)]) + check.add(["occupation"], [FeatureReducer(column_sharer=cs)]) for key in column_mapping.store: assert key in check.store From af4170d1254142844d6aac18aaf2619c4706c3ce Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Tue, 13 Aug 2019 13:24:31 -0400 Subject: [PATCH 09/37] Working MVP given new architecture changes. --- foreshadow/concrete/internals/fancyimpute.py | 29 ++++++++------------ foreshadow/smart/smart.py | 23 ++-------------- foreshadow/wrapper.py | 14 +++------- searcher_script.py | 19 +++++++------ setup.cfg | 4 +-- 5 files changed, 29 insertions(+), 60 deletions(-) diff --git a/foreshadow/concrete/internals/fancyimpute.py b/foreshadow/concrete/internals/fancyimpute.py index c172ba9..a7f230e 100644 --- a/foreshadow/concrete/internals/fancyimpute.py +++ b/foreshadow/concrete/internals/fancyimpute.py @@ -21,16 +21,22 @@ class FancyImputer(BaseEstimator, TransformerMixin): def __init__(self, method="SimpleFill", impute_kwargs={}): self.impute_kwargs = impute_kwargs self.method = method + self._load_imputer() + + def _load_imputer(self): + """Load concrete fancy imputer based on string representation. + + Auto import and initialize fancyimpute class defined by method. + """ try: - module = __import__("fancyimpute", [method], 1) - self.cls = getattr(module, method) + module = __import__("fancyimpute", [self.method], 1) + self.cls = getattr(module, self.method) except Exception: raise ValueError( "Invalid method. Possible values are BiScaler, KNN, " "NuclearNormMinimization and SoftImpute" ) - - self.imputer = self.cls(**impute_kwargs) + self.imputer = self.cls(**self.impute_kwargs) def get_params(self, deep=True): """Get parameters for this estimator. 
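
With _load_imputer factored out above and re-invoked from set_params in the
hunk below, the imputation backend can be swapped after construction. A usage
sketch (assuming fancyimpute provides KNN and SoftImpute, as the error message
states):

from foreshadow.concrete.internals.fancyimpute import FancyImputer

imputer = FancyImputer(method="KNN")
imputer.set_params(method="SoftImpute")  # re-runs _load_imputer
# imputer.imputer is now a fancyimpute.SoftImpute instance
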
@@ -61,20 +67,7 @@ def set_params(self, **params): """ out = super().set_params(**params) - - # Auto import and initialize fancyimpute class defined by method - try: - from importlib import import_module - - module = import_module("fancyimpute") - self.cls = getattr(module, self.method) - except Exception: - raise ValueError( - "Invalid method. Possible values are BiScaler, KNN, " - "NuclearNormMinimization and SoftImpute" - ) - - self.imputer = self.cls(self.impute_kwargs) + self._load_imputer() return out def fit(self, X, y=None): diff --git a/foreshadow/smart/smart.py b/foreshadow/smart/smart.py index f5de015..0a1fcd5 100644 --- a/foreshadow/smart/smart.py +++ b/foreshadow/smart/smart.py @@ -156,29 +156,10 @@ def set_params(self, **params): **params (dict): any valid parameter of this estimator """ - params = deepcopy(params) - transformer_params = params.pop("transformer", self.transformer) + if "transformer" in params: + self.transformer = params["transformer"] super().set_params(**params) - # Calls to override auto set the transformer instance - if ( - isinstance(transformer_params, dict) - and "class_name" in transformer_params - ): # instantiate a - # new - # self.transformer - self.transformer = transformer_params - elif self.transformer is not None: - # valid_params = { - # k.partition("__")[2]: v - # for k, v in params.items() - # if k.split("__")[0] == "transformer" - # } - self.transformer.set_params(**transformer_params) - self.transformer.set_extra_params( - name=type(self.transformer).__name__, - keep_columns=self.keep_columns, - ) @abstractmethod def pick_transformer(self, X, y=None, **fit_params): diff --git a/foreshadow/wrapper.py b/foreshadow/wrapper.py index 8f09ee3..b7b5893 100644 --- a/foreshadow/wrapper.py +++ b/foreshadow/wrapper.py @@ -116,9 +116,8 @@ def get_params(self, deep=True): init statement for that class. Since this class is used to wrap a parent transformer (by OOP), we use the parent's init statement and then this DFTransformer's additional arguments. - We must override of BaseEstimator will complain about our - nonstandard usage. _get_param_names override holds the change to - the parent __init__. + We must override _get_param_names so that this method captures + the parent's __init__. Args: deep (bool): If True, will return the parameters for this @@ -130,8 +129,6 @@ def get_params(self, deep=True): """ params = super().get_params(deep=deep) - # params['keep_column'] = self.keep_column - # params['name'] = self.name return params def set_params(self, **params): @@ -141,9 +138,8 @@ def set_params(self, **params): init statement for that class. Since this class is used to wrap a parent transformer (by OOP), we use the parent's init statement and then this DFTransformer's additional arguments. - We must override of BaseEstimator will complain about our - nonstandard usage. _get_param_names override holds the change to - the parent __init__. + We must override _get_param_names so that this method captures + the parent's __init__. Args: **params: params to init. @@ -152,8 +148,6 @@ def set_params(self, **params): See super. 
""" - # self.keep_column = params.pop("keep_column", False) - # self.name = params.pop("name", None) return super().set_params(**params) def fit(self, X, *args, **kwargs): diff --git a/searcher_script.py b/searcher_script.py index 6df0382..699eb87 100644 --- a/searcher_script.py +++ b/searcher_script.py @@ -37,23 +37,22 @@ pipe.fit(X_data, y_data) -param_distributions = { - "s__transformer": hp.choice( +param_distributions = hp.choice( "s__transformer", [ { - "class_name": "StandardScaler", - "with_mean": hp.choice("with_mean", [False, True]), + "s__transformer": "StandardScaler", + "s__transformer__with_mean": hp.choice("with_mean", [False, + True]), }, { - "class_name": "MinMaxScaler", - "feature_range": hp.choice( + "s__transformer": "MinMaxScaler", + "s__transformer__feature_range": hp.choice( "feature_range", [(0, 1), (0, 0.5)] ), }, ], ) -} class HyperOptSampler(object): @@ -117,8 +116,8 @@ def _get_param_iterator(self): self.n_iter, random_state=self.random_state, ) - for i in out: - print(i) + # for i in out: + # print(i) return out @@ -163,5 +162,7 @@ def _get_param_iterator(self): [c for c in results.columns if all(s not in c for s in ["time", "params"])] ] +print(results) + # import pdb; pdb.set_trace() diff --git a/setup.cfg b/setup.cfg index 39fcbb7..dc8ffcd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -33,8 +33,8 @@ per-file-ignores = # pytest [tool:pytest] -addopts = -v -x --xdoc --cov=foreshadow --cov-config=setup.cfg --cov-report=term --cov-report=html -;addopts = -s -v --xdoc +;addopts = -v -x --xdoc --cov=foreshadow --cov-config=setup.cfg --cov-report=term --cov-report=html +addopts = -s -vv --xdoc # above is good for pycharm environments. filterwarnings = ignore:the matrix subclass:PendingDeprecationWarning From 7689e45b871402dc1fc8b523695b8ccefdd1c7d2 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Tue, 13 Aug 2019 13:29:22 -0400 Subject: [PATCH 10/37] CR changes --- foreshadow/concrete/internals/fancyimpute.py | 33 ++++++++------------ foreshadow/smart/smart.py | 9 ++++-- foreshadow/wrapper.py | 14 +++------ 3 files changed, 23 insertions(+), 33 deletions(-) diff --git a/foreshadow/concrete/internals/fancyimpute.py b/foreshadow/concrete/internals/fancyimpute.py index c172ba9..f103212 100644 --- a/foreshadow/concrete/internals/fancyimpute.py +++ b/foreshadow/concrete/internals/fancyimpute.py @@ -21,16 +21,26 @@ class FancyImputer(BaseEstimator, TransformerMixin): def __init__(self, method="SimpleFill", impute_kwargs={}): self.impute_kwargs = impute_kwargs self.method = method + + def _load_imputer(self): + """Load concrete fancy imputer based on string representation. + + Auto import and initialize fancyimpute class defined by method. + + Raises: + ValueError: If method is invalid + + """ try: - module = __import__("fancyimpute", [method], 1) - self.cls = getattr(module, method) + module = __import__("fancyimpute", [self.method], 1) + self.cls = getattr(module, self.method) except Exception: raise ValueError( "Invalid method. Possible values are BiScaler, KNN, " "NuclearNormMinimization and SoftImpute" ) - self.imputer = self.cls(**impute_kwargs) + self.imputer = self.cls(**self.impute_kwargs) def get_params(self, deep=True): """Get parameters for this estimator. @@ -56,25 +66,8 @@ def set_params(self, **params): Returns: see super. 
- Raises: - ValueError: If method is invalid - """ out = super().set_params(**params) - - # Auto import and initialize fancyimpute class defined by method - try: - from importlib import import_module - - module = import_module("fancyimpute") - self.cls = getattr(module, self.method) - except Exception: - raise ValueError( - "Invalid method. Possible values are BiScaler, KNN, " - "NuclearNormMinimization and SoftImpute" - ) - - self.imputer = self.cls(self.impute_kwargs) return out def fit(self, X, y=None): diff --git a/foreshadow/smart/smart.py b/foreshadow/smart/smart.py index ddff10c..5885c43 100644 --- a/foreshadow/smart/smart.py +++ b/foreshadow/smart/smart.py @@ -157,9 +157,12 @@ def set_params(self, **params): see super. """ - if "transformer" in params: # required as set_params assumes - # self.transformer will already be the object housed here. We - # have it set to None as it may be anything at runtime. + if "transformer" in params: # We load this first as + # BaseEstimator.set_params will call this set_params of this + # object. In the init, we set this initially to None as it will + # later be resolved to whichever concrete transformer is chosen. + # None has no set_params, so we need to set this here, before we + # call to super(). self.transformer = params["transformer"] return super().set_params(**params) diff --git a/foreshadow/wrapper.py b/foreshadow/wrapper.py index 8f09ee3..b7b5893 100644 --- a/foreshadow/wrapper.py +++ b/foreshadow/wrapper.py @@ -116,9 +116,8 @@ def get_params(self, deep=True): init statement for that class. Since this class is used to wrap a parent transformer (by OOP), we use the parent's init statement and then this DFTransformer's additional arguments. - We must override of BaseEstimator will complain about our - nonstandard usage. _get_param_names override holds the change to - the parent __init__. + We must override _get_param_names so that this method captures + the parent's __init__. Args: deep (bool): If True, will return the parameters for this @@ -130,8 +129,6 @@ def get_params(self, deep=True): """ params = super().get_params(deep=deep) - # params['keep_column'] = self.keep_column - # params['name'] = self.name return params def set_params(self, **params): @@ -141,9 +138,8 @@ def set_params(self, **params): init statement for that class. Since this class is used to wrap a parent transformer (by OOP), we use the parent's init statement and then this DFTransformer's additional arguments. - We must override of BaseEstimator will complain about our - nonstandard usage. _get_param_names override holds the change to - the parent __init__. + We must override _get_param_names so that this method captures + the parent's __init__. Args: **params: params to init. @@ -152,8 +148,6 @@ def set_params(self, **params): See super. """ - # self.keep_column = params.pop("keep_column", False) - # self.name = params.pop("name", None) return super().set_params(**params) def fit(self, X, *args, **kwargs): From 6c6a30aa7fb1c39bcd8960d130fca93706d8d38d Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Tue, 13 Aug 2019 13:31:00 -0400 Subject: [PATCH 11/37] Last CR change. 
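
The docstring added below explains why PreparerStep._get_param_names climbs
the MRO: a child inherits its parents' __init__ params without re-declaring
them. A rough, self-contained sketch of that lookup (the class hierarchy and
parameter names here are illustrative toys, not Foreshadow's actual code;
only the climbing logic mirrors what the docstring describes):

    import inspect

    class PreparerStep:
        def __init__(self, column_sharer=None):
            self.column_sharer = column_sharer

    class ChildStep(PreparerStep):
        def __init__(self, threshold=0.5, **kwargs):
            self.threshold = threshold
            super().__init__(**kwargs)

    def param_names(cls):
        # Climb the MRO child -> parent, stopping once PreparerStep (the
        # last parent whose __init__ params we care about) has contributed.
        names = set()
        for klass in cls.__mro__:
            names |= set(inspect.signature(klass.__init__).parameters)
            if klass is PreparerStep:
                break
        return sorted(names - {"self", "args", "kwargs"})

    assert param_names(ChildStep) == ["column_sharer", "threshold"]
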
---
 foreshadow/steps/preparerstep.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/foreshadow/steps/preparerstep.py b/foreshadow/steps/preparerstep.py
index 33055a8..3aba434 100644
--- a/foreshadow/steps/preparerstep.py
+++ b/foreshadow/steps/preparerstep.py
@@ -526,6 +526,10 @@ def inverse_transform(self, X, *args, **kwargs):
     def _get_param_names(cls):
         """Get iteratively __init__ params for all classes until PreparerStep.
 
+        This method is implemented as a convenience for any child. It will
+        automatically climb the MRO for a child until it reaches this class
+        (the last parent whose __init__ params we care about).
+
         Returns:
             params for all parents up to and including PreparerStep.
             Includes the calling classes params.

From 22da00ffaf94a71e25e8dd8298f361031c29fde7 Mon Sep 17 00:00:00 2001
From: Christopher Choquette Choo
Date: Tue, 13 Aug 2019 15:01:30 -0400
Subject: [PATCH 12/37] saving work

---
 foreshadow/optimizers/__init__.py | 6 +++---
 foreshadow/optimizers/param_distribution.py | 19 +++++++++++++++++++
 foreshadow/serializers.py | 17 +++++++++++++---
 foreshadow/smart/smart.py | 4 ++--
 .../test_transformers/test_transformers.py | 1 +
 5 files changed, 38 insertions(+), 9 deletions(-)
 create mode 100644 foreshadow/optimizers/param_distribution.py

diff --git a/foreshadow/optimizers/__init__.py b/foreshadow/optimizers/__init__.py
index 12a7702..22382df 100644
--- a/foreshadow/optimizers/__init__.py
+++ b/foreshadow/optimizers/__init__.py
@@ -1,6 +1,6 @@
 """Foreshadow optimizers."""
-from foreshadow.optimizers.param_mapping import param_mapping
+# from foreshadow.optimizers.param_mapping import param_mapping
+from foreshadow.optimizers.param_distribution import ParamSpec

-
-__all__ = ["param_mapping"]
+__all__ = ["ParamSpec"]

diff --git a/foreshadow/optimizers/param_distribution.py b/foreshadow/optimizers/param_distribution.py
new file mode 100644
index 0000000..5cd9988
--- /dev/null
+++ b/foreshadow/optimizers/param_distribution.py
@@ -0,0 +1,19 @@
+"""Classes to be configured by user for customizing parameter tuning."""
+import foreshadow as fs
+import foreshadow.serializers as ser
+
+
+class ParamSpec(fs.Foreshadow, ser.ConcreteSerializerMixin):
+    def __init__(self, parameter_distribution):
+        self.parameter_distribution = parameter_distribution
+
+    @classmethod
+    def _get_param_names(cls):
+        return super()._get_param_names() + fs.Foreshadow._get_param_names()
+
+    def set_params(self, **params):
+
+
+
+if __name__ == '__main__':
+    ParamSpec().to_json("test")

diff --git a/foreshadow/serializers.py b/foreshadow/serializers.py
index dd6d88c..b440d45 100644
--- a/foreshadow/serializers.py
+++ b/foreshadow/serializers.py
@@ -269,7 +269,8 @@ def dict_serialize(self, deep=True):
 
         """
         return _make_serializable(
-            self.get_params(deep), serialize_args=self.serialize_params
+            self.get_params(deep),
+            # serialize_args=self.serialize_params
         )
 
     @classmethod
@@ -347,7 +348,7 @@ def disk_deserialize(cls, data):
         with open(fpath, "rb") as fopen:
             return pickle.load(fopen)
 
-    def serialize(self, method=None, name=None, **kwargs):
+    def serialize(self, method=None, **kwargs):
         """Serialize data as specified.
 
         If you would like to save the transformer parameters without saving
@@ -363,8 +364,6 @@ def serialize(self, method=None, name=None, **kwargs):
         Args:
            method (str): A choice between `json` and `pickle` to serialize a
                 string.
-            name (str): The name associated with the transformer. If not
-                specified, a name will be derived if possible.
**kwargs: The keyword arguments to pass to the serialization method Returns: @@ -448,6 +447,16 @@ def dict_serialize(self, deep=False): return super().dict_serialize(deep=deep) +class ParamSpecSerializerMixin(ConcreteSerializerMixin): + def serialize(self, **kwargs): + full_ser = super().serialize(**kwargs) + return full_ser + + @classmethod + def deserialize(cls, data): + return super().deserialize(data) + + def deserialize(data): """Allow the deserialization of any transformer. diff --git a/foreshadow/smart/smart.py b/foreshadow/smart/smart.py index 0a1fcd5..5c75a1a 100644 --- a/foreshadow/smart/smart.py +++ b/foreshadow/smart/smart.py @@ -156,8 +156,8 @@ def set_params(self, **params): **params (dict): any valid parameter of this estimator """ - if "transformer" in params: - self.transformer = params["transformer"] + # if "transformer" in params: + # self.transformer = params["transformer"] super().set_params(**params) diff --git a/foreshadow/tests/test_transformers/test_transformers.py b/foreshadow/tests/test_transformers/test_transformers.py index e770e95..7a1cc78 100644 --- a/foreshadow/tests/test_transformers/test_transformers.py +++ b/foreshadow/tests/test_transformers/test_transformers.py @@ -412,6 +412,7 @@ def test_smarttransformer_set_params_default(smart_child): smart.fit([1, 2, 3]) before = smart.__dict__ params = smart.get_params() + print(params) smart = smart_child().set_params(**params) assert smart.__dict__ == before From afef145aada89699f2b77df06904d2e2d9858ebf Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Tue, 13 Aug 2019 15:55:21 -0400 Subject: [PATCH 13/37] Creating new foreshadow.base.BaseEstimator to enable proper set_params for our use case. Using patchy to implement this. Switching all imports to use our internal BaseEstimator and TransformerMixin. Adding Smart test. --- doc/developers.rst | 2 +- foreshadow/base.py | 21 +++ foreshadow/concrete/internals/boxcox.py | 2 +- .../concrete/internals/cleaners/base.py | 2 +- foreshadow/concrete/internals/dropfeature.py | 2 +- foreshadow/concrete/internals/dummyencoder.py | 2 +- foreshadow/concrete/internals/fancyimpute.py | 4 +- foreshadow/concrete/internals/financial.py | 2 +- foreshadow/concrete/internals/htmlremover.py | 2 +- foreshadow/concrete/internals/labelencoder.py | 2 +- foreshadow/concrete/internals/notransform.py | 2 +- foreshadow/concrete/internals/tfidf.py | 2 +- foreshadow/concrete/internals/tostring.py | 2 +- .../concrete/internals/uncommonremover.py | 2 +- foreshadow/estimators/auto.py | 2 +- foreshadow/estimators/meta.py | 2 +- foreshadow/foreshadow.py | 2 +- foreshadow/intents/base.py | 2 +- foreshadow/old/intents/registry.py | 2 +- foreshadow/old/preprocessor.py | 2 +- foreshadow/parallelprocessor.py | 2 +- foreshadow/smart/smart.py | 19 ++- foreshadow/steps/preparerstep.py | 2 +- .../tests/test_core/test_newpreprocessor.py | 6 +- foreshadow/tests/test_core/test_wrapper.py | 2 +- foreshadow/tests/test_foreshadow.py | 6 +- .../test_smart/test_smart.py | 80 ++++++++++ foreshadow/utils/validation.py | 2 +- poetry.lock | 140 ++---------------- pyproject.toml | 1 + setup.cfg | 4 +- 31 files changed, 161 insertions(+), 164 deletions(-) create mode 100644 foreshadow/base.py diff --git a/doc/developers.rst b/doc/developers.rst index 98336dc..adf857e 100644 --- a/doc/developers.rst +++ b/doc/developers.rst @@ -166,7 +166,7 @@ Adding transformers is quite simple. Simply write a class with the `fit` `transf .. 
code-block:: python - from sklearn.base import TransformerMixin, BaseEstimator + from foreshadow.base import TransformerMixin, BaseEstimator from sklearn.utils import check_array class CustomTransformer(BaseEstimator, TransformerMixin): diff --git a/foreshadow/base.py b/foreshadow/base.py new file mode 100644 index 0000000..1185211 --- /dev/null +++ b/foreshadow/base.py @@ -0,0 +1,21 @@ +from sklearn.base import ( + BaseEstimator, + TransformerMixin, +) +import patchy + +_set_params = BaseEstimator.set_params +patchy.patch(_set_params, + """@@ -30,6 +30,6 @@ + setattr(self, key, value) + + for key, sub_params in nested_params.items(): +- valid_params[key].set_params(**sub_params) ++ getattr(self, key).set_params(**sub_params) + + return self + """) + + +BaseEstimator.set_params = _set_params +TransformerMixin = TransformerMixin diff --git a/foreshadow/concrete/internals/boxcox.py b/foreshadow/concrete/internals/boxcox.py index 1a32c05..f556e61 100644 --- a/foreshadow/concrete/internals/boxcox.py +++ b/foreshadow/concrete/internals/boxcox.py @@ -3,7 +3,7 @@ import numpy as np from scipy.special import inv_boxcox1p from scipy.stats import boxcox -from sklearn.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted diff --git a/foreshadow/concrete/internals/cleaners/base.py b/foreshadow/concrete/internals/cleaners/base.py index a007d5f..fd68c6c 100644 --- a/foreshadow/concrete/internals/cleaners/base.py +++ b/foreshadow/concrete/internals/cleaners/base.py @@ -3,7 +3,7 @@ from collections import namedtuple import pandas as pd -from sklearn.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.exceptions import InvalidDataFrame from foreshadow.metrics import avg_col_regex, regex_rows diff --git a/foreshadow/concrete/internals/dropfeature.py b/foreshadow/concrete/internals/dropfeature.py index 7125f05..335c913 100644 --- a/foreshadow/concrete/internals/dropfeature.py +++ b/foreshadow/concrete/internals/dropfeature.py @@ -1,7 +1,7 @@ """DropFeature.""" import numpy as np import pandas as pd -from sklearn.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted diff --git a/foreshadow/concrete/internals/dummyencoder.py b/foreshadow/concrete/internals/dummyencoder.py index 571c60a..f41bb6a 100644 --- a/foreshadow/concrete/internals/dummyencoder.py +++ b/foreshadow/concrete/internals/dummyencoder.py @@ -1,7 +1,7 @@ """DummyEncoder transformer.""" import pandas as pd -from sklearn.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin from sklearn.utils.validation import check_is_fitted from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/concrete/internals/fancyimpute.py b/foreshadow/concrete/internals/fancyimpute.py index f103212..1b4ec3b 100644 --- a/foreshadow/concrete/internals/fancyimpute.py +++ b/foreshadow/concrete/internals/fancyimpute.py @@ -1,6 +1,6 @@ """Fancy imputation.""" -from sklearn.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.wrapper import pandas_wrap @@ -21,6 +21,7 @@ class FancyImputer(BaseEstimator, TransformerMixin): def __init__(self, method="SimpleFill", impute_kwargs={}): self.impute_kwargs = 
impute_kwargs self.method = method + self._load_imputer() def _load_imputer(self): """Load concrete fancy imputer based on string representation. @@ -68,6 +69,7 @@ def set_params(self, **params): """ out = super().set_params(**params) + self._load_imputer() return out def fit(self, X, y=None): diff --git a/foreshadow/concrete/internals/financial.py b/foreshadow/concrete/internals/financial.py index 1b34b8a..f542b2e 100644 --- a/foreshadow/concrete/internals/financial.py +++ b/foreshadow/concrete/internals/financial.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -from sklearn.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/concrete/internals/htmlremover.py b/foreshadow/concrete/internals/htmlremover.py index 88246f1..d887e56 100644 --- a/foreshadow/concrete/internals/htmlremover.py +++ b/foreshadow/concrete/internals/htmlremover.py @@ -1,7 +1,7 @@ """HTML tag remover and helpers.""" import re -from sklearn.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.utils import check_df from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/concrete/internals/labelencoder.py b/foreshadow/concrete/internals/labelencoder.py index 77cb83b..49c2ecf 100644 --- a/foreshadow/concrete/internals/labelencoder.py +++ b/foreshadow/concrete/internals/labelencoder.py @@ -1,6 +1,6 @@ """FixedLabelEncoder.""" -from sklearn.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin from sklearn.preprocessing import LabelEncoder as SklearnLabelEncoder from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/concrete/internals/notransform.py b/foreshadow/concrete/internals/notransform.py index d2390fb..789b091 100644 --- a/foreshadow/concrete/internals/notransform.py +++ b/foreshadow/concrete/internals/notransform.py @@ -1,5 +1,5 @@ """No Transform class through acts as a pass through for DataFrame and flag.""" -from sklearn.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/concrete/internals/tfidf.py b/foreshadow/concrete/internals/tfidf.py index 11b5cb4..129c85d 100644 --- a/foreshadow/concrete/internals/tfidf.py +++ b/foreshadow/concrete/internals/tfidf.py @@ -1,7 +1,7 @@ """FixedTfidfVectorizer.""" import numpy as np -from sklearn.base import BaseEstimator +from foreshadow.base import BaseEstimator from sklearn.feature_extraction.text import ( TfidfVectorizer as SklearnTfidfVectorizer, VectorizerMixin, diff --git a/foreshadow/concrete/internals/tostring.py b/foreshadow/concrete/internals/tostring.py index 1d73cf6..e34f403 100644 --- a/foreshadow/concrete/internals/tostring.py +++ b/foreshadow/concrete/internals/tostring.py @@ -1,6 +1,6 @@ """To String.""" -from sklearn.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/concrete/internals/uncommonremover.py b/foreshadow/concrete/internals/uncommonremover.py index 34292d7..481f1ea 100644 --- a/foreshadow/concrete/internals/uncommonremover.py +++ b/foreshadow/concrete/internals/uncommonremover.py @@ -1,6 +1,6 @@ """Uncommon remover.""" -from sklearn.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin from 
sklearn.utils.validation import check_is_fitted from foreshadow.utils import check_df diff --git a/foreshadow/estimators/auto.py b/foreshadow/estimators/auto.py index 8b17988..1865cb3 100644 --- a/foreshadow/estimators/auto.py +++ b/foreshadow/estimators/auto.py @@ -3,7 +3,7 @@ import warnings import numpy as np -from sklearn.base import BaseEstimator +from foreshadow.base import BaseEstimator from foreshadow.estimators.config import get_tpot_config from foreshadow.utils import check_df, check_module_installed diff --git a/foreshadow/estimators/meta.py b/foreshadow/estimators/meta.py index 962e2c3..413c97b 100644 --- a/foreshadow/estimators/meta.py +++ b/foreshadow/estimators/meta.py @@ -1,6 +1,6 @@ """Wrapped Estimator.""" -from sklearn.base import BaseEstimator +from foreshadow.base import BaseEstimator from foreshadow.utils import check_df diff --git a/foreshadow/foreshadow.py b/foreshadow/foreshadow.py index 9140f1e..062ddbb 100644 --- a/foreshadow/foreshadow.py +++ b/foreshadow/foreshadow.py @@ -3,7 +3,7 @@ import inspect import warnings -from sklearn.base import BaseEstimator +from foreshadow.base import BaseEstimator from sklearn.model_selection._search import BaseSearchCV from foreshadow.columnsharer import ColumnSharer diff --git a/foreshadow/intents/base.py b/foreshadow/intents/base.py index f4f5b71..313d7ea 100644 --- a/foreshadow/intents/base.py +++ b/foreshadow/intents/base.py @@ -1,6 +1,6 @@ """Base Intent for all intent definitions.""" -from sklearn.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin class BaseIntent(BaseEstimator, TransformerMixin): diff --git a/foreshadow/old/intents/registry.py b/foreshadow/old/intents/registry.py index 3660bfa..5f61e35 100644 --- a/foreshadow/old/intents/registry.py +++ b/foreshadow/old/intents/registry.py @@ -2,7 +2,7 @@ # flake8: noqa # from abc import ABCMeta # -# from sklearn.base import BaseEstimator, TransformerMixin +# from foreshadow.base import BaseEstimator, TransformerMixin # # from foreshadow.intents import base # from foreshadow.core import SmartTransformer diff --git a/foreshadow/old/preprocessor.py b/foreshadow/old/preprocessor.py index 4b44225..943e6ea 100644 --- a/foreshadow/old/preprocessor.py +++ b/foreshadow/old/preprocessor.py @@ -3,7 +3,7 @@ # import inspect # from copy import deepcopy # -# from sklearn.base import BaseEstimator, TransformerMixin +# from foreshadow.base import BaseEstimator, TransformerMixin # # from foreshadow.intents import GenericIntent # from foreshadow.intents.registry import registry_eval diff --git a/foreshadow/parallelprocessor.py b/foreshadow/parallelprocessor.py index cd4faf4..395c4f3 100644 --- a/foreshadow/parallelprocessor.py +++ b/foreshadow/parallelprocessor.py @@ -1,7 +1,7 @@ """Foreshadow extension of feature union for handling dataframes.""" import pandas as pd -from sklearn.base import BaseEstimator +from foreshadow.base import BaseEstimator from sklearn.externals.joblib import Parallel, delayed from sklearn.pipeline import ( FeatureUnion, diff --git a/foreshadow/smart/smart.py b/foreshadow/smart/smart.py index 5885c43..8c75412 100644 --- a/foreshadow/smart/smart.py +++ b/foreshadow/smart/smart.py @@ -2,7 +2,7 @@ from abc import ABCMeta, abstractmethod -from sklearn.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.logging import logging from foreshadow.pipeline import SerializablePipeline @@ -157,13 +157,16 @@ def set_params(self, **params): see super. 
""" - if "transformer" in params: # We load this first as - # BaseEstimator.set_params will call this set_params of this - # object. In the init, we set this initially to None as it will - # later be resolved to whichever concrete transformer is chosen. - # None has no set_params, so we need to set this here, before we - # call to super(). - self.transformer = params["transformer"] + # if "transformer" in params: # We load this first as + # # BaseEstimator.set_params will call this set_params of this + # # object. In the init, we set this initially to None as it will + # # later be resolved to whichever concrete transformer is chosen. + # # None has no set_params, so we need to set this here, before we + # # call to super(). + # # This is required because of sklearn using valid_params[key] + # # which is not the transformer passed in the params, but the + # # current transformer on this object. + # self.transformer = params["transformer"] return super().set_params(**params) @abstractmethod diff --git a/foreshadow/steps/preparerstep.py b/foreshadow/steps/preparerstep.py index 3aba434..a81918c 100644 --- a/foreshadow/steps/preparerstep.py +++ b/foreshadow/steps/preparerstep.py @@ -1,7 +1,7 @@ """General base classes used across Foreshadow.""" from collections import MutableMapping, defaultdict, namedtuple -from sklearn.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.concrete.internals.notransform import NoTransform from foreshadow.logging import logging diff --git a/foreshadow/tests/test_core/test_newpreprocessor.py b/foreshadow/tests/test_core/test_newpreprocessor.py index 7106881..5a65414 100644 --- a/foreshadow/tests/test_core/test_newpreprocessor.py +++ b/foreshadow/tests/test_core/test_newpreprocessor.py @@ -18,7 +18,7 @@ def test_preprocessor_none_config(mocker): from foreshadow.columnsharer import ColumnSharer from foreshadow.steps import Preprocessor - from sklearn.base import BaseEstimator, TransformerMixin + from foreshadow.base import BaseEstimator, TransformerMixin class DummyIntent(BaseEstimator, TransformerMixin): def fit(self, X, y=None, **fit_params): @@ -66,7 +66,7 @@ def test_preprocessor_numbers(mocker): from foreshadow.columnsharer import ColumnSharer from foreshadow.steps import Preprocessor - from sklearn.base import BaseEstimator, TransformerMixin + from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.concrete import StandardScaler @@ -132,7 +132,7 @@ def test_preprocessor_columnsharer(mocker, column_sharer): import pandas as pd from foreshadow.steps import Preprocessor - from sklearn.base import BaseEstimator, TransformerMixin + from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.concrete import StandardScaler diff --git a/foreshadow/tests/test_core/test_wrapper.py b/foreshadow/tests/test_core/test_wrapper.py index 5b44032..a58f578 100644 --- a/foreshadow/tests/test_core/test_wrapper.py +++ b/foreshadow/tests/test_core/test_wrapper.py @@ -14,7 +14,7 @@ def test_transformer_wrapper_init(): def test_transformer_wrapper_no_init(): - from sklearn.base import BaseEstimator, TransformerMixin + from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.wrapper import pandas_wrap class NewTransformer(BaseEstimator, TransformerMixin): diff --git a/foreshadow/tests/test_foreshadow.py b/foreshadow/tests/test_foreshadow.py index b2403ec..25f3dfc 100644 --- a/foreshadow/tests/test_foreshadow.py +++ b/foreshadow/tests/test_foreshadow.py @@ -78,7 
+78,7 @@ def test_foreshadow_y_preparer_error(): def test_foreshadow_estimator_custom(): from foreshadow.foreshadow import Foreshadow - from sklearn.base import BaseEstimator + from foreshadow.base import BaseEstimator estimator = BaseEstimator() foreshadow = Foreshadow(estimator=estimator) @@ -98,7 +98,7 @@ def test_foreshadow_estimator_error(): def test_foreshadow_optimizer_custom(): from foreshadow.foreshadow import Foreshadow from sklearn.model_selection._search import BaseSearchCV - from sklearn.base import BaseEstimator + from foreshadow.base import BaseEstimator class DummySearch(BaseSearchCV): pass @@ -334,7 +334,7 @@ def test_foreshadow_predict_diff_cols(): @pytest.mark.skip("borken until parameter optimization is implemented") def test_foreshadow_param_optimize_fit(mocker): import pandas as pd - from sklearn.base import BaseEstimator, TransformerMixin + from foreshadow.base import BaseEstimator, TransformerMixin from sklearn.model_selection._search import BaseSearchCV from foreshadow.foreshadow import Foreshadow diff --git a/foreshadow/tests/test_transformers/test_smart/test_smart.py b/foreshadow/tests/test_transformers/test_smart/test_smart.py index ce72cd6..240e6ad 100644 --- a/foreshadow/tests/test_transformers/test_smart/test_smart.py +++ b/foreshadow/tests/test_transformers/test_smart/test_smart.py @@ -3,6 +3,86 @@ from foreshadow.utils.testing import get_file_path +@pytest.fixture() +def smart_child(): + """Get a defined SmartTransformer subclass, TestSmartTransformer. + + Note: + Always returns StandardScaler. + + """ + from foreshadow.smart import SmartTransformer + from foreshadow.concrete import StandardScaler + + class TestSmartTransformer(SmartTransformer): + def pick_transformer(self, X, y=None, **fit_params): + return StandardScaler() + + yield TestSmartTransformer + + +@pytest.mark.parametrize( + 'deep', + [True, False] +) +def test_smart_get_params_default(smart_child, deep): + """Ensure that default get_params works. + + Args: + smart_child: a smart instance + deep: deep param to get_params + + """ + smart = smart_child() + params = smart.get_params(deep=deep) + default_state = {'check_wrapped': True, + 'column_sharer': None, + 'force_reresolve': False, + 'keep_columns': False, + 'name': None, + 'should_resolve': True, + 'transformer': None, + 'y_var': False} + assert default_state == params + + +@pytest.mark.parametrize( + 'initial_transformer', + [None, "BoxCox", "StandardScaler"] +) +def test_smart_set_params_default(smart_child, initial_transformer): + """Test setting both transformer and its parameters simultaneously works. + + Current sklearn implementation does not allow this and we created our + own BaseEstimator to allow this functionality. + + Args: + smart_child: smart instance + initial_transformer: the initial transformer to put before trying to + set_params(). 
+ + """ + from foreshadow.concrete import StandardScaler + smart = smart_child() + smart.transformer = initial_transformer + params = {'transformer': "StandardScaler", "transformer__with_std": + False} + smart.set_params(**params) + check = {'check_wrapped': True, + 'column_sharer': None, + 'force_reresolve': False, + 'keep_columns': False, + 'name': None, + 'should_resolve': False, + 'y_var': False, + 'transformer__with_std': False, + 'transformer__copy': True, + 'transformer__with_mean': True} + params = smart.get_params() + assert isinstance(params.pop('transformer'), StandardScaler) + assert check == params + + def test_smart_emtpy_input(): import numpy as np diff --git a/foreshadow/utils/validation.py b/foreshadow/utils/validation.py index 6ea98c2..4492dab 100644 --- a/foreshadow/utils/validation.py +++ b/foreshadow/utils/validation.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -from sklearn.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin from sklearn.feature_extraction.text import VectorizerMixin diff --git a/poetry.lock b/poetry.lock index 147c908..a06eb87 100644 --- a/poetry.lock +++ b/poetry.lock @@ -72,11 +72,6 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" version = "19.1.0" -[package.extras] -dev = ["coverage", "hypothesis", "pympler", "pytest", "six", "zope.interface", "sphinx", "pre-commit"] -docs = ["sphinx", "zope.interface"] -tests = ["coverage", "hypothesis", "pympler", "pytest", "six", "zope.interface"] - [[package]] category = "main" description = "Internationalization utilities" @@ -111,9 +106,6 @@ attrs = ">=17.4.0" click = ">=6.5" toml = ">=0.9.4" -[package.extras] -d = ["aiohttp (>=3.3.2)"] - [[package]] category = "dev" description = "A decorator for caching properties in classes." @@ -234,9 +226,6 @@ optional = false python-versions = ">=2.6, !=3.0.*" version = "0.3.0" -[package.extras] -graph = ["objgraph (>=1.7.2)"] - [[package]] category = "main" description = "Docutils -- Python Documentation Utilities" @@ -393,9 +382,6 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" version = "1.4.5" -[package.extras] -license = ["editdistance"] - [[package]] category = "main" description = "Internationalized Domain Names in Applications (IDNA)" @@ -423,9 +409,6 @@ version = "0.18" [package.dependencies] zipp = ">=0.5" -[package.extras] -docs = ["sphinx", "docutils (0.12)", "rst.linker"] - [[package]] category = "dev" description = "Read resources from Python packages" @@ -443,9 +426,6 @@ optional = false python-versions = "*" version = "17.5.0" -[package.extras] -scripts = ["click (>=6.0)", "twisted (>=16.4.0)"] - [[package]] category = "dev" description = "A Python utility / library to sort Python imports." @@ -455,15 +435,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" version = "4.3.21" [package.dependencies] -[package.dependencies.toml] -optional = true -version = "*" - -[package.extras] -pipfile = ["pipreqs", "requirementslib"] -pyproject = ["toml"] -requirements = ["pipreqs", "pip-api"] -xdg_home = ["appdirs (>=1.4.0)"] +toml = "*" [[package]] category = "main" @@ -476,9 +448,6 @@ version = "2.10.1" [package.dependencies] MarkupSafe = ">=0.23" -[package.extras] -i18n = ["Babel (>=0.8)"] - [[package]] category = "main" description = "Lightweight pipelining: using Python functions as pipeline jobs." 
@@ -512,10 +481,6 @@ pyyaml = "*" scipy = ">=0.14" six = ">=1.9.0" -[package.extras] -tests = ["pytest", "pytest-pep8", "pytest-xdist", "pytest-cov", "pytest-timeout", "pandas", "requests"] -visualize = ["pydot (>=1.2.4)"] - [[package]] category = "main" description = "Reference implementations of popular deep learning models" @@ -528,9 +493,6 @@ version = "1.0.8" h5py = "*" numpy = ">=1.9.1" -[package.extras] -tests = ["pytest", "pytest-pep8", "pytest-xdist", "pytest-cov"] - [[package]] category = "main" description = "Easy data preprocessing and data augmentation for deep learning models" @@ -543,11 +505,6 @@ version = "1.1.0" numpy = ">=1.9.1" six = ">=1.9.0" -[package.extras] -image = ["scipy (>=0.14)", "Pillow (>=5.2.0)"] -pep8 = ["flake8"] -tests = ["pandas", "pillow", "tensorflow (1.7)", "keras", "pytest", "pytest-xdist", "pytest-cov"] - [[package]] category = "main" description = "k-Nearest Neighbor imputation" @@ -571,9 +528,6 @@ version = "3.1.1" [package.dependencies] setuptools = ">=36" -[package.extras] -testing = ["coverage", "pyyaml"] - [[package]] category = "main" description = "Safely add untrusted strings to HTML/XML markup." @@ -590,12 +544,6 @@ optional = false python-versions = "*" version = "2.19.5" -[package.extras] -dev = ["python-dateutil", "simplejson", "pytest", "pytz", "flake8 (3.7.4)", "tox"] -lint = ["flake8 (3.7.4)"] -reco = ["python-dateutil", "simplejson"] -tests = ["pytest", "pytz"] - [[package]] category = "dev" description = "McCabe checker, plugin for flake8" @@ -615,11 +563,6 @@ version = "3.0.5" [package.dependencies] six = "*" -[package.extras] -build = ["twine", "wheel", "blurb"] -docs = ["sphinx"] -test = ["pytest", "pytest-cov"] - [[package]] category = "dev" description = "More routines for operating on iterables, beyond itertools" @@ -705,6 +648,17 @@ numpy = ">=1.9.0" python-dateutil = ">=2.5.0" pytz = ">=2011k" +[[package]] +category = "main" +description = "Patch the inner source of python functions at runtime." +name = "patchy" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +version = "1.5.0" + +[package.dependencies] +six = ">=1.9.0" + [[package]] category = "main" description = "A Python package for describing statistical models and for building design matrices." @@ -728,9 +682,6 @@ version = "0.12.0" [package.dependencies] importlib-metadata = ">=0.12" -[package.extras] -dev = ["pre-commit", "tox"] - [[package]] category = "dev" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
@@ -848,9 +799,6 @@ version = "2.7.1" coverage = ">=4.4" pytest = ">=3.6" -[package.extras] -testing = ["fields", "hunter", "process-tests (2.0.2)", "six", "virtualenv"] - [[package]] category = "dev" description = "Thin-wrapper around the mock package for easier use with py.test" @@ -862,9 +810,6 @@ version = "1.10.4" [package.dependencies] pytest = ">=2.7" -[package.extras] -dev = ["pre-commit", "tox"] - [[package]] category = "main" description = "Extensions to the standard Python datetime module" @@ -906,10 +851,6 @@ chardet = ">=3.0.2,<3.1.0" idna = ">=2.5,<2.9" urllib3 = ">=1.21.1,<1.25.0 || >1.25.0,<1.25.1 || >1.25.1,<1.26" -[package.extras] -security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)"] -socks = ["PySocks (>=1.5.6,<1.5.7 || >1.5.7)", "win-inet-pton"] - [[package]] category = "main" description = "A set of python modules for machine learning and data mining" @@ -918,9 +859,6 @@ optional = false python-versions = "*" version = "0.19.2" -[package.extras] -alldeps = ["numpy (>=1.8.2)", "scipy (>=0.13.3)"] - [[package]] category = "main" description = "SciPy: Scientific Library for Python" @@ -994,10 +932,6 @@ six = ">=1.5" snowballstemmer = ">=1.1" sphinxcontrib-websupport = "*" -[package.extras] -test = ["mock", "pytest", "pytest-cov", "html5lib", "flake8 (>=3.5.0)", "flake8-import-order", "enum34", "mypy", "typed-ast"] -websupport = ["sqlalchemy (>=0.9)", "whoosh (>=2.0)"] - [[package]] category = "main" description = "Read the Docs theme for Sphinx" @@ -1028,9 +962,6 @@ optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" version = "1.1.2" -[package.extras] -test = ["pytest", "mock"] - [[package]] category = "main" description = "Statistical computations and models for Python" @@ -1045,11 +976,6 @@ pandas = ">=0.19" patsy = ">=0.4.0" scipy = ">=0.18" -[package.extras] -build = ["cython (>=0.24)"] -develop = ["cython (>=0.24)"] -docs = ["sphinx", "nbconvert", "jupyter-client", "ipykernel", "matplotlib", "nbformat", "numpydoc", "pandas-datareader"] - [[package]] category = "main" description = "Timeout control decorator and context managers, raise any exception in another thread" @@ -1164,10 +1090,6 @@ six = ">=1.0.0,<2" toml = ">=0.9.4" virtualenv = ">=14.0.0" -[package.extras] -docs = ["sphinx (>=2.0.0,<3)", "towncrier (>=18.5.0)", "pygments-github-lexers (>=0.0.5)", "sphinxcontrib-autoprogram (>=0.1.5)"] -testing = ["freezegun (>=0.3.11,<1)", "pathlib2 (>=2.3.3,<3)", "pytest (>=4.0.0,<6)", "pytest-cov (>=2.5.1,<3)", "pytest-mock (>=1.10.0,<2)", "pytest-xdist (>=1.22.2,<2)", "pytest-randomly (>=1.2.3,<2)", "flaky (>=3.4.0,<4)", "psutil (>=5.6.1,<6)"] - [[package]] category = "main" description = "Tree-based Pipeline Optimization Tool" @@ -1187,12 +1109,6 @@ stopit = ">=1.1.1" tqdm = ">=4.26.0" update-checker = ">=0.16" -[package.extras] -dask = ["dask (>=0.18.2)", "distributed (>=1.22.1)", "dask-ml (>=0.9.0)"] -mdr = ["scikit-mdr (>=0.4.4)"] -skrebate = ["skrebate (>=0.3.4)"] -xgboost = ["xgboost (0.6a2)"] - [[package]] category = "main" description = "Fast, Extensible Progress Meter" @@ -1201,9 +1117,6 @@ optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*" version = "4.32.2" -[package.extras] -dev = ["py-make (>=0.1.0)", "twine", "argopt", "pydoc-markdown"] - [[package]] category = "main" description = "A python module that will check for package updates." 
@@ -1223,11 +1136,6 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" version = "1.25.3" -[package.extras] -brotli = ["brotlipy (>=0.6.0)"] -secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] -socks = ["PySocks (>=1.5.6,<1.5.7 || >1.5.7,<2.0)"] - [[package]] category = "dev" description = "Virtual Python Environment builder" @@ -1236,10 +1144,6 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" version = "16.7.1" -[package.extras] -docs = ["sphinx (>=1.8.0,<2)", "towncrier (>=18.5.0)", "sphinx-rtd-theme (>=0.4.2,<1)"] -testing = ["pytest (>=4.0.0,<5)", "coverage (>=4.5.0,<5)", "pytest-timeout (>=1.3.0,<2)", "six (>=1.10.0,<2)", "pytest-xdist", "pytest-localserver", "pypiserver", "mock", "xonsh"] - [[package]] category = "main" description = "The comprehensive WSGI web application library." @@ -1248,11 +1152,6 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" version = "0.15.5" -[package.extras] -dev = ["pytest", "coverage", "tox", "sphinx", "pallets-sphinx-themes", "sphinx-issues"] -termcolor = ["termcolor"] -watchdog = ["watchdog"] - [[package]] category = "main" description = "A built-package format for Python." @@ -1261,9 +1160,6 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" version = "0.33.4" -[package.extras] -test = ["pytest (>=3.0.0)", "pytest-cov"] - [[package]] category = "main" description = "Module for decorators, wrappers and monkey patching." @@ -1283,9 +1179,6 @@ version = "0.8.3" [package.dependencies] six = "*" -[package.extras] -all = ["pygments", "codecov", "colorama", "pytest", "pytest-cov"] - [[package]] category = "dev" description = "Backport of pathlib-compatible object wrapper for zip files" @@ -1294,15 +1187,11 @@ optional = false python-versions = ">=2.7" version = "0.5.2" -[package.extras] -docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"] -testing = ["pathlib2", "contextlib2", "unittest2"] - [extras] doc = ["sphinx", "sphinx_rtd_theme", "sphinxcontrib-plantuml", "docutils"] [metadata] -content-hash = "966f7dbbcd0abb235aa221ee434b2dea9efe1c73680108571645ebdd30a16532" +content-hash = "21268141db69de2b5ae77d0306654321f5028dbdd7dad505b2a3dedce4152845" python-versions = "^3.6" [metadata.hashes] @@ -1325,7 +1214,7 @@ chardet = ["84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", " click = ["2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13", "5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7"] colorama = ["05eed71e2e327246ad6b38c540c4a3117230b19679b875190486ddd2d721422d", "f8ac84de7840f5b9c4e3347b3c1eaa50f7e49c2b07596221daec5edaabbd7c48"] coverage = ["0c5fe441b9cfdab64719f24e9684502a59432df7570521563d7b1aff27ac755f", "2b412abc4c7d6e019ce7c27cbc229783035eef6d5401695dccba80f481be4eb3", "3684fabf6b87a369017756b551cef29e505cb155ddb892a7a29277b978da88b9", "39e088da9b284f1bd17c750ac672103779f7954ce6125fd4382134ac8d152d74", "3c205bc11cc4fcc57b761c2da73b9b72a59f8d5ca89979afb0c1c6f9e53c7390", "42692db854d13c6c5e9541b6ffe0fe921fe16c9c446358d642ccae1462582d3b", "465ce53a8c0f3a7950dfb836438442f833cf6663d407f37d8c52fe7b6e56d7e8", "48020e343fc40f72a442c8a1334284620f81295256a6b6ca6d8aa1350c763bbe", "4ec30ade438d1711562f3786bea33a9da6107414aed60a5daa974d50a8c2c351", "5296fc86ab612ec12394565c500b412a43b328b3907c0d14358950d06fd83baf", "5f61bed2f7d9b6a9ab935150a6b23d7f84b8055524e7be7715b6513f3328138e", 
"6899797ac384b239ce1926f3cb86ffc19996f6fa3a1efbb23cb49e0c12d8c18c", "68a43a9f9f83693ce0414d17e019daee7ab3f7113a70c79a3dd4c2f704e4d741", "6b8033d47fe22506856fe450470ccb1d8ba1ffb8463494a15cfc96392a288c09", "7ad7536066b28863e5835e8cfeaa794b7fe352d99a8cded9f43d1161be8e9fbd", "7bacb89ccf4bedb30b277e96e4cc68cd1369ca6841bde7b005191b54d3dd1034", "839dc7c36501254e14331bcb98b27002aa415e4af7ea039d9009409b9d2d5420", "8e679d1bde5e2de4a909efb071f14b472a678b788904440779d2c449c0355b27", "8f9a95b66969cdea53ec992ecea5406c5bd99c9221f539bca1e8406b200ae98c", "932c03d2d565f75961ba1d3cec41ddde00e162c5b46d03f7423edcb807734eab", "93f965415cc51604f571e491f280cff0f5be35895b4eb5e55b47ae90c02a497b", "988529edadc49039d205e0aa6ce049c5ccda4acb2d6c3c5c550c17e8c02c05ba", "998d7e73548fe395eeb294495a04d38942edb66d1fa61eb70418871bc621227e", "9de60893fb447d1e797f6bf08fdf0dbcda0c1e34c1b06c92bd3a363c0ea8c609", "9e80d45d0c7fcee54e22771db7f1b0b126fb4a6c0a2e5afa72f66827207ff2f2", "a545a3dfe5082dc8e8c3eb7f8a2cf4f2870902ff1860bd99b6198cfd1f9d1f49", "a5d8f29e5ec661143621a8f4de51adfb300d7a476224156a39a392254f70687b", "a9abc8c480e103dc05d9b332c6cc9fb1586330356fc14f1aa9c0ca5745097d19", "aca06bfba4759bbdb09bf52ebb15ae20268ee1f6747417837926fae990ebc41d", "bb23b7a6fd666e551a3094ab896a57809e010059540ad20acbeec03a154224ce", "bfd1d0ae7e292105f29d7deaa9d8f2916ed8553ab9d5f39ec65bcf5deadff3f9", "c22ab9f96cbaff05c6a84e20ec856383d27eae09e511d3e6ac4479489195861d", "c62ca0a38958f541a73cf86acdab020c2091631c137bd359c4f5bddde7b75fd4", "c709d8bda72cf4cd348ccec2a4881f2c5848fd72903c185f363d361b2737f773", "c968a6aa7e0b56ecbd28531ddf439c2ec103610d3e2bf3b75b813304f8cb7723", "ca58eba39c68010d7e87a823f22a081b5290e3e3c64714aac3c91481d8b34d22", "df785d8cb80539d0b55fd47183264b7002077859028dfe3070cf6359bf8b2d9c", "f406628ca51e0ae90ae76ea8398677a921b36f0bd71aab2099dfed08abd0322f", "f46087bbd95ebae244a0eda01a618aff11ec7a069b15a3ef8f6b520db523dcf1", "f8019c5279eb32360ca03e9fac40a12667715546eed5c5eb59eb381f2f501260", "fc5f4d209733750afd2714e9109816a29500718b32dd9a5db01c0cb3a019b96a"] -cvxpy = ["4aa7fc03707fccc673bd793572cc5b950ebd304c478cd9c0b6d53ccf7186a3f1", "7a37f30bf62bf2d521bbfd934aa38af718638960c837afa051b088c059e23e88", "d2297643a9223decaed6ea12b3913cf01c4aa659ac4b046a76360d7752447cbe"] +cvxpy = ["13fd80967d306c0c9959304fd633d3e494fa3b82f01e455bf18d7ceeb7f5b6c7", "1b2d3717919841b3a155db462923847a279fcf270a2895145fe43873e41fe6ad", "4aa7fc03707fccc673bd793572cc5b950ebd304c478cd9c0b6d53ccf7186a3f1", "645054acbbcc39a9bd851582224c38db141a98a1386bcc28d88019e95c920ccd", "7a37f30bf62bf2d521bbfd934aa38af718638960c837afa051b088c059e23e88", "d2297643a9223decaed6ea12b3913cf01c4aa659ac4b046a76360d7752447cbe", "d3643b915a195ef20c90aaf7a974e9df6a6831467f1aa7e64bb0bb3c9cb6df41", "faee66f3da014226829ab9b724674c6378a6e7bf57b6223bda1acf6878ef9e32"] darglint = ["651a6029f02715e9b5af0287c1e3787518fe71478d295e4a1c3334a35a9e82a0", "c0a617f42fa196d4e0a2f8246d98b4d16c3fe9728937524a3c35fd2dbef986f6"] deap = ["01ab6067af3c86bd3a00a0d5e0c9860220c7cf412031f9cce18a6d08ec25b808", "0dc11a5521f661a7c7f475466d932b056fbfee8447ad73b007d69ef75c924355", "11162ae0343a25f5a8625f683ce16bad5757812d71db6b80a5ac7c25799a1a88", "1873f5e2a55ff61dac965b55cc042b2fe5529edbd54fc0fc1061664ddb4b75db", "19f6a60c91313cb1f39a9687bc54efab8abc599e5f81b87faedf583efc388602", "21787af1e4a56345bbffa6d1b07f5611d3ef7b299e5e832e6ab28dbab5c5c10e", "2f50f38ae0c82554a476d6c6013c85da4a8d7cac102edc4ec460a658200bd832", "31ed6220068e703d3f54c53046b8f25b85a9225c64e1e50172c2172c4bd8a7fe", 
"34594ba2f417ccb622b0ff54c25850fde80e12ca89fde6f242b15029e846be29", "3603c91779c276588884321637212511962b2f0668cec56b2b5664d28f28eee7", "5ee3cee4eac683237915bf570ede65047224ac6f392970fed029e3404935647c", "600e95e745cee25fda8c9a67219c9f46c4661da636a5af9f5e924230e7a3aeac", "6102d8bca425ff5d704f7631b69c22e33782e33020ce059cc88085746444ebfe", "669840720da9c4571efd9d0efdf90267009686b7a4c43dd4ab124e33e9cc153a", "6c5ef3b6c387cd28c7aab0297b05a9994b9c88dfd8a89236866c14703d55f9dc", "a0a0e56bd52a262ee12f84fa883b7ec5367532b784e2a6e83b1f7126b69d2300", "a1cc5fc4a2735ec5560ddef84f80beb84540d3221a147b53bba5e6a8718c8a55", "b05f607041c3f8aac5364055cb9632714bc62fe93e53283fbafea9ba91e13a69", "cd0fd7bccf7837b9e6a666b75e1c3a629fa3f5bc346cb90a9edd8cd56f085980", "cf1e53c822526bbc418333c47f668f394b00b51fddb4f15c54d5f190b2b88f17", "e648e1d76d5c8ecbce7f312bd174e4d2613debddd81f2a614b9023f7ad0331a0", "f146a9a0957510b57a2b5c669a26f0b84b2d219000b5684f4827884a75ad2ea7", "f1a0d1390e0b4f9edd4cbf2903c7d60865f43bad00de239aa066ffeda4ad7ee0", "fe789aa74ba78549030037dc9580510ff1763ef12fdf05cb92dda74237110565"] dill = ["993409439ebf7f7902d9de93eaa2a395e0446ff773d29f13dc46646482f76906"] @@ -1370,6 +1259,7 @@ numpy = ["0778076e764e146d3078b17c24c4d89e0ecd4ac5401beff8e1c87879043a0633", "14 osqp = ["02dbcd69dbe07204142909a4bf99df374dae993583f3bc766b2bf8871ae2536f", "1f604f4927b375778570aa6d758c38ff61117fcbd8478fa0563a96662acb1a0b", "2cfa4eba7f92ad6996c3ace35fd82587686521cfbb23f82841636e64fe5b56f0", "39bd29fd23bbdd0a7766e4f90d330ff3cb76a7c2737519a2a307c3dfe3603015", "3a6cb649373f9c7179ba5645656d4eed804d555d87908006c4ae4db413ab3f9c", "3c49c3fd8fcda226407f1deb5326cd6d3951abe64ff86e7fca65c4c533993158", "50c2f70ec4cda87d21f18eb7e6e75f61102daf218c276e4e7bbba951be20481d", "6aa0b91c50ad5d7ab0403031552221e1a1f51fcdb5860ffddbd9c40627299846", "7439a1318f6509be5f49bd0249c15f3ad3b1754f729d4e00c379417058fc5357", "86417db99dc6cac26d7cc3ee53a7936fbe7387b87309b5e2fa6e4fff2445c9c9", "86d58b5a9f8f4dc6fd1dcb02fbb29be8c3bcae59b85620d174c88125d953707d", "86f448a71d35e3156c46efb8d9ccde8253e5858c0c483c1454df473546b60496", "8c46d87410eb4bacdd3211899be2b03fd318751c65de8d9e8ad15bcfe7ee9faf", "9cb4809fbe1afeb9cba17e7f62028de47dae22360abf55e8a29663f856c23e2a", "a7d8e28dc4c5490d4b35a88c17167c5b75ccd2e495dd5ac99fa3d510c1cfccef", "b17c6b28455bf7e3e52209c1ce48b74f86a5bc781104492d344ff93c41930b10", "c5dc13677ecd6def58c0c95f5e0afef9254531ba8c5d65efad940013622b45dd", "cc7cc1a99c54c6f192bdccba0fb263cf8cef3084a2918d3d67109db92e5148e9", "da0424e542a137629a863f71158f8a2fec1def9983c06d969891ef5ae170fc9f", "ded0027a08e2c9572a280fcd6cca9d4ed2f1c50fb80585453be4290062a6e909", "e801b30a8f7c6d51723e8275b6c143b2154d946423b6cb2b973db027b7a9955b", "eda282626fee5e9050cdb09934a91726ec39070da07d5f92b888a4d56404539b", "f14800074b44b54237ff3b892c5ccd7788e739774b739a11cfed9b44466c0471"] packaging = ["0c98a5d0be38ed775798ece1b9727178c4469d9c3b4ada66e8e6b7849f8732af", "9e1cbf8c12b1f1ce0bb5344b8d7ecf66a6f8a6e91bcb0c84593ed6d3ab5c4ab3"] pandas = ["11975fad9edbdb55f1a560d96f91830e83e29bed6ad5ebf506abda09818eaf60", "12e13d127ca1b585dd6f6840d3fe3fa6e46c36a6afe2dbc5cb0b57032c902e31", "1c87fcb201e1e06f66e23a61a5fea9eeebfe7204a66d99df24600e3f05168051", "242e9900de758e137304ad4b5663c2eff0d798c2c3b891250bd0bd97144579da", "26c903d0ae1542890cb9abadb4adcb18f356b14c2df46e4ff657ae640e3ac9e7", "2e1e88f9d3e5f107b65b59cd29f141995597b035d17cc5537e58142038942e1a", "31b7a48b344c14691a8e92765d4023f88902ba3e96e2e4d0364d3453cdfd50db", "4fd07a932b4352f8a8973761ab4e84f965bf81cc750fb38e04f01088ab901cb8", 
"5b24ca47acf69222e82530e89111dd9d14f9b970ab2cd3a1c2c78f0c4fbba4f4", "647b3b916cc8f6aeba240c8171be3ab799c3c1b2ea179a3be0bd2712c4237553", "66b060946046ca27c0e03e9bec9bba3e0b918bafff84c425ca2cc2e157ce121e", "6efa9fa6e1434141df8872d0fa4226fc301b17aacf37429193f9d70b426ea28f", "be4715c9d8367e51dbe6bc6d05e205b1ae234f0dc5465931014aa1c4af44c1ba", "bea90da782d8e945fccfc958585210d23de374fa9294a9481ed2abcef637ebfc", "d318d77ab96f66a59e792a481e2701fba879e1a453aefeebdb17444fe204d1ed", "d785fc08d6f4207437e900ffead930a61e634c5e4f980ba6d3dc03c9581748c7", "de9559287c4fe8da56e8c3878d2374abc19d1ba2b807bfa7553e912a8e5ba87c", "f4f98b190bb918ac0bc0e3dd2ab74ff3573da9f43106f6dba6385406912ec00f", "f71f1a7e2d03758f6e957896ed696254e2bc83110ddbc6942018f1a232dd9dad", "fb944c8f0b0ab5c1f7846c686bc4cdf8cde7224655c12edcd59d5212cd57bec0"] +patchy = ["21609acb2e7d6b5375c605ae1a0f13469c50569db817ee4e62336b0aff103d75", "aae8ad17484b94498c2e4232a3e419cebf526e2ad8a80282f77447e9fd4d8a5c"] patsy = ["5465be1c0e670c3a965355ec09e9a502bf2c4cbe4875e8528b0221190a8a5d40", "f115cec4201e1465cd58b9866b0b0e7b941caafec129869057405bfe5b5e3991"] pluggy = ["0825a152ac059776623854c1543d65a4ad408eb3d33ee114dff91e57ec6ae6fc", "b9817417e95936bf75d85d3f8767f7df6cdde751fc40aed3bb3074cbcb77757c"] pre-commit = ["92e406d556190503630fd801958379861c94884693a032ba66629d0351fdccd4", "cccc39051bc2457b0c0f7152a411f8e05e3ba2fe1a5613e4ee0833c1c1985ce3"] diff --git a/pyproject.toml b/pyproject.toml index 85d6dfc..5d2cbd5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,7 @@ sphinx = { version ="^1.7.6", optional = true } sphinx_rtd_theme = { version ="^0.4.1", optional = true } sphinxcontrib-plantuml = { version ="^0.16.1", optional = true } docutils = { version ="<0.15.1", optional= true } # hot fix: https://github.com/sdispater/poetry/issues/1194 +patchy = "^1.5" [tool.poetry.dev-dependencies] # Linting diff --git a/setup.cfg b/setup.cfg index 39fcbb7..dc8ffcd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -33,8 +33,8 @@ per-file-ignores = # pytest [tool:pytest] -addopts = -v -x --xdoc --cov=foreshadow --cov-config=setup.cfg --cov-report=term --cov-report=html -;addopts = -s -v --xdoc +;addopts = -v -x --xdoc --cov=foreshadow --cov-config=setup.cfg --cov-report=term --cov-report=html +addopts = -s -vv --xdoc # above is good for pycharm environments. filterwarnings = ignore:the matrix subclass:PendingDeprecationWarning From 1a681a02e36519a87a21afb288279c85ac36544a Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Tue, 13 Aug 2019 15:56:11 -0400 Subject: [PATCH 14/37] Cleaning up smart.py of old implementation stuff. --- foreshadow/smart/smart.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/foreshadow/smart/smart.py b/foreshadow/smart/smart.py index 8c75412..f26ce84 100644 --- a/foreshadow/smart/smart.py +++ b/foreshadow/smart/smart.py @@ -157,16 +157,6 @@ def set_params(self, **params): see super. """ - # if "transformer" in params: # We load this first as - # # BaseEstimator.set_params will call this set_params of this - # # object. In the init, we set this initially to None as it will - # # later be resolved to whichever concrete transformer is chosen. - # # None has no set_params, so we need to set this here, before we - # # call to super(). - # # This is required because of sklearn using valid_params[key] - # # which is not the transformer passed in the params, but the - # # current transformer on this object. 
- # self.transformer = params["transformer"] return super().set_params(**params) @abstractmethod @@ -246,7 +236,7 @@ def fit(self, X, y=None, **fit_params): self.resolve(X, y, **fit_params) self.transformer.full_df = fit_params.pop("full_df", None) self.transformer.fit(X, y, **fit_params) - return self # .transformer.fit(X, y, **fit_params) + return self # This should not return the self.transformer.fit as that will # cause fit_transforms, which call .fit().transform() to fail when # using our wrapper for transformers; TL;DR, it misses the call to From 21ccbeea67972475bbbe1bb3edcfd9d806040aea Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Tue, 13 Aug 2019 15:58:45 -0400 Subject: [PATCH 15/37] isort, black, flake --- foreshadow/base.py | 19 +++---- foreshadow/concrete/internals/boxcox.py | 2 +- .../concrete/internals/cleaners/base.py | 2 +- foreshadow/concrete/internals/dropfeature.py | 2 +- foreshadow/concrete/internals/dummyencoder.py | 2 +- foreshadow/concrete/internals/fancyimpute.py | 1 - foreshadow/concrete/internals/financial.py | 2 +- foreshadow/concrete/internals/htmlremover.py | 1 - foreshadow/concrete/internals/labelencoder.py | 2 +- foreshadow/concrete/internals/notransform.py | 1 - foreshadow/concrete/internals/tfidf.py | 2 +- foreshadow/concrete/internals/tostring.py | 1 - .../concrete/internals/uncommonremover.py | 2 +- foreshadow/estimators/auto.py | 2 +- foreshadow/estimators/meta.py | 1 - foreshadow/foreshadow.py | 2 +- foreshadow/parallelprocessor.py | 3 +- foreshadow/smart/smart.py | 1 - foreshadow/steps/preparerstep.py | 1 - .../test_smart/test_smart.py | 54 +++++++++---------- foreshadow/utils/validation.py | 3 +- pyproject.toml | 2 +- 22 files changed, 52 insertions(+), 56 deletions(-) diff --git a/foreshadow/base.py b/foreshadow/base.py index 1185211..d2d84d1 100644 --- a/foreshadow/base.py +++ b/foreshadow/base.py @@ -1,20 +1,21 @@ -from sklearn.base import ( - BaseEstimator, - TransformerMixin, -) +"""Foreshadow version of sklearn.base.py.""" import patchy +from sklearn.base import BaseEstimator, TransformerMixin + _set_params = BaseEstimator.set_params -patchy.patch(_set_params, - """@@ -30,6 +30,6 @@ +patchy.patch( + _set_params, + """@@ -30,6 +30,6 @@ setattr(self, key, value) - + for key, sub_params in nested_params.items(): - valid_params[key].set_params(**sub_params) + getattr(self, key).set_params(**sub_params) - + return self - """) + """, +) BaseEstimator.set_params = _set_params diff --git a/foreshadow/concrete/internals/boxcox.py b/foreshadow/concrete/internals/boxcox.py index f556e61..91b363a 100644 --- a/foreshadow/concrete/internals/boxcox.py +++ b/foreshadow/concrete/internals/boxcox.py @@ -3,10 +3,10 @@ import numpy as np from scipy.special import inv_boxcox1p from scipy.stats import boxcox -from foreshadow.base import BaseEstimator, TransformerMixin from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted +from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/concrete/internals/cleaners/base.py b/foreshadow/concrete/internals/cleaners/base.py index fd68c6c..d9a22f4 100644 --- a/foreshadow/concrete/internals/cleaners/base.py +++ b/foreshadow/concrete/internals/cleaners/base.py @@ -3,8 +3,8 @@ from collections import namedtuple import pandas as pd -from foreshadow.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.exceptions import InvalidDataFrame from 
foreshadow.metrics import avg_col_regex, regex_rows from foreshadow.utils import check_df diff --git a/foreshadow/concrete/internals/dropfeature.py b/foreshadow/concrete/internals/dropfeature.py index 335c913..e5c184d 100644 --- a/foreshadow/concrete/internals/dropfeature.py +++ b/foreshadow/concrete/internals/dropfeature.py @@ -1,10 +1,10 @@ """DropFeature.""" import numpy as np import pandas as pd -from foreshadow.base import BaseEstimator, TransformerMixin from sklearn.utils import check_array from sklearn.utils.validation import check_is_fitted +from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/concrete/internals/dummyencoder.py b/foreshadow/concrete/internals/dummyencoder.py index f41bb6a..62e1d5d 100644 --- a/foreshadow/concrete/internals/dummyencoder.py +++ b/foreshadow/concrete/internals/dummyencoder.py @@ -1,9 +1,9 @@ """DummyEncoder transformer.""" import pandas as pd -from foreshadow.base import BaseEstimator, TransformerMixin from sklearn.utils.validation import check_is_fitted +from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/concrete/internals/fancyimpute.py b/foreshadow/concrete/internals/fancyimpute.py index 1b4ec3b..067ae54 100644 --- a/foreshadow/concrete/internals/fancyimpute.py +++ b/foreshadow/concrete/internals/fancyimpute.py @@ -1,7 +1,6 @@ """Fancy imputation.""" from foreshadow.base import BaseEstimator, TransformerMixin - from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/concrete/internals/financial.py b/foreshadow/concrete/internals/financial.py index f542b2e..a4b671a 100644 --- a/foreshadow/concrete/internals/financial.py +++ b/foreshadow/concrete/internals/financial.py @@ -4,8 +4,8 @@ import numpy as np import pandas as pd -from foreshadow.base import BaseEstimator, TransformerMixin +from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/concrete/internals/htmlremover.py b/foreshadow/concrete/internals/htmlremover.py index d887e56..870d84f 100644 --- a/foreshadow/concrete/internals/htmlremover.py +++ b/foreshadow/concrete/internals/htmlremover.py @@ -2,7 +2,6 @@ import re from foreshadow.base import BaseEstimator, TransformerMixin - from foreshadow.utils import check_df from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/concrete/internals/labelencoder.py b/foreshadow/concrete/internals/labelencoder.py index 49c2ecf..d1af849 100644 --- a/foreshadow/concrete/internals/labelencoder.py +++ b/foreshadow/concrete/internals/labelencoder.py @@ -1,8 +1,8 @@ """FixedLabelEncoder.""" -from foreshadow.base import BaseEstimator, TransformerMixin from sklearn.preprocessing import LabelEncoder as SklearnLabelEncoder +from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/concrete/internals/notransform.py b/foreshadow/concrete/internals/notransform.py index 789b091..6b606d1 100644 --- a/foreshadow/concrete/internals/notransform.py +++ b/foreshadow/concrete/internals/notransform.py @@ -1,6 +1,5 @@ """No Transform class through acts as a pass through for DataFrame and flag.""" from foreshadow.base import BaseEstimator, TransformerMixin - from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/concrete/internals/tfidf.py b/foreshadow/concrete/internals/tfidf.py index 129c85d..9f330ab 100644 --- a/foreshadow/concrete/internals/tfidf.py +++ 
b/foreshadow/concrete/internals/tfidf.py @@ -1,13 +1,13 @@ """FixedTfidfVectorizer.""" import numpy as np -from foreshadow.base import BaseEstimator from sklearn.feature_extraction.text import ( TfidfVectorizer as SklearnTfidfVectorizer, VectorizerMixin, ) from sklearn.utils import check_array +from foreshadow.base import BaseEstimator from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/concrete/internals/tostring.py b/foreshadow/concrete/internals/tostring.py index e34f403..2c2fdc5 100644 --- a/foreshadow/concrete/internals/tostring.py +++ b/foreshadow/concrete/internals/tostring.py @@ -1,7 +1,6 @@ """To String.""" from foreshadow.base import BaseEstimator, TransformerMixin - from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/concrete/internals/uncommonremover.py b/foreshadow/concrete/internals/uncommonremover.py index 481f1ea..2215a28 100644 --- a/foreshadow/concrete/internals/uncommonremover.py +++ b/foreshadow/concrete/internals/uncommonremover.py @@ -1,8 +1,8 @@ """Uncommon remover.""" -from foreshadow.base import BaseEstimator, TransformerMixin from sklearn.utils.validation import check_is_fitted +from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.utils import check_df from foreshadow.wrapper import pandas_wrap diff --git a/foreshadow/estimators/auto.py b/foreshadow/estimators/auto.py index 1865cb3..0834b49 100644 --- a/foreshadow/estimators/auto.py +++ b/foreshadow/estimators/auto.py @@ -3,8 +3,8 @@ import warnings import numpy as np -from foreshadow.base import BaseEstimator +from foreshadow.base import BaseEstimator from foreshadow.estimators.config import get_tpot_config from foreshadow.utils import check_df, check_module_installed diff --git a/foreshadow/estimators/meta.py b/foreshadow/estimators/meta.py index 413c97b..0880eba 100644 --- a/foreshadow/estimators/meta.py +++ b/foreshadow/estimators/meta.py @@ -1,7 +1,6 @@ """Wrapped Estimator.""" from foreshadow.base import BaseEstimator - from foreshadow.utils import check_df diff --git a/foreshadow/foreshadow.py b/foreshadow/foreshadow.py index 062ddbb..7f9a715 100644 --- a/foreshadow/foreshadow.py +++ b/foreshadow/foreshadow.py @@ -3,9 +3,9 @@ import inspect import warnings -from foreshadow.base import BaseEstimator from sklearn.model_selection._search import BaseSearchCV +from foreshadow.base import BaseEstimator from foreshadow.columnsharer import ColumnSharer from foreshadow.estimators.auto import AutoEstimator from foreshadow.estimators.meta import MetaEstimator diff --git a/foreshadow/parallelprocessor.py b/foreshadow/parallelprocessor.py index 395c4f3..c8be221 100644 --- a/foreshadow/parallelprocessor.py +++ b/foreshadow/parallelprocessor.py @@ -1,7 +1,6 @@ """Foreshadow extension of feature union for handling dataframes.""" import pandas as pd -from foreshadow.base import BaseEstimator from sklearn.externals.joblib import Parallel, delayed from sklearn.pipeline import ( FeatureUnion, @@ -10,6 +9,8 @@ _transform_one, ) +from foreshadow.base import BaseEstimator + class ParallelProcessor(FeatureUnion): """Class to support parallel operation on dataframes. 
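The import shuffles in this patch are mechanical, but the base.py hunk at its top is not: patchy rewrites a function's source at runtime by applying a unified diff to it, which is how sklearn's BaseEstimator.set_params gets overridden without forking sklearn. A minimal, self-contained sketch of that mechanism, using a toy greet function as a stand-in for the real set_params:

# Minimal sketch of the patchy mechanism used in base.py above; 'greet' is
# a toy stand-in for sklearn's BaseEstimator.set_params.
import patchy


def greet(name):
    message = "Hello, " + name
    return message


# patchy.patch takes the target function and a unified diff of its source;
# the hunk's context lines must match the function's current source.
patchy.patch(
    greet,
    """\
@@ -1,3 +1,3 @@
 def greet(name):
-    message = "Hello, " + name
+    message = "Hi, " + name
     return message
""",
)

assert greet("foreshadow") == "Hi, foreshadow"
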
diff --git a/foreshadow/smart/smart.py b/foreshadow/smart/smart.py index f26ce84..dc35328 100644 --- a/foreshadow/smart/smart.py +++ b/foreshadow/smart/smart.py @@ -3,7 +3,6 @@ from abc import ABCMeta, abstractmethod from foreshadow.base import BaseEstimator, TransformerMixin - from foreshadow.logging import logging from foreshadow.pipeline import SerializablePipeline from foreshadow.utils import ( diff --git a/foreshadow/steps/preparerstep.py b/foreshadow/steps/preparerstep.py index a81918c..23645ac 100644 --- a/foreshadow/steps/preparerstep.py +++ b/foreshadow/steps/preparerstep.py @@ -2,7 +2,6 @@ from collections import MutableMapping, defaultdict, namedtuple from foreshadow.base import BaseEstimator, TransformerMixin - from foreshadow.concrete.internals.notransform import NoTransform from foreshadow.logging import logging from foreshadow.parallelprocessor import ParallelProcessor diff --git a/foreshadow/tests/test_transformers/test_smart/test_smart.py b/foreshadow/tests/test_transformers/test_smart/test_smart.py index 240e6ad..54a4aeb 100644 --- a/foreshadow/tests/test_transformers/test_smart/test_smart.py +++ b/foreshadow/tests/test_transformers/test_smart/test_smart.py @@ -21,10 +21,7 @@ def pick_transformer(self, X, y=None, **fit_params): yield TestSmartTransformer -@pytest.mark.parametrize( - 'deep', - [True, False] -) +@pytest.mark.parametrize("deep", [True, False]) def test_smart_get_params_default(smart_child, deep): """Ensure that default get_params works. @@ -35,20 +32,21 @@ def test_smart_get_params_default(smart_child, deep): """ smart = smart_child() params = smart.get_params(deep=deep) - default_state = {'check_wrapped': True, - 'column_sharer': None, - 'force_reresolve': False, - 'keep_columns': False, - 'name': None, - 'should_resolve': True, - 'transformer': None, - 'y_var': False} + default_state = { + "check_wrapped": True, + "column_sharer": None, + "force_reresolve": False, + "keep_columns": False, + "name": None, + "should_resolve": True, + "transformer": None, + "y_var": False, + } assert default_state == params @pytest.mark.parametrize( - 'initial_transformer', - [None, "BoxCox", "StandardScaler"] + "initial_transformer", [None, "BoxCox", "StandardScaler"] ) def test_smart_set_params_default(smart_child, initial_transformer): """Test setting both transformer and its parameters simultaneously works. 
@@ -63,23 +61,25 @@ def test_smart_set_params_default(smart_child, initial_transformer): """ from foreshadow.concrete import StandardScaler + smart = smart_child() smart.transformer = initial_transformer - params = {'transformer': "StandardScaler", "transformer__with_std": - False} + params = {"transformer": "StandardScaler", "transformer__with_std": False} smart.set_params(**params) - check = {'check_wrapped': True, - 'column_sharer': None, - 'force_reresolve': False, - 'keep_columns': False, - 'name': None, - 'should_resolve': False, - 'y_var': False, - 'transformer__with_std': False, - 'transformer__copy': True, - 'transformer__with_mean': True} + check = { + "check_wrapped": True, + "column_sharer": None, + "force_reresolve": False, + "keep_columns": False, + "name": None, + "should_resolve": False, + "y_var": False, + "transformer__with_std": False, + "transformer__copy": True, + "transformer__with_mean": True, + } params = smart.get_params() - assert isinstance(params.pop('transformer'), StandardScaler) + assert isinstance(params.pop("transformer"), StandardScaler) assert check == params diff --git a/foreshadow/utils/validation.py b/foreshadow/utils/validation.py index 4492dab..b5a56be 100644 --- a/foreshadow/utils/validation.py +++ b/foreshadow/utils/validation.py @@ -4,9 +4,10 @@ import numpy as np import pandas as pd -from foreshadow.base import BaseEstimator, TransformerMixin from sklearn.feature_extraction.text import VectorizerMixin +from foreshadow.base import BaseEstimator, TransformerMixin + PipelineStep = {"NAME": 0, "CLASS": 1, "COLS": 2} diff --git a/pyproject.toml b/pyproject.toml index 5d2cbd5..e26b01f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -123,7 +123,7 @@ exclude = ''' [tool.isort] known_first_party = 'foreshadow' -known_third_party = ["category_encoders", "jsonpickle", "marshmallow", "numpy", "pandas", "pytest", "scipy", "six", "sklearn", "tpot", "yaml"] +known_third_party = ["category_encoders", "jsonpickle", "marshmallow", "numpy", "pandas", "patchy", "pytest", "scipy", "six", "sklearn", "tpot", "yaml"] multi_line_output = 3 lines_after_imports = 2 force_grid_wrap = 0 From 0b7f58576b20cfa48f5cf553175215e3142cbb19 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Tue, 13 Aug 2019 16:04:05 -0400 Subject: [PATCH 16/37] Documented reasoning for new BaseEstimator --- foreshadow/base.py | 9 +++++++++ foreshadow/estimators/auto.py | 8 -------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/foreshadow/base.py b/foreshadow/base.py index d2d84d1..c9bd074 100644 --- a/foreshadow/base.py +++ b/foreshadow/base.py @@ -16,6 +16,15 @@ return self """, ) +"""sklearn.base.BaseEstiamtor uses the valid_params to set the params. + +In our use cases, we often modify both an objet and its params. In this case, +the setattr(self, key, value) will change this object, but the +valid_params[key] will have a reference to the old object, not setting the +params on the new object. This is a big issue when we try to simultaneously +change both an object and its params simultaneously. For instsance, +see smart where we set both a transformer and that transformer's params. 
+""" BaseEstimator.set_params = _set_params diff --git a/foreshadow/estimators/auto.py b/foreshadow/estimators/auto.py index 0834b49..c5d96a1 100644 --- a/foreshadow/estimators/auto.py +++ b/foreshadow/estimators/auto.py @@ -326,12 +326,6 @@ def get_params(self, deep=True): """ params = super().get_params(deep=deep) - params.update( - { - "estimator": self.estimator, - "estimator_class": self.estimator_class, - } - ) return params def set_params(self, **params): @@ -344,8 +338,6 @@ def set_params(self, **params): See super. """ - self.estimator = params.pop("estimator", None) - self.estimator_class = params.pop("estimator_class", None) return super().set_params(**params) From 792e741efeab83d1ceb30432baf2baf72d907d74 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Tue, 13 Aug 2019 17:22:59 -0400 Subject: [PATCH 17/37] Removing old prints --- foreshadow/metrics.py | 1 - foreshadow/steps/feature_engineerer.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/foreshadow/metrics.py b/foreshadow/metrics.py index 3d17668..c649d08 100644 --- a/foreshadow/metrics.py +++ b/foreshadow/metrics.py @@ -211,7 +211,6 @@ def avg_col_regex(feature, cleaner, mode=min): """ f = feature - print(f) matched_lens = [ ( cleaner(f.at[i, f.columns[0]]).match_lens, diff --git a/foreshadow/steps/feature_engineerer.py b/foreshadow/steps/feature_engineerer.py index 1915a46..bae7983 100644 --- a/foreshadow/steps/feature_engineerer.py +++ b/foreshadow/steps/feature_engineerer.py @@ -41,12 +41,10 @@ def group_by(iterable, column_sharer_key): columns = X.columns.values.tolist() columns_by_domain = group_by(columns, "domain") - print(columns_by_domain) columns_by_domain_and_intent = defaultdict(list) for domain in columns_by_domain: columns_by_intent = group_by(columns_by_domain[domain], "intent") - print(columns_by_intent) for intent in columns_by_intent: columns_by_domain_and_intent[ str(domain) + "_" + intent From 3303359919d9e3443dc2dcf40e7b67403a5f1bf1 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Tue, 13 Aug 2019 17:41:26 -0400 Subject: [PATCH 18/37] fixing setup.cfg --- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index dc8ffcd..b943875 100644 --- a/setup.cfg +++ b/setup.cfg @@ -33,8 +33,8 @@ per-file-ignores = # pytest [tool:pytest] -;addopts = -v -x --xdoc --cov=foreshadow --cov-config=setup.cfg --cov-report=term --cov-report=html -addopts = -s -vv --xdoc +addopts = -v -x --xdoc --cov=foreshadow --cov-config=setup.cfg --cov-report=term --cov-report=html +;addopts = -s -vv --xdoc # above is good for pycharm environments. 
filterwarnings = ignore:the matrix subclass:PendingDeprecationWarning From 25c312dbc8eaba4f1b7332561c2b7b9315fcd8ba Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Wed, 14 Aug 2019 12:42:58 -0400 Subject: [PATCH 19/37] Planning ParamDistribution --- foreshadow/foreshadow.py | 3 +- foreshadow/optimizers/__init__.py | 2 +- foreshadow/optimizers/optimizer.py | 44 +++++++++++++++++++++ foreshadow/optimizers/param_distribution.py | 43 ++++++++++++++++---- searcher_script.py | 2 - 5 files changed, 83 insertions(+), 11 deletions(-) create mode 100644 foreshadow/optimizers/optimizer.py diff --git a/foreshadow/foreshadow.py b/foreshadow/foreshadow.py index 9140f1e..cbe5069 100644 --- a/foreshadow/foreshadow.py +++ b/foreshadow/foreshadow.py @@ -183,7 +183,8 @@ def fit(self, data_df, y_df): if self.X_preparer is not None: self.pipeline = SerializablePipeline( - [("preparer", self.X_preparer), ("estimator", self.estimator)] + [("X_preparer", self.X_preparer), ("estimator", + self.estimator)] ) else: self.pipeline = SerializablePipeline( diff --git a/foreshadow/optimizers/__init__.py b/foreshadow/optimizers/__init__.py index 22382df..8d34d01 100644 --- a/foreshadow/optimizers/__init__.py +++ b/foreshadow/optimizers/__init__.py @@ -1,6 +1,6 @@ """Foreshadow optimizers.""" # from foreshadow.optimizers.param_mapping import param_mapping -from foreshadow.optimizers.param_distribution import ParamSpec +from foreshadow.optimizers.optimizer import ParamSpec __all__ = ["ParamSpec"] diff --git a/foreshadow/optimizers/optimizer.py b/foreshadow/optimizers/optimizer.py new file mode 100644 index 0000000..a9e5d08 --- /dev/null +++ b/foreshadow/optimizers/optimizer.py @@ -0,0 +1,44 @@ +"""Classes for optimizing Foreshadow given a param_distribution.""" +import foreshadow as fs +import foreshadow.serializers as ser + + + +""" +combinations: + X_preparer.cleaner.CHAS: + Cleaner: + - date: + - p1 + - p2 + - financial + IntentMapper: + - Something + + X_preparer.cleaner.CHAS.CleanerMapper: + -Something + + X_preparer.cleaner.CHAS.IntentMapper: + -Something + + + X_preparer: + cleaner: + CHAS: + Cleaner: + date: + -p1 + -p2 + + +Convention: + Column name is last. If a . is present, then applied across all + columns. + +Things that may be swapped: + PreparerSteps, + StepSmartTransformers/ConcreteTransformers. + +""" + + diff --git a/foreshadow/optimizers/param_distribution.py b/foreshadow/optimizers/param_distribution.py index 5cd9988..1403da6 100644 --- a/foreshadow/optimizers/param_distribution.py +++ b/foreshadow/optimizers/param_distribution.py @@ -1,19 +1,48 @@ """Classes to be configured by user for customizing parameter tuning.""" + import foreshadow as fs import foreshadow.serializers as ser -class ParamSpec(fs.Foreshadow, ser.ConcreteSerializerMixin): - def __init__(self, parameter_distribution): - self.parameter_distribution = parameter_distribution +""" +2. cases: + +1. Apply override to initial columns + +In this case, we simply need to override the get_mapping result. This is +hard to do because it is computed at .fit() time, not __init__ time. We need to +compute it at .fit() time because we need access to the dataset. Instead, +we will pass overrides to the __init__ and handle the errors if users choose +wrong columns. + + +2. apply override to a dynamically created transformer + +In this case, the output from a previous step in the PreparerStep's pipeline +created new columns. Thesee will not be available at get_mapping() time. 
If +we pass in these columns to ParallelProcessor, it will try to slice then out +which will break. We do however know the initial column and, knowing +DynamicPipeline's naming scheme, the new column's name. We can enable an +override on a per column level by passing in the eventual columns to be +overridden to that group process. +""" + + +class ParamSpec(ser.ConcreteSerializerMixin): + def __init__(self, fs_pipeline, X_df, Y_df): + self.fs_pipeline = fs_pipeline + params = self.fs_pipeline.get_params() + print(params) + + def get_params(self, deep=True): - @classmethod - def _get_param_names(cls): - return super()._get_param_names() + fs.Foreshadow._get_param_names() def set_params(self, **params): + pass if __name__ == '__main__': - ParamSpec().to_json("test") + # ParamSpec().to_json("test") + from foreshadow import Foreshadow + ParamSpec(Foreshadow()) \ No newline at end of file diff --git a/searcher_script.py b/searcher_script.py index 699eb87..e3bd5fe 100644 --- a/searcher_script.py +++ b/searcher_script.py @@ -116,8 +116,6 @@ def _get_param_iterator(self): self.n_iter, random_state=self.random_state, ) - # for i in out: - # print(i) return out From 55ccdbdf619eb7cea971f49ea255d124a4bf6171 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Wed, 14 Aug 2019 12:55:39 -0400 Subject: [PATCH 20/37] CR updates. Adding test and fixing some doc strings. --- foreshadow/base.py | 8 +-- .../test_smart/test_smart.py | 57 +++++++++++++++---- 2 files changed, 49 insertions(+), 16 deletions(-) diff --git a/foreshadow/base.py b/foreshadow/base.py index c9bd074..a0ad175 100644 --- a/foreshadow/base.py +++ b/foreshadow/base.py @@ -1,6 +1,7 @@ """Foreshadow version of sklearn.base.py.""" import patchy -from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.base import TransformerMixin # noqa: F401 +from sklearn.base import BaseEstimator _set_params = BaseEstimator.set_params @@ -18,14 +19,13 @@ ) """sklearn.base.BaseEstiamtor uses the valid_params to set the params. -In our use cases, we often modify both an objet and its params. In this case, +In our use cases, we often modify both an object and its params. In this case, the setattr(self, key, value) will change this object, but the valid_params[key] will have a reference to the old object, not setting the params on the new object. This is a big issue when we try to simultaneously -change both an object and its params simultaneously. For instsance, +change both an object and its params =. For instsance, see smart where we set both a transformer and that transformer's params. 
""" BaseEstimator.set_params = _set_params -TransformerMixin = TransformerMixin diff --git a/foreshadow/tests/test_transformers/test_smart/test_smart.py b/foreshadow/tests/test_transformers/test_smart/test_smart.py index 54a4aeb..035fcbe 100644 --- a/foreshadow/tests/test_transformers/test_smart/test_smart.py +++ b/foreshadow/tests/test_transformers/test_smart/test_smart.py @@ -21,8 +21,23 @@ def pick_transformer(self, X, y=None, **fit_params): yield TestSmartTransformer +@pytest.fixture() +def smart_params(): + """Get the params for a defined SmartTransformer subclass.""" + yield { + "check_wrapped": True, + "column_sharer": None, + "force_reresolve": False, + "keep_columns": False, + "name": None, + "should_resolve": True, + "transformer": None, + "y_var": False, + } + + @pytest.mark.parametrize("deep", [True, False]) -def test_smart_get_params_default(smart_child, deep): +def test_smart_get_params_default(smart_child, smart_params, deep): """Ensure that default get_params works. Args: @@ -32,17 +47,35 @@ def test_smart_get_params_default(smart_child, deep): """ smart = smart_child() params = smart.get_params(deep=deep) - default_state = { - "check_wrapped": True, - "column_sharer": None, - "force_reresolve": False, - "keep_columns": False, - "name": None, - "should_resolve": True, - "transformer": None, - "y_var": False, - } - assert default_state == params + assert smart_params == params + + +@pytest.mark.parametrize( + "initial_transformer", [None, "BoxCox", "StandardScaler"] +) +def test_smart_get_params_deep(smart_child, smart_params, initial_transformer): + """Test that smart.get_params(deep=True) functions as desired. + + Args: + smart_child: SmartTransformer subclass instance fixture + smart_params: default params for above + initial_transformer: the transformer to set on smart.transformer for + the test. 
+ + """ + smart = smart_child() + smart.transformer = initial_transformer + try: + nested_params = smart.transformer.get_params(deep=True) + nested_params = { + "transformer__" + key: val for key, val in nested_params.items() + } + nested_params["should_resolve"] = False + except AttributeError: # case of None + nested_params = {} + nested_params["transformer"] = smart.transformer + smart_params.update(nested_params) + assert smart.get_params(True) == smart_params @pytest.mark.parametrize( From 2ee90f39524177b629aeb8fe38ac71c01c0bd45f Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Wed, 14 Aug 2019 14:13:05 -0400 Subject: [PATCH 21/37] Testing ParamSpec --- foreshadow/optimizers/param_distribution.py | 35 ++++++++++---- foreshadow/tests/test_optimizers/__init__.py | 0 .../test_param_distribution.py | 12 +++++ searcher_script.py | 47 +++++++++++++++++++ 4 files changed, 86 insertions(+), 8 deletions(-) create mode 100644 foreshadow/tests/test_optimizers/__init__.py create mode 100644 foreshadow/tests/test_optimizers/test_param_distribution.py diff --git a/foreshadow/optimizers/param_distribution.py b/foreshadow/optimizers/param_distribution.py index 1403da6..8d21443 100644 --- a/foreshadow/optimizers/param_distribution.py +++ b/foreshadow/optimizers/param_distribution.py @@ -1,7 +1,8 @@ """Classes to be configured by user for customizing parameter tuning.""" -import foreshadow as fs +from hyperopt import hp import foreshadow.serializers as ser +from collections import MutableMapping """ @@ -28,18 +29,36 @@ """ -class ParamSpec(ser.ConcreteSerializerMixin): - def __init__(self, fs_pipeline, X_df, Y_df): - self.fs_pipeline = fs_pipeline - params = self.fs_pipeline.get_params() - print(params) +class ParamSpec(MutableMapping, ser.ConcreteSerializerMixin): + def __init__(self, fs_pipeline=None, X_df=None, y_df=None): + if not (fs_pipeline is None) == (X_df is None) == (y_df is None): + raise ValueError("Either all kwargs are None or all are set. To " + "use automatic param determination, pass all " + "kwargs. Otherwise, manual setting can be " + "accomplished using set_params.") + self._param_set = False + self.param_distribution = [] + if not (fs_pipeline is None) and (X_df is None) and (y_df) is None: + params = fs_pipeline.get_params() + for kwarg in kwargs: + key, delim, subkey = kwarg.partition('__') + self.param_distribution[key] = {} + while delim != '': + pass + self._param_set = True def get_params(self, deep=True): - + return self.param_distribution def set_params(self, **params): - pass + self.param_distribibution = params['param_distribution'] + self._param_set = True + + def __call__(self): + return self.param_distribibution + def __getitem__(self, item): + return self. 
if __name__ == '__main__': diff --git a/foreshadow/tests/test_optimizers/__init__.py b/foreshadow/tests/test_optimizers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/foreshadow/tests/test_optimizers/test_param_distribution.py b/foreshadow/tests/test_optimizers/test_param_distribution.py new file mode 100644 index 0000000..75b5916 --- /dev/null +++ b/foreshadow/tests/test_optimizers/test_param_distribution.py @@ -0,0 +1,12 @@ +"""Test param_distribution.py""" + +import pytest + + +@pytest.fixture() +def paas(): + pass + + +def test_swap_data_preparer(): + pass \ No newline at end of file diff --git a/searcher_script.py b/searcher_script.py index e3bd5fe..6d1fa2d 100644 --- a/searcher_script.py +++ b/searcher_script.py @@ -53,6 +53,53 @@ }, ], ) +from collections import MutableMapping +class ParamSpec(MutableMapping): + def __init__(self, fs_pipeline=None, X_df=None, y_df=None): + if not (fs_pipeline is None) == (X_df is None) == (y_df is None): + raise ValueError("Either all kwargs are None or all are set. To " + "use automatic param determination, pass all " + "kwargs. Otherwise, manual setting can be " + "accomplished using set_params.") + self._param_set = False + self.param_distributions = [] + if not (fs_pipeline is None) and (X_df is None) and (y_df) is None: + params = fs_pipeline.get_params() + for kwarg in kwargs: + key, delim, subkey = kwarg.partition('__') + self.param_distribution[key] = {} + while delim != '': + pass + self._param_set = True + + def get_params(self, deep=True): + return self.param_distribution + + def set_params(self, **params): + self.param_distribibutions = params['param_distributions'] + self._param_set = True + + def __call__(self): + return self.param_distribibutions + + def __iter__(self): + return iter(self.param_distributions) + + def __getitem__(self, item): + return self.param_distributions[item] + + def __setitem__(self, key, value): + self.param_distributions[key] = value + + def __len__(self): + return len(self.param_distributions) + + def __delitem__(self, key): + raise NotImplementedError('test') + +# ps = ParamSpec() +# ps.set_params(param_distributions=param_distributions) +# param_distributions = ps class HyperOptSampler(object): From f4db60539c1a56f306eec72dce31cca1840ac417 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Wed, 14 Aug 2019 16:30:40 -0400 Subject: [PATCH 22/37] Renaming optimizer to tuner, separate search from tuner. 
--- foreshadow/optimizers/random_search.py | 76 +++++++++++++++++++ .../optimizers/{optimizer.py => tuner.py} | 73 ++---------------- searcher_script.py | 2 +- 3 files changed, 84 insertions(+), 67 deletions(-) create mode 100644 foreshadow/optimizers/random_search.py rename foreshadow/optimizers/{optimizer.py => tuner.py} (54%) diff --git a/foreshadow/optimizers/random_search.py b/foreshadow/optimizers/random_search.py new file mode 100644 index 0000000..000993a --- /dev/null +++ b/foreshadow/optimizers/random_search.py @@ -0,0 +1,76 @@ +"""Random optimization of params.""" + +import six +from sklearn.model_selection._search import BaseSearchCV +from sklearn.utils import check_random_state + +import hyperopt.pyll.stochastic as stoch +from hyperopt import hp + +from .tuner import _replace_list + + +class HyperOptRandomSampler(object): + def __init__(self, param_distributions, n_iter, random_state=None): + self.param_distributions = _replace_list( + None, param_distributions, hp.choice + ) + self.n_iter = n_iter + self.random_state = random_state + + def __iter__(self): + # check if all distributions are given as lists + # in this case we want to sample without replacement + rng = check_random_state(self.random_state) + for _ in six.moves.range(self.n_iter): + # import pdb; pdb.set_trace() + yield stoch.sample(self.param_distributions, rng=rng) + + def __len__(self): + """Number of points that will be sampled.""" + return self.n_iter + + +class RandomSearchCV(BaseSearchCV): + def __init__( + self, + estimator, + param_distributions, + n_iter=10, + scoring=None, + fit_params=None, + n_jobs=1, + iid=True, + refit=True, + cv=None, + verbose=0, + pre_dispatch="2*n_jobs", + random_state=None, + error_score="raise", + return_train_score="warn", + ): + self.param_distributions = param_distributions + self.n_iter = n_iter + self.random_state = random_state + super().__init__( + estimator=estimator, + scoring=scoring, + fit_params=fit_params, + n_jobs=n_jobs, + iid=iid, + refit=refit, + cv=cv, + verbose=verbose, + pre_dispatch=pre_dispatch, + error_score=error_score, + return_train_score=return_train_score, + ) + + def _get_param_iterator(self): + """Return ParameterSampler instance for the given distributions""" + out = HyperOptRandomSampler( + self.param_distributions, + self.n_iter, + random_state=self.random_state, + ) + return out diff --git a/foreshadow/optimizers/optimizer.py b/foreshadow/optimizers/tuner.py similarity index 54% rename from foreshadow/optimizers/optimizer.py rename to foreshadow/optimizers/tuner.py index 6beb285..b5ccc7f 100644 --- a/foreshadow/optimizers/optimizer.py +++ b/foreshadow/optimizers/tuner.py @@ -3,10 +3,12 @@ import six from sklearn.model_selection._search import BaseSearchCV from sklearn.utils import check_random_state -import hyperopt.pyll.stochastic as stoch -from hyperopt import hp + import foreshadow as fs import foreshadow.serializers as ser +import hyperopt.pyll.stochastic as stoch +from foreshadow.base import BaseEstimator, TransformerMixin +from hyperopt import hp """ @@ -84,67 +86,6 @@ def _replace_list(key, obj, replace_with=hp.choice): return obj -class HyperOptRandomSampler(object): - def __init__(self, param_distributions, n_iter, random_state=None): - self.param_distributions = _replace_list(None, - param_distributions, - hp.choice) - self.n_iter = n_iter - self.random_state = random_state - - def __iter__(self): - # check if all distributions are given as lists - # in this case we want to sample without replacement - rng = 
check_random_state(self.random_state) - for _ in six.moves.range(self.n_iter): - # import pdb; pdb.set_trace() - yield stoch.sample(self.param_distributions, rng=rng) - - def __len__(self): - """Number of points that will be sampled.""" - return self.n_iter - - -class RandomSearchCV(BaseSearchCV): - def __init__( - self, - estimator, - param_distributions, - n_iter=10, - scoring=None, - fit_params=None, - n_jobs=1, - iid=True, - refit=True, - cv=None, - verbose=0, - pre_dispatch="2*n_jobs", - random_state=None, - error_score="raise", - return_train_score="warn", - ): - self.param_distributions = param_distributions - self.n_iter = n_iter - self.random_state = random_state - super().__init__( - estimator=estimator, - scoring=scoring, - fit_params=fit_params, - n_jobs=n_jobs, - iid=iid, - refit=refit, - cv=cv, - verbose=verbose, - pre_dispatch=pre_dispatch, - error_score=error_score, - return_train_score=return_train_score, - ) - - def _get_param_iterator(self): - """Return ParameterSampler instance for the given distributions""" - out = HyperOptRandomSampler( - self.param_distributions, - self.n_iter, - random_state=self.random_state, - ) - return out \ No newline at end of file +class Tuner(BaseEstimator, TransformerMixin) + """Tunes the forshadow object using a ParamSpec and Optimizer.""" + def __init__ diff --git a/searcher_script.py b/searcher_script.py index 93c866a..3740e1d 100644 --- a/searcher_script.py +++ b/searcher_script.py @@ -65,7 +65,7 @@ }, ] -from foreshadow.optimizers.optimizer import _replace_list +from foreshadow.optimizers.tuner import _replace_list print(_replace_list(None, test)) print(param_distributions) From a1feddc936df5f61c2b57b4694278367280f8d05 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Wed, 14 Aug 2019 16:41:47 -0400 Subject: [PATCH 23/37] Final CR changes. --- foreshadow/base.py | 51 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/foreshadow/base.py b/foreshadow/base.py index a0ad175..89b593d 100644 --- a/foreshadow/base.py +++ b/foreshadow/base.py @@ -7,12 +7,15 @@ _set_params = BaseEstimator.set_params patchy.patch( _set_params, - """@@ -30,6 +30,6 @@ + """@@ -30,6 +30,9 @@ setattr(self, key, value) for key, sub_params in nested_params.items(): - valid_params[key].set_params(**sub_params) -+ getattr(self, key).set_params(**sub_params) ++ try: ++ getattr(self, key).set_params(**sub_params) ++ except AttributeError: # for Pipelines ++ valid_params[key].set_params(**sub_params) return self """, @@ -20,11 +23,47 @@ """sklearn.base.BaseEstiamtor uses the valid_params to set the params. In our use cases, we often modify both an object and its params. In this case, -the setattr(self, key, value) will change this object, but the -valid_params[key] will have a reference to the old object, not setting the -params on the new object. This is a big issue when we try to simultaneously -change both an object and its params =. For instsance, +the setattr(self, key, value) will change this object (key will refer to its +attribute on the parent object, value to the object itself), but the +valid_params[key] will have a reference to the old aggregate object, +not setting the params on the new object. This is a big issue when we try to +simultaneously change both an object and its params. For instance, see smart where we set both a transformer and that transformer's params. 
+
+In the case of Smart,
+where Smart.transformer is a Transformer object, we would see this:
+
+smart = Smart()
+smart.transformer = StandardScaler()
+
+smart.set_params(**{'transformer': BoxCox(), 'transformer__param': some_value})
+
+First, we get the valid params for this object (smart).
+valid_params = self.get_params()
+# valid_params['transformer'] == StandardScaler
+
+set_params does some checking on the params being set.
+Now, set_params will set the transformer instance first, before its nested
+params, which is desired.
+
+setattr(self, 'transformer', BoxCox())
+
+# Note, valid_params['transformer'] is still StandardScaler.
+
+Now, we set the nested params for the smart.transformer object
+({'transformer__param': some_value})
+
+We do this in the nested_params section, which will use the previously
+acquired valid_params.
+valid_params['transformer'].set_params(**{'transformer__param': some_value})
+
+This would in fact be StandardScaler, NOT BoxCox!
+This is why we do getattr to get the BoxCox which would have been previously
+set by the setattr call above.
+
+
+We default back to valid_params[key] when this fails, as we are dealing with
+a Pipeline object which works differently.
 """

From f594c436913a82c2f4924c4475830fc46112bb Mon Sep 17 00:00:00 2001
From: Christopher Choquette Choo
Date: Fri, 16 Aug 2019 14:17:40 -0400
Subject: [PATCH 24/37] Working tuning version. Changes to foreshadow to
 support this new tuner. IMPORTANT: Changes to preparerstep and to
 DataPreparer to enable initialization from set_params.

---
 foreshadow/foreshadow.py                    |  64 +++-
 foreshadow/optimizers/__init__.py           |  32 +-
 foreshadow/optimizers/param_distribution.py |  10 +-
 foreshadow/optimizers/tuner.py              |  51 ++-
 foreshadow/preparer.py                      |  52 ++--
 foreshadow/steps/preparerstep.py            |  31 +-
 foreshadow/tests/test_foreshadow.py         |  12 +-
 .../test_cleaners/test_data_cleaner.py      |  17 +-
 searcher_script.py                          | 291 ++++++++++--------
 9 files changed, 367 insertions(+), 193 deletions(-)

diff --git a/foreshadow/foreshadow.py b/foreshadow/foreshadow.py
index 1280ddf..7019f1b 100644
--- a/foreshadow/foreshadow.py
+++ b/foreshadow/foreshadow.py
@@ -4,6 +4,7 @@
 import warnings
 
 from sklearn.model_selection._search import BaseSearchCV
+from sklearn.utils.validation import check_is_fitted
 
 from foreshadow.base import BaseEstimator
 from foreshadow.columnsharer import ColumnSharer
@@ -12,6 +13,11 @@
 from foreshadow.pipeline import SerializablePipeline
 from foreshadow.preparer import DataPreparer
 from foreshadow.utils import check_df
+from foreshadow.optimizers import (
+    ParamSpec,
+    Tuner,
+    test_params
+)
 
 
 class Foreshadow(BaseEstimator):
@@ -37,17 +43,23 @@ class Foreshadow(BaseEstimator):
 
     """
 
-    def __init__(
-        self, X_preparer=None, y_preparer=None, estimator=None, optimizer=None
-    ):
+    def __init__(self,
+                 X_preparer=None,
+                 y_preparer=None,
+                 estimator=None,
+                 optimizer=None,
+                 optimizer_kwargs=None,):
         self.X_preparer = X_preparer
         self.y_preparer = y_preparer
         self.estimator = estimator
         self.optimizer = optimizer
+        self.optimizer_kwargs = {} if optimizer_kwargs is \
+            None else optimizer_kwargs
         self.pipeline = None
         self.data_columns = None
 
         if isinstance(self.estimator, AutoEstimator) and optimizer is not None:
+            # TODO implement V2 architecture here.
             warnings.warn(
                 "An automatic estimator cannot be used with an optimizer."
" Proceeding without use of optimizer" @@ -77,7 +89,8 @@ def X_preparer(self, dp): elif isinstance(dp, DataPreparer): self._X_preprocessor = dp else: - raise ValueError("Invalid value passed as X_preparer") + raise ValueError("Invalid value: '{}' " + "passed as X_preparer".format(dp)) else: self._X_preprocessor = DataPreparer(column_sharer=ColumnSharer()) @@ -160,7 +173,17 @@ def optimizer(self, o): if o is None or (inspect.isclass(o) and issubclass(o, BaseSearchCV)): self._optimizer = o else: - raise ValueError("Invalid value passed as optimizer") + raise ValueError("Invalid optimizer passed.") + + def _reset(self): + try: + check_is_fitted(self, 'pipeline') + del self.pipeline + check_is_fitted(self, 'tuner') + del self.tuner + del self.opt_instance + except: + pass def fit(self, data_df, y_df): """Fit the Foreshadow instance using the provided input data. @@ -173,6 +196,7 @@ def fit(self, data_df, y_df): :obj:`Foreshadow`: The fitted instance. """ + self._reset() X_df = check_df(data_df) y_df = check_df(y_df) self.data_columns = X_df.columns.values.tolist() @@ -192,17 +216,33 @@ def fit(self, data_df, y_df): ) if self.optimizer is not None: + self.pipeline.fit(X_df, y_df) + self.pipeline.predict(X_df) + # print(self.pipeline.get_params(deep=True)) + for key in self.pipeline.get_params().keys(): + if key.find('feature_preprocessor') != -1: + print(key) + print(test_params[0]) + print([x in self.pipeline.get_params().keys() for x in + test_params[0].keys()]) + self.pipeline.set_params(**test_params[0]) + self.pipeline.fit(X_df, y_df) # Calculate parameter search space # param_ranges = param_mapping(deepcopy(self.pipeline), X_df, y_df) - - self.opt_instance = self.optimizer( - self.pipeline, param_ranges # noqa: F821 - ) - self.opt_instance.fit(X_df, y_df) - self.pipeline = self.opt_instance.best_estimator_ + params = ParamSpec() + params.set_params(param_distributions=test_params) + self.opt_instance = self.optimizer(estimator=self.pipeline, + param_distributions=params, + **{'iid': True, + "scoring": "accuracy", + "n_iter": 2, + 'return_train_score': True}) + self.tuner = Tuner(self.pipeline, params, self.opt_instance) + self.tuner.fit(X_df, y_df) + self.pipeline = self.tuner.transform(self.pipeline) # extract trained preprocessors if self.X_preparer is not None: - self.X_preparer = self.opt_instance.best_estimator_.steps[0][1] + self.X_preparer = self.pipeline.steps[0][1] if self.y_preparer is not None: self.y_preparer = self.opt_instance.best_estimator_.steps[1][ 1 diff --git a/foreshadow/optimizers/__init__.py b/foreshadow/optimizers/__init__.py index 22382df..7858e61 100644 --- a/foreshadow/optimizers/__init__.py +++ b/foreshadow/optimizers/__init__.py @@ -2,5 +2,35 @@ # from foreshadow.optimizers.param_mapping import param_mapping from foreshadow.optimizers.param_distribution import ParamSpec +from foreshadow.optimizers.tuner import Tuner +from foreshadow.optimizers.random_search import RandomSearchCV +from foreshadow.utils import get_transformer + +test_params = [ + { + "s__transformer": "StandardScaler", + "s__transformer__with_mean": [False,True], + }, + { + "s__transformer": "MinMaxScaler", + "s__transformer__feature_range": [(0, 1), (0, 0.5)], + }, + ] + +test_params = [ + { + "X_preparer__feature_preprocessor___parallel_process__group" + ": 0__CategoricalEncoder__transformer__ohe": + get_transformer("OneHotEncoder")(), + "X_preparer__feature_preprocessor___parallel_process__group" + ": 0__CategoricalEncoder__transformer__ohe__drop_invariant": + [True, False], + }, + { + 
"X_preparer__feature_preprocessor___parallel_process__group" + ": 0__CategoricalEncoder__transformer__ohe": "LabelEncoder" + }, + ] + +__all__ = ["ParamSpec", "Tuner", "RandomSearchCV", "test_params"] -__all__ = ["ParamSpec"] diff --git a/foreshadow/optimizers/param_distribution.py b/foreshadow/optimizers/param_distribution.py index 4b81767..9c2d3e9 100644 --- a/foreshadow/optimizers/param_distribution.py +++ b/foreshadow/optimizers/param_distribution.py @@ -1,6 +1,5 @@ """Classes to be configured by user for customizing parameter tuning.""" -from hyperopt import hp import foreshadow.serializers as ser from collections import MutableMapping @@ -114,11 +113,6 @@ def __setitem__(self, key, value): def __len__(self): return len(self.param_distributions) - def __delitem__(self, key): + def __delitem__(self, key): # overriding abstract method but should not + # be called raise NotImplementedError('') - - -if __name__ == '__main__': - # ParamSpec().to_json("test") - from foreshadow import Foreshadow - ParamSpec(Foreshadow()) \ No newline at end of file diff --git a/foreshadow/optimizers/tuner.py b/foreshadow/optimizers/tuner.py index b5ccc7f..258fdee 100644 --- a/foreshadow/optimizers/tuner.py +++ b/foreshadow/optimizers/tuner.py @@ -9,6 +9,10 @@ import hyperopt.pyll.stochastic as stoch from foreshadow.base import BaseEstimator, TransformerMixin from hyperopt import hp +from sklearn.utils.validation import check_is_fitted +from sklearn.exceptions import NotFittedError +import importlib +import inspect """ @@ -86,6 +90,47 @@ def _replace_list(key, obj, replace_with=hp.choice): return obj -class Tuner(BaseEstimator, TransformerMixin) - """Tunes the forshadow object using a ParamSpec and Optimizer.""" - def __init__ +def get(optimizer, **optimizer_kwargs): + if isinstance(optimizer, str): + mod = importlib.import_module('foreshadow.optimizers') + return getattr(mod, optimizer)(**optimizer_kwargs) + elif inspect.isclass(optimizer): + return optimizer(**optimizer_kwargs) + return optimizer + + +class Tuner(BaseEstimator, TransformerMixin): + """Tunes the Foreshadow object using a ParamSpec and Optimizer.""" + def __init__(self, pipeline=None, params=None, optimizer=None, + optimizer_kwargs={}): + if pipeline is None: + raise ValueError("'pipeline' is a required arg and is only set to " + "None due to sklearn get_params requirements.") + if params is None: + raise ValueError("'params' is a required arg and is only set to " + "None due to sklearn get_params requirements.") + self.pipeline = pipeline + self.params = params + self.optimizer_kwargs = optimizer_kwargs + self.optimizer = get(optimizer, + estimator=self.pipeline, + param_distributions=self.params, + **self.optimizer_kwargs) + + def _reset(self): + try: + check_is_fitted(self, 'best_pipeline') + del self.best_pipeline + del self.best_params + except NotFittedError: + pass + + def fit(self, X, y, **fit_params): + self._reset() + self.optimizer.fit(X, y, **fit_params) + self.best_pipeline = self.optimizer.best_estimator_ + self.best_params = self.optimizer.best_params_ + + def transform(self, pipeline): + check_is_fitted(self, 'best_pipeline') + return self.best_pipeline diff --git a/foreshadow/preparer.py b/foreshadow/preparer.py index 5ebd5fd..d3bee3b 100644 --- a/foreshadow/preparer.py +++ b/foreshadow/preparer.py @@ -55,13 +55,11 @@ def __init__( reducer_kwargs=None, modeler_kwargs=None, y_var=None, + **kwargs, ): - self.column_sharer = column_sharer - # TODO look at fixing structure so we don't have to import inside init. 
cleaner_kwargs_ = _none_to_dict( "cleaner_kwargs", cleaner_kwargs, column_sharer ) - self.y_var = y_var intent_kwargs_ = _none_to_dict( "intent_kwargs", intent_kwargs, column_sharer ) @@ -74,31 +72,35 @@ def __init__( reducer_kwargs_ = _none_to_dict( "reducer_kwargs", reducer_kwargs, column_sharer ) + if not y_var: + steps = [ + ("data_cleaner", CleanerMapper(**cleaner_kwargs_)), + ("intent", IntentMapper(**intent_kwargs_)), + ( + "feature_engineerer", + FeatureEngineererMapper(**engineerer_kwargs_), + ), + ( + "feature_preprocessor", + Preprocessor(**preprocessor_kwargs_), + ), + ( + "feature_reducer", + FeatureReducerMapper(**reducer_kwargs_), + ), + ] + else: + steps = [("output", NoTransform())] + if 'steps' in kwargs: # needed for sklearn estimator clone, + # which will try to init the object using get_params. + steps = kwargs.pop('steps') + + self.column_sharer = column_sharer + self.y_var = y_var # modeler_kwargs_ = _none_to_dict( # "modeler_kwargs", modeler_kwargs, column_sharer # ) - if not self.y_var: - super().__init__( - steps=[ - ("data_cleaner", CleanerMapper(**cleaner_kwargs_)), - ("intent", IntentMapper(**intent_kwargs_)), - ( - "feature_engineerer", - FeatureEngineererMapper(**engineerer_kwargs_), - ), - ( - "feature_preprocessor", - Preprocessor(**preprocessor_kwargs_), - ), - ( - "feature_reducer", - FeatureReducerMapper(**reducer_kwargs_), - ), - # ('model_selector', modeler_kwargs_) - ] # TODO add each of these components - ) - else: - super().__init__(steps=[("output", NoTransform())]) + super().__init__(steps, **kwargs) def _get_params(self, attr, deep=True): # attr will be 'steps' if called from pipeline.get_params() diff --git a/foreshadow/steps/preparerstep.py b/foreshadow/steps/preparerstep.py index 23645ac..6eaf1f3 100644 --- a/foreshadow/steps/preparerstep.py +++ b/foreshadow/steps/preparerstep.py @@ -1,6 +1,8 @@ """General base classes used across Foreshadow.""" from collections import MutableMapping, defaultdict, namedtuple +from sklearn.utils.validation import check_is_fitted + from foreshadow.base import BaseEstimator, TransformerMixin from foreshadow.concrete.internals.notransform import NoTransform from foreshadow.logging import logging @@ -262,10 +264,14 @@ class instead of normal Pipelines. .. #noqa: I102 **kwargs: kwargs to PIpeline constructor. """ + self._parallel_process = None + if "_parallel_process" in kwargs: # clone will try to init using + # the params from get_params, meaning this will be passed + # through even though its not a part of the init. + self._parallel_process = kwargs.pop("_parallel_process") self.column_sharer = column_sharer if self.column_sharer is None: self.column_sharer = ColumnSharer() - self._parallel_process = None super().__init__(**kwargs) @staticmethod @@ -463,13 +469,12 @@ def check_process(self, X): X: input DataFrame """ - if self._parallel_process is None: - logging.debug( - "DataPreparerStep: {} called check_process".format( - self.__class__.__name__ - ) + logging.debug( + "DataPreparerStep: {} called check_process".format( + self.__class__.__name__ ) - self._parallel_process = self.parallelize_smart_steps(X) + ) + self._parallel_process = self.parallelize_smart_steps(X) def fit_transform(self, X, y=None, **fit_params): """Fit then transform this PreparerStep. 
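The preparer.py and preparerstep.py changes above both serve sklearn's clone contract: clone() re-instantiates an estimator by passing get_params(deep=False) back into __init__, so any key get_params reports (steps here, _parallel_process below) must be accepted by the constructor. A self-contained sketch of that contract, with Toy standing in for DataPreparer:

# Sketch of the clone contract motivating the **kwargs handling above;
# 'Toy' is a stand-in class, not foreshadow code.
from sklearn.base import BaseEstimator, clone


class Toy(BaseEstimator):
    def __init__(self, steps=None, **kwargs):
        # Accept every reported param back through __init__, as the
        # DataPreparer change above does for 'steps'.
        self.steps = [] if steps is None else steps


toy = Toy(steps=[("noop", None)])
rebuilt = clone(toy)  # effectively Toy(**toy.get_params(deep=False))
assert rebuilt.steps == toy.steps
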
@@ -502,6 +507,8 @@ def transform(self, X, *args, **kwargs): result from .transform() """ + if getattr(self, '_parallel_process', None) is None: + raise ValueError('not fitted.') return self._parallel_process.transform(X, *args, **kwargs) def inverse_transform(self, X, *args, **kwargs): @@ -538,6 +545,8 @@ def _get_param_names(cls): while cls.__name__ != PreparerStep.__name__: cls = cls.__mro__[1] params += cls._get_param_names() + if '_parallel_process' not in params: + params += ['_parallel_process'] return params def get_params(self, deep=True): @@ -555,9 +564,9 @@ def get_params(self, deep=True): """ params = super().get_params(deep=deep) - params.update( - {"_parallel_process": getattr(self, "_parallel_process", None)} - ) + # params.update( + # {"_parallel_process": getattr(self, "_parallel_process", None)} + # ) return params def set_params(self, **params): @@ -571,5 +580,5 @@ def set_params(self, **params): **params: see super. """ - self._parallel_process = params.pop("_parallel_process", None) + # self._parallel_process = params.pop("_parallel_process", None) super().set_params(**params) diff --git a/foreshadow/tests/test_foreshadow.py b/foreshadow/tests/test_foreshadow.py index 25f3dfc..741a341 100644 --- a/foreshadow/tests/test_foreshadow.py +++ b/foreshadow/tests/test_foreshadow.py @@ -13,12 +13,12 @@ def test_foreshadow_defaults(): foreshadow = Foreshadow() # defaults assert ( - isinstance(foreshadow.X_preparer, DataPreparer) - and isinstance(foreshadow.y_preparer, DataPreparer) - and isinstance(foreshadow.estimator, AutoEstimator) - and foreshadow.optimizer is None - and foreshadow.pipeline is None - and foreshadow.data_columns is None + isinstance(foreshadow.X_preparer, DataPreparer) + and isinstance(foreshadow.y_preparer, DataPreparer) + and isinstance(foreshadow.estimator, AutoEstimator) + and foreshadow.optimizer is None + and foreshadow.pipeline is None + and foreshadow.data_columns is None ) diff --git a/foreshadow/tests/test_transformers/test_concrete/test_cleaners/test_data_cleaner.py b/foreshadow/tests/test_transformers/test_concrete/test_cleaners/test_data_cleaner.py index 2d377c1..8253789 100644 --- a/foreshadow/tests/test_transformers/test_concrete/test_cleaners/test_data_cleaner.py +++ b/foreshadow/tests/test_transformers/test_concrete/test_cleaners/test_data_cleaner.py @@ -114,7 +114,6 @@ def test_json(): "json_random", ], ) - print(data.values, check.values) assert np.all( np.equal(data.values[data.notna()], check.values[check.notna()]) ) @@ -167,4 +166,20 @@ def test_numerical_input_fittransform(): assert np.array_equal(transformed_data, data) +# def test_get_params(): +# import pandas as pd +# from foreshadow.preparer import CleanerMapper +# from foreshadow.columnsharer import ColumnSharer +# +# columns = ["financials"] +# data = pd.DataFrame({"financials": ["", "", "", ""]}, columns=columns) +# cs = ColumnSharer() +# dc = CleanerMapper(column_sharer=cs) +# dc.fit(data) +# from pprint import pprint +# print(pprint(dc.get_params())) + # print(dc._parallel_process) + + + # TODO test graph, could be implemented very wrong. 
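The commented-out test at the end of test_data_cleaner.py above hints at what the _parallel_process plumbing should guarantee once it settles. Made live, it would look roughly like this; the final assertion is an assumption about the intended behavior, following the _get_param_names change that always reports "_parallel_process":

# Live sketch of the commented-out test above; the last assert encodes an
# assumption about intended behavior, not a committed test.
import pandas as pd

from foreshadow.columnsharer import ColumnSharer
from foreshadow.preparer import CleanerMapper


def test_get_params():
    columns = ["financials"]
    data = pd.DataFrame({"financials": ["", "", "", ""]}, columns=columns)
    cs = ColumnSharer()
    dc = CleanerMapper(column_sharer=cs)
    dc.fit(data)
    assert "_parallel_process" in dc.get_params()
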
diff --git a/searcher_script.py b/searcher_script.py index 3740e1d..59db646 100644 --- a/searcher_script.py +++ b/searcher_script.py @@ -18,15 +18,14 @@ from foreshadow.smart import Scaler from foreshadow.steps import CleanerMapper, IntentMapper, Preprocessor from foreshadow.utils.testing import debug - +from foreshadow.optimizers import RandomSearchCV debug() - data = dt.load_iris() X_data = pd.DataFrame(data.data, columns=data.feature_names).iloc[:, 0] -y_data = pd.DataFrame(data.target, columns=["target"]) +y_data = pd.DataFrame(data.target, columns=["target"])['target'] # cs = ColumnSharer() # p = Preprocessor(column_sharer=cs) @@ -38,131 +37,133 @@ pipe.fit(X_data, y_data) param_distributions = hp.choice( - "s__transformer", - [ - { - "s__transformer": "StandardScaler", - "s__transformer__with_mean": hp.choice("with_mean", [False, - True]), - }, - { - "s__transformer": "MinMaxScaler", - "s__transformer__feature_range": hp.choice( - "feature_range", [(0, 1), (0, 0.5)] - ), - }, - ], - ) + "s__transformer", + [ + { + "s__transformer": "StandardScaler", + "s__transformer__with_mean": hp.choice("with_mean", [False, + True]), + }, + { + "s__transformer": "MinMaxScaler", + "s__transformer__feature_range": hp.choice( + "feature_range", [(0, 1), (0, 0.5)] + ), + }, + ], +) test = [ - { - "s__transformer": "StandardScaler", - "s__transformer__with_mean": [False,True], - }, - { - "s__transformer": "MinMaxScaler", - "s__transformer__feature_range": [(0, 1), (0, 0.5)], - }, - ] + { + "s__transformer": "StandardScaler", + "s__transformer__with_mean": [False, True], + }, + { + "s__transformer": "MinMaxScaler", + "s__transformer__feature_range": [(0, 1), (0, 0.5)], + }, +] from foreshadow.optimizers.tuner import _replace_list -print(_replace_list(None, test)) -print(param_distributions) -# class HyperOptSampler(object): -# def __init__(self, param_distributions, n_iter, random_state=None): -# self.param_distributions = param_distributions -# self.n_iter = n_iter -# self.random_state = random_state -# -# def __iter__(self): -# # check if all distributions are given as lists -# # in this case we want to sample without replacement -# rng = check_random_state(self.random_state) -# for _ in six.moves.range(self.n_iter): -# # import pdb; pdb.set_trace() -# yield stoch.sample(self.param_distributions, rng=rng) -# -# def __len__(self): -# """Number of points that will be sampled.""" -# return self.n_iter -# -# -# class ShadowSearchCV(BaseSearchCV): -# def __init__( -# self, -# estimator, -# param_distributions, -# n_iter=10, -# scoring=None, -# fit_params=None, -# n_jobs=1, -# iid=True, -# refit=True, -# cv=None, -# verbose=0, -# pre_dispatch="2*n_jobs", -# random_state=None, -# error_score="raise", -# return_train_score="warn", -# ): -# self.param_distributions = param_distributions -# self.n_iter = n_iter -# self.random_state = random_state -# super().__init__( -# estimator=estimator, -# scoring=scoring, -# fit_params=fit_params, -# n_jobs=n_jobs, -# iid=iid, -# refit=refit, -# cv=cv, -# verbose=verbose, -# pre_dispatch=pre_dispatch, -# error_score=error_score, -# return_train_score=return_train_score, -# ) -# -# def _get_param_iterator(self): -# """Return ParameterSampler instance for the given distributions""" -# out = HyperOptSampler( -# self.param_distributions, -# self.n_iter, -# random_state=self.random_state, -# ) -# return out -# -# -# # combinations.yaml -# """ -# combinations: -# X_preparer.cleaner.CHAS: -# Cleaner: -# - date: -# - p1 -# - p2 -# - financial -# IntentMapper: -# - Something -# -# 
X_preparer.cleaner.CHAS.CleanerMapper: -# -Something -# -# X_preparer.cleaner.CHAS.IntentMapper: -# -Something -# -# -# X_preparer: -# cleaner: -# CHAS: -# Cleaner: -# date: -# -p1 -# -p2 -# -# """ -# +# print(_replace_list(None, test)) +# print(param_distributions) + + +class HyperOptSampler(object): + def __init__(self, param_distributions, n_iter, random_state=None): + self.param_distributions = param_distributions + self.n_iter = n_iter + self.random_state = random_state + + def __iter__(self): + # check if all distributions are given as lists + # in this case we want to sample without replacement + rng = check_random_state(self.random_state) + for _ in six.moves.range(self.n_iter): + # import pdb; pdb.set_trace() + yield stoch.sample(self.param_distributions, rng=rng) + + def __len__(self): + """Number of points that will be sampled.""" + return self.n_iter + + +class ShadowSearchCV(BaseSearchCV): + def __init__( + self, + estimator, + param_distributions, + n_iter=10, + scoring=None, + fit_params=None, + n_jobs=1, + iid=True, + refit=True, + cv=None, + verbose=0, + pre_dispatch="2*n_jobs", + random_state=None, + error_score="raise", + return_train_score="warn", + ): + self.param_distributions = param_distributions + self.n_iter = n_iter + self.random_state = random_state + super().__init__( + estimator=estimator, + scoring=scoring, + fit_params=fit_params, + n_jobs=n_jobs, + iid=iid, + refit=refit, + cv=cv, + verbose=verbose, + pre_dispatch=pre_dispatch, + error_score=error_score, + return_train_score=return_train_score, + ) + + def _get_param_iterator(self): + """Return ParameterSampler instance for the given distributions""" + out = HyperOptSampler( + self.param_distributions, + self.n_iter, + random_state=self.random_state, + ) + return out + + +# combinations.yaml +""" +combinations: + X_preparer.cleaner.CHAS: + Cleaner: + - date: + - p1 + - p2 + - financial + IntentMapper: + - Something + + X_preparer.cleaner.CHAS.CleanerMapper: + -Something + + X_preparer.cleaner.CHAS.IntentMapper: + -Something + + + X_preparer: + cleaner: + CHAS: + Cleaner: + date: + -p1 + -p2 + +""" + # rscv = ShadowSearchCV( # pipe, param_distributions, iid=True, scoring="accuracy", n_iter=10 # ) @@ -175,7 +176,45 @@ # [c for c in results.columns if all(s not in c for s in ["time", "params"])] # ] # -# print(results) - - -# import pdb; pdb.set_trace() +# print(rscv.best_params_) +# print(rscv.best_estimator_) +# # print(results) + +############### + +# from foreshadow.optimizers import RandomSearchCV, Tuner, ParamSpec +# +# ps = ParamSpec() +# test = [ +# { +# "s__transformer": "StandardScaler", +# "s__transformer__with_mean": [False,True], +# }, +# { +# "s__transformer": "MinMaxScaler", +# "s__transformer__feature_range": [(0, 1), (0, 0.5)], +# }, +# ] +# ps.set_params(param_distributions=test) +# t = Tuner(pipe, ps, RandomSearchCV, optimizer_kwargs={'iid': True, +# "scoring": "accuracy", +# "n_iter": 2, +# 'return_train_score': +# True}) +# t.fit(X_data, y_data) +# print(t.best_pipeline) + +import foreshadow + +t = {'iid': True, + "scoring": "accuracy", + "n_iter": 2, + 'return_train_score': + True} + +fs = foreshadow.Foreshadow( + optimizer=RandomSearchCV, + optimizer_kwargs=t, + estimator=lr, +) +fs.fit(X_data, y_data) From a3897c21dc17f1b380fa767e50c05efa4bf58e42 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Sat, 17 Aug 2019 12:10:58 -0400 Subject: [PATCH 25/37] Working tuning version. Changes to foreshadow to support this new tuner. 
IMPORTANT: Changes to preparerstep and to DataPreparer to enable from initialization from set_params. --- foreshadow/base.py | 2 +- foreshadow/concrete/externals.py | 1 + foreshadow/foreshadow.py | 28 +++++++++++++-------- foreshadow/optimizers/__init__.py | 3 ++- foreshadow/optimizers/param_distribution.py | 6 ++--- foreshadow/optimizers/random_search.py | 5 ++-- foreshadow/preparer.py | 27 +++++++++++++++++--- foreshadow/smart/smart.py | 10 ++++---- foreshadow/steps/preparerstep.py | 21 +++++++++++++--- searcher_script.py | 4 +++ 10 files changed, 77 insertions(+), 30 deletions(-) diff --git a/foreshadow/base.py b/foreshadow/base.py index 89b593d..55f6973 100644 --- a/foreshadow/base.py +++ b/foreshadow/base.py @@ -9,7 +9,7 @@ _set_params, """@@ -30,6 +30,9 @@ setattr(self, key, value) - + for key, sub_params in nested_params.items(): - valid_params[key].set_params(**sub_params) + try: diff --git a/foreshadow/concrete/externals.py b/foreshadow/concrete/externals.py index beceba0..6747ed0 100644 --- a/foreshadow/concrete/externals.py +++ b/foreshadow/concrete/externals.py @@ -16,6 +16,7 @@ MinMaxScaler, RobustScaler, StandardScaler, + LabelEncoder, ) from foreshadow.utils import is_transformer diff --git a/foreshadow/foreshadow.py b/foreshadow/foreshadow.py index 7019f1b..d1afcc4 100644 --- a/foreshadow/foreshadow.py +++ b/foreshadow/foreshadow.py @@ -217,16 +217,16 @@ def fit(self, data_df, y_df): if self.optimizer is not None: self.pipeline.fit(X_df, y_df) - self.pipeline.predict(X_df) + # self.pipeline.predict(X_df) # print(self.pipeline.get_params(deep=True)) - for key in self.pipeline.get_params().keys(): - if key.find('feature_preprocessor') != -1: - print(key) - print(test_params[0]) - print([x in self.pipeline.get_params().keys() for x in - test_params[0].keys()]) - self.pipeline.set_params(**test_params[0]) - self.pipeline.fit(X_df, y_df) + # for key in self.pipeline.get_params().keys(): + # if key.find('feature_preprocessor') != -1: + # print(key) + # print(test_params[0]) + # print([x in self.pipeline.get_params().keys() for x in + # test_params[0].keys()]) + # self.pipeline.set_params(**test_params[0]) + # self.pipeline.fit(X_df, y_df) # Calculate parameter search space # param_ranges = param_mapping(deepcopy(self.pipeline), X_df, y_df) params = ParamSpec() @@ -235,11 +235,19 @@ def fit(self, data_df, y_df): param_distributions=params, **{'iid': True, "scoring": "accuracy", - "n_iter": 2, + "n_iter": 10, 'return_train_score': True}) self.tuner = Tuner(self.pipeline, params, self.opt_instance) self.tuner.fit(X_df, y_df) + import pandas as pd + results = pd.DataFrame(self.opt_instance.cv_results_) + results = results[ + [c for c in results.columns if + all(s not in c for s in ["time", "params"])] + ] + print(results) self.pipeline = self.tuner.transform(self.pipeline) + print(self.pipeline) # extract trained preprocessors if self.X_preparer is not None: self.X_preparer = self.pipeline.steps[0][1] diff --git a/foreshadow/optimizers/__init__.py b/foreshadow/optimizers/__init__.py index 7858e61..88801da 100644 --- a/foreshadow/optimizers/__init__.py +++ b/foreshadow/optimizers/__init__.py @@ -28,7 +28,8 @@ }, { "X_preparer__feature_preprocessor___parallel_process__group" - ": 0__CategoricalEncoder__transformer__ohe": "LabelEncoder" + ": 0__CategoricalEncoder__transformer__ohe": + get_transformer("HashingEncoder")() }, ] diff --git a/foreshadow/optimizers/param_distribution.py b/foreshadow/optimizers/param_distribution.py index 9c2d3e9..ab174dd 100644 --- 
a/foreshadow/optimizers/param_distribution.py
+++ b/foreshadow/optimizers/param_distribution.py
@@ -92,14 +92,14 @@ def __init__(self, fs_pipeline=None, X_df=None, y_df=None):
         # self._param_set = True
 
     def get_params(self, deep=True):
-        return self.param_distribution
+        return self.param_distributions
 
     def set_params(self, **params):
-        self.param_distribibutions = params['param_distributions']
+        self.param_distributions = params['param_distributions']
         self._param_set = True
 
     def __call__(self):
-        return self.param_distribibutions
+        return self.param_distributions
 
     def __iter__(self):
         return iter(self.param_distributions)
diff --git a/foreshadow/optimizers/random_search.py b/foreshadow/optimizers/random_search.py
index 000993a..d875150 100644
--- a/foreshadow/optimizers/random_search.py
+++ b/foreshadow/optimizers/random_search.py
@@ -12,9 +12,10 @@
 
 class HyperOptRandomSampler(object):
     def __init__(self, param_distributions, n_iter, random_state=None):
-        self.param_distributions = _replace_list(
-            None, param_distributions, hp.choice
+        param_distributions = _replace_list(
+            None, param_distributions.param_distributions, hp.choice
         )
+        self.param_distributions = param_distributions
         self.n_iter = n_iter
         self.random_state = random_state
 
diff --git a/foreshadow/preparer.py b/foreshadow/preparer.py
index d3bee3b..dfc6ea9 100644
--- a/foreshadow/preparer.py
+++ b/foreshadow/preparer.py
@@ -43,8 +43,27 @@ def _none_to_dict(name, val, column_sharer=None):
 
 
 class DataPreparer(Pipeline, PipelineSerializerMixin):
-    """Predefined pipeline for the foreshadow workflow."""
+    """Predefined pipeline for the foreshadow workflow. This Pipeline has 5
+    steps:
 
+    1. Cleaning
+    2. Intent selection (data type, one of Categorical, Numerical, and Text)
+    3. Engineering (Based on intent. Feature generation and reduction)
+    4. Preprocessing (Based on intent. Scaling, one hot encoding, etc.)
+    5. Reducing (loosely based on intent. Dimensionality reduction).
+
+    When customizing any of the components within these steps
+    (concrete transformers, SmartTransformers, their params, etc.),
+    the produced columns may change. This entire workflow uses column
+    names to assign steps to their associated columns, so changing
+    components of this workflow may change the column names in the case
+    that column names were generated for your column by an earlier
+    processing step. In this event, we will reinstantiate the entire
+    step (cleaner, intent, etc.) for the column, but only when necessary.
+    """
+
+    # TODO In the future, we will attempt to make this smarter by only
+    # modifying the specific transformers needed within each step.
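
# [Editor's sketch, not part of this diff] The params-driven re-initialization
# the docstring above describes rides on sklearn's nested set_params routing;
# a minimal, runnable illustration of that routing on a plain sklearn Pipeline
# (standard sklearn API, not foreshadow-specific):
#
#     from sklearn.linear_model import LogisticRegression
#     from sklearn.pipeline import Pipeline
#     from sklearn.preprocessing import StandardScaler
#
#     pipe = Pipeline([("s", StandardScaler()), ("lr", LogisticRegression())])
#     pipe.set_params(**{"s__with_mean": False})  # "step__param" keys route to steps
#     assert pipe.named_steps["s"].with_mean is False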
def __init__( self, column_sharer=None, @@ -97,9 +116,6 @@ def __init__( self.column_sharer = column_sharer self.y_var = y_var - # modeler_kwargs_ = _none_to_dict( - # "modeler_kwargs", modeler_kwargs, column_sharer - # ) super().__init__(steps, **kwargs) def _get_params(self, attr, deep=True): @@ -109,3 +125,6 @@ def _get_params(self, attr, deep=True): out.update({"steps": steps}) # manually # adding steps to the get_params() return out + + def set_params(self, **kwargs): + return super().set_params(**kwargs) diff --git a/foreshadow/smart/smart.py b/foreshadow/smart/smart.py index dc35328..e67da4c 100644 --- a/foreshadow/smart/smart.py +++ b/foreshadow/smart/smart.py @@ -84,11 +84,6 @@ def transformer(self): """ return self._transformer - def unset_resolve(self): - """Unset resolving for all passes.""" - self.should_resolve = False - self.force_reresolve = False - @transformer.setter def transformer(self, value): """Validate transformer initialization. @@ -127,6 +122,11 @@ def transformer(self, value): self._transformer = value + def unset_resolve(self): + """Unset resolving for all passes.""" + self.should_resolve = False + self.force_reresolve = False + def get_params(self, deep=True): """Get parameters for this estimator. diff --git a/foreshadow/steps/preparerstep.py b/foreshadow/steps/preparerstep.py index 6eaf1f3..d9abca5 100644 --- a/foreshadow/steps/preparerstep.py +++ b/foreshadow/steps/preparerstep.py @@ -458,8 +458,7 @@ def fit(self, X, *args, **kwargs): """ # TODO make fit remove a step if nothing is done, rather than a # NoTransform Transformer. - self.check_process(X) - self._parallel_process.fit(X, *args, **kwargs) + self.fit_transform(X, *args, **kwargs) return self def check_process(self, X): @@ -490,8 +489,22 @@ def fit_transform(self, X, y=None, **fit_params): Result from .transform() """ - self.check_process(X) - return self._parallel_process.fit_transform(X, y=y, **fit_params) + try: + return self._parallel_process.fit_transform(X, y=y, **fit_params) + except AttributeError: + if getattr(self, '_parallel_process', None) is None: + self.check_process(X) + except KeyError as e: + if str(e).find ('not in index') != -1: + # This indicates that a transformation step was changed and + # now does not correctly reflect the generated DataFrame as + # this step. We will thus reinitialize the _parallel_process + # so that the best pipeline for this step will be found. + self.check_process(X) + finally: + return self._parallel_process.fit_transform(X, y=y, + **fit_params) + def transform(self, X, *args, **kwargs): """Transform X using this PreparerStep. diff --git a/searcher_script.py b/searcher_script.py index 59db646..7b0f82f 100644 --- a/searcher_script.py +++ b/searcher_script.py @@ -184,6 +184,7 @@ def _get_param_iterator(self): # from foreshadow.optimizers import RandomSearchCV, Tuner, ParamSpec # +# print("simpletest") # ps = ParamSpec() # test = [ # { @@ -204,6 +205,9 @@ def _get_param_iterator(self): # t.fit(X_data, y_data) # print(t.best_pipeline) +############### + +print("foreshadow") import foreshadow t = {'iid': True, From c5d7dbffca11a8a694433ada63fe5f6a83ca36ea Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Sun, 18 Aug 2019 21:47:08 -0400 Subject: [PATCH 26/37] adding simple tuning test. 
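
[Editor's note] The test added by this commit pins down the unique-sampling
behaviour of HyperOptRandomSampler. A minimal standalone sketch of the
underlying idea, assuming only hyperopt and scikit-learn (both already
dependencies of this series); the space below mirrors the test fixture and
contains exactly 2 + 2 = 4 unique configurations:

    import hyperopt.pyll.stochastic as stoch
    from hyperopt import hp
    from sklearn.utils import check_random_state

    space = hp.choice(
        "s__transformer",
        [
            {"with_mean": hp.choice("wm", [False, True])},
            {"feature_range": hp.choice("fr", [(0, 1), (0, 0.5)])},
        ],
    )

    rng = check_random_state(0)
    seen = []
    for _ in range(10):  # oversample, then de-duplicate (rejection style)
        sample = stoch.sample(space, rng=rng)
        if sample not in seen:
            seen.append(sample)
    assert len(seen) <= 4  # only 4 unique configurations exist in this space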
--- foreshadow/optimizers/random_search.py | 16 +++++++-- .../test_param_distribution.py | 11 ------ .../test_optimizers/test_random_search.py | 34 +++++++++++++++++++ .../tests/test_optimizers/test_tuner.py | 5 +++ 4 files changed, 53 insertions(+), 13 deletions(-) create mode 100644 foreshadow/tests/test_optimizers/test_random_search.py create mode 100644 foreshadow/tests/test_optimizers/test_tuner.py diff --git a/foreshadow/optimizers/random_search.py b/foreshadow/optimizers/random_search.py index d875150..ce5fadb 100644 --- a/foreshadow/optimizers/random_search.py +++ b/foreshadow/optimizers/random_search.py @@ -11,21 +11,33 @@ class HyperOptRandomSampler(object): - def __init__(self, param_distributions, n_iter, random_state=None): + def __init__(self, param_distributions, n_iter, random_state=None, + max_tries=100): param_distributions = _replace_list( None, param_distributions.param_distributions, hp.choice ) self.param_distributions = param_distributions self.n_iter = n_iter self.random_state = random_state + self.max_tries = max_tries def __iter__(self): # check if all distributions are given as lists # in this case we want to sample without replacement rng = check_random_state(self.random_state) + prev_samples = [] for _ in six.moves.range(self.n_iter): # import pdb; pdb.set_trace() - yield stoch.sample(self.param_distributions, rng=rng) + sample = stoch.sample(self.param_distributions, rng=rng) + n_tries = 0 + while sample not in prev_samples and n_tries < self.max_tries: + if sample not in prev_samples: + prev_samples.append(sample) + break + sample = stoch.sample(self.param_distributions, rng=rng) + n_tries += 1 + + return iter(prev_samples) def __len__(self): """Number of points that will be sampled.""" diff --git a/foreshadow/tests/test_optimizers/test_param_distribution.py b/foreshadow/tests/test_optimizers/test_param_distribution.py index 75b5916..f905cd9 100644 --- a/foreshadow/tests/test_optimizers/test_param_distribution.py +++ b/foreshadow/tests/test_optimizers/test_param_distribution.py @@ -1,12 +1 @@ """Test param_distribution.py""" - -import pytest - - -@pytest.fixture() -def paas(): - pass - - -def test_swap_data_preparer(): - pass \ No newline at end of file diff --git a/foreshadow/tests/test_optimizers/test_random_search.py b/foreshadow/tests/test_optimizers/test_random_search.py new file mode 100644 index 0000000..8efa631 --- /dev/null +++ b/foreshadow/tests/test_optimizers/test_random_search.py @@ -0,0 +1,34 @@ +"""Test random_search.py""" + +import pytest + + +@pytest.fixture() +def simple_distribution(): + yield [ + { + "s__transformer": "StandardScaler", + "s__transformer__with_mean": [False, True], + }, + { + "s__transformer": "MinMaxScaler", + "s__transformer__feature_range": [(0, 1), (0, 0.5)] + }, + ] + + +def test_random_search_simple(simple_distribution): + dist = simple_distribution + + +def test_random_param_list_simple(simple_distribution): + from foreshadow.optimizers.random_search import HyperOptRandomSampler + from foreshadow.optimizers import ParamSpec + dist = simple_distribution + ps = ParamSpec() + ps.set_params(**{"param_distributions": dist}) + Sampler = HyperOptRandomSampler(ps, 10) + samples = [] + for sample in Sampler: + samples.append(sample) + assert len(samples) == 4 # 4 unique samples. 
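
[Editor's note] For orientation, the manual ParamSpec flow the test above
relies on looks roughly like the sketch below. The names come from this patch
series and the API is still in flux at this point, so treat this as
illustrative rather than settled:

    from foreshadow.optimizers import ParamSpec, RandomSearchCV

    spec = ParamSpec()  # no args -> manual mode; set_params supplies the space
    spec.set_params(
        param_distributions=[
            {
                "s__transformer": "StandardScaler",
                "s__transformer__with_mean": [False, True],
            },
            {
                "s__transformer": "MinMaxScaler",
                "s__transformer__feature_range": [(0, 1), (0, 0.5)],
            },
        ]
    )
    # spec is then handed to the searcher together with a pipeline whose
    # scaler step is named "s", e.g.:
    # search = RandomSearchCV(estimator=pipe, param_distributions=spec)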
diff --git a/foreshadow/tests/test_optimizers/test_tuner.py b/foreshadow/tests/test_optimizers/test_tuner.py new file mode 100644 index 0000000..a0e0a8e --- /dev/null +++ b/foreshadow/tests/test_optimizers/test_tuner.py @@ -0,0 +1,5 @@ +"""Test tuner.py""" + +import pytest + + From 8c170436f6415f0df1c43909a8b4d416033def91 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Mon, 19 Aug 2019 11:42:42 -0400 Subject: [PATCH 27/37] Flushing out simple test cases. --- foreshadow/optimizers/random_search.py | 4 +- .../test_optimizers/test_random_search.py | 88 +++++++++++++++++-- 2 files changed, 84 insertions(+), 8 deletions(-) diff --git a/foreshadow/optimizers/random_search.py b/foreshadow/optimizers/random_search.py index ce5fadb..7a234c5 100644 --- a/foreshadow/optimizers/random_search.py +++ b/foreshadow/optimizers/random_search.py @@ -36,7 +36,6 @@ def __iter__(self): break sample = stoch.sample(self.param_distributions, rng=rng) n_tries += 1 - return iter(prev_samples) def __len__(self): @@ -61,10 +60,12 @@ def __init__( random_state=None, error_score="raise", return_train_score="warn", + max_tries=100, ): self.param_distributions = param_distributions self.n_iter = n_iter self.random_state = random_state + self.max_tries = max_tries super().__init__( estimator=estimator, scoring=scoring, @@ -85,5 +86,6 @@ def _get_param_iterator(self): self.param_distributions, self.n_iter, random_state=self.random_state, + max_tries=self.max_tries, ) return out diff --git a/foreshadow/tests/test_optimizers/test_random_search.py b/foreshadow/tests/test_optimizers/test_random_search.py index 8efa631..c187d34 100644 --- a/foreshadow/tests/test_optimizers/test_random_search.py +++ b/foreshadow/tests/test_optimizers/test_random_search.py @@ -5,7 +5,10 @@ @pytest.fixture() def simple_distribution(): - yield [ + """Simple parameter distribution for testing.""" + from foreshadow.optimizers import ParamSpec + ps = ParamSpec() + dist = [ { "s__transformer": "StandardScaler", "s__transformer__with_mean": [False, True], @@ -15,20 +18,91 @@ def simple_distribution(): "s__transformer__feature_range": [(0, 1), (0, 0.5)] }, ] + ps.set_params(**{"param_distributions": dist}) + yield ps + + +@pytest.fixture() +def iris_data(): + """Iris dataset.""" + import sklearn.datasets as dt + import pandas as pd + data = dt.load_iris() + X_data = pd.DataFrame(data.data, columns=data.feature_names).iloc[:, 0] + y_data = pd.DataFrame(data.target, columns=["target"])['target'] + return X_data, y_data + + +@pytest.fixture() +def estimator(mocker): + """Mocked estimator. .keys method must be set to return all possible keys + from the parameter distribution.""" + counter = [] + + class Estimator: + def __init__(self, **kwargs): + pass + + def set_params(self, *args, **kwargs): + counter.append(kwargs) + return self + def get_params(self, deep=True): + return self.keys() -def test_random_search_simple(simple_distribution): + Estimator.fit = mocker.Mock(return_value=None) + Estimator.score = mocker.Mock(return_value=0.5) + return Estimator, counter + + +def test_random_search_simple(estimator, simple_distribution, iris_data): + """Test that random search finds all different parameter specifications. + + Args: + estimator: fixture estimator + simple_distribution: fixture distribution to parameter optimize on. + iris_data: fixture dataset to use. 
+ + """ + from foreshadow.optimizers import RandomSearchCV + estimator, counter = estimator dist = simple_distribution + keys = {key: None for d in dist.param_distributions for key in d} + estimator.keys = lambda x: keys + estimator = estimator() + X, y = iris_data + rs = RandomSearchCV(estimator=estimator, + param_distributions=dist) + rs.fit(X, y) + unique_samples = set() + for sample in counter: + v = '' + for val in sample.values(): + v += str(val) + unique_samples.add(v) + + assert len(unique_samples) == 4 def test_random_param_list_simple(simple_distribution): + """Test that sampler properly iterates over parameter distribution. + + Args: + simple_distribution: fixture parameter distribution. + + Returns: + + """ from foreshadow.optimizers.random_search import HyperOptRandomSampler - from foreshadow.optimizers import ParamSpec dist = simple_distribution - ps = ParamSpec() - ps.set_params(**{"param_distributions": dist}) - Sampler = HyperOptRandomSampler(ps, 10) + Sampler = HyperOptRandomSampler(dist, 10) samples = [] for sample in Sampler: samples.append(sample) - assert len(samples) == 4 # 4 unique samples. + unique_samples = set() + for sample in samples: + v = '' + for val in sample.values(): + v += str(val) + unique_samples.add(v) + assert len(unique_samples) == 4 # 4 unique samples. From 037a5d7d923d08c0f43e24fd8b6e1b1620aa07a6 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Mon, 19 Aug 2019 11:44:17 -0400 Subject: [PATCH 28/37] Removing searcher_script. isort, black. --- foreshadow/concrete/externals.py | 2 +- foreshadow/foreshadow.py | 64 ++--- foreshadow/optimizers/__init__.py | 54 +++-- foreshadow/optimizers/param_distribution.py | 20 +- foreshadow/optimizers/random_search.py | 10 +- foreshadow/optimizers/tuner.py | 47 ++-- foreshadow/preparer.py | 14 +- foreshadow/steps/preparerstep.py | 16 +- foreshadow/tests/test_foreshadow.py | 12 +- .../tests/test_optimizers/test_tuner.py | 2 - .../test_cleaners/test_data_cleaner.py | 3 +- searcher_script.py | 224 ------------------ 12 files changed, 130 insertions(+), 338 deletions(-) delete mode 100644 searcher_script.py diff --git a/foreshadow/concrete/externals.py b/foreshadow/concrete/externals.py index 6747ed0..d0fb771 100644 --- a/foreshadow/concrete/externals.py +++ b/foreshadow/concrete/externals.py @@ -13,10 +13,10 @@ ) from sklearn.preprocessing import ( # noqa: F401 Imputer, + LabelEncoder, MinMaxScaler, RobustScaler, StandardScaler, - LabelEncoder, ) from foreshadow.utils import is_transformer diff --git a/foreshadow/foreshadow.py b/foreshadow/foreshadow.py index d1afcc4..6b73d06 100644 --- a/foreshadow/foreshadow.py +++ b/foreshadow/foreshadow.py @@ -10,14 +10,10 @@ from foreshadow.columnsharer import ColumnSharer from foreshadow.estimators.auto import AutoEstimator from foreshadow.estimators.meta import MetaEstimator +from foreshadow.optimizers import ParamSpec, Tuner, test_params from foreshadow.pipeline import SerializablePipeline from foreshadow.preparer import DataPreparer from foreshadow.utils import check_df -from foreshadow.optimizers import ( - ParamSpec, - Tuner, - test_params -) class Foreshadow(BaseEstimator): @@ -43,18 +39,21 @@ class Foreshadow(BaseEstimator): """ - def __init__(self, - X_preparer=None, - y_preparer=None, - estimator=None, - optimizer=None, - optimizer_kwargs=None,): + def __init__( + self, + X_preparer=None, + y_preparer=None, + estimator=None, + optimizer=None, + optimizer_kwargs=None, + ): self.X_preparer = X_preparer self.y_preparer = y_preparer self.estimator = estimator 
self.optimizer = optimizer - self.optimizer_kwargs = {} if optimizer_kwargs is \ - None else optimizer_kwargs + self.optimizer_kwargs = ( + {} if optimizer_kwargs is None else optimizer_kwargs + ) self.pipeline = None self.data_columns = None @@ -89,8 +88,9 @@ def X_preparer(self, dp): elif isinstance(dp, DataPreparer): self._X_preprocessor = dp else: - raise ValueError("Invalid value: '{}' " - "passed as X_preparer".format(dp)) + raise ValueError( + "Invalid value: '{}' " "passed as X_preparer".format(dp) + ) else: self._X_preprocessor = DataPreparer(column_sharer=ColumnSharer()) @@ -177,9 +177,9 @@ def optimizer(self, o): def _reset(self): try: - check_is_fitted(self, 'pipeline') + check_is_fitted(self, "pipeline") del self.pipeline - check_is_fitted(self, 'tuner') + check_is_fitted(self, "tuner") del self.tuner del self.opt_instance except: @@ -207,8 +207,10 @@ def fit(self, data_df, y_df): if self.X_preparer is not None: self.pipeline = SerializablePipeline( - [("X_preparer", self.X_preparer), ("estimator", - self.estimator)] + [ + ("X_preparer", self.X_preparer), + ("estimator", self.estimator), + ] ) else: self.pipeline = SerializablePipeline( @@ -231,19 +233,27 @@ def fit(self, data_df, y_df): # param_ranges = param_mapping(deepcopy(self.pipeline), X_df, y_df) params = ParamSpec() params.set_params(param_distributions=test_params) - self.opt_instance = self.optimizer(estimator=self.pipeline, - param_distributions=params, - **{'iid': True, - "scoring": "accuracy", - "n_iter": 10, - 'return_train_score': True}) + self.opt_instance = self.optimizer( + estimator=self.pipeline, + param_distributions=params, + **{ + "iid": True, + "scoring": "accuracy", + "n_iter": 10, + "return_train_score": True, + } + ) self.tuner = Tuner(self.pipeline, params, self.opt_instance) self.tuner.fit(X_df, y_df) import pandas as pd + results = pd.DataFrame(self.opt_instance.cv_results_) results = results[ - [c for c in results.columns if - all(s not in c for s in ["time", "params"])] + [ + c + for c in results.columns + if all(s not in c for s in ["time", "params"]) + ] ] print(results) self.pipeline = self.tuner.transform(self.pipeline) diff --git a/foreshadow/optimizers/__init__.py b/foreshadow/optimizers/__init__.py index 88801da..5d3b893 100644 --- a/foreshadow/optimizers/__init__.py +++ b/foreshadow/optimizers/__init__.py @@ -2,36 +2,40 @@ # from foreshadow.optimizers.param_mapping import param_mapping from foreshadow.optimizers.param_distribution import ParamSpec -from foreshadow.optimizers.tuner import Tuner from foreshadow.optimizers.random_search import RandomSearchCV +from foreshadow.optimizers.tuner import Tuner from foreshadow.utils import get_transformer + test_params = [ - { - "s__transformer": "StandardScaler", - "s__transformer__with_mean": [False,True], - }, - { - "s__transformer": "MinMaxScaler", - "s__transformer__feature_range": [(0, 1), (0, 0.5)], - }, - ] + { + "s__transformer": "StandardScaler", + "s__transformer__with_mean": [False, True], + }, + { + "s__transformer": "MinMaxScaler", + "s__transformer__feature_range": [(0, 1), (0, 0.5)], + }, +] test_params = [ - { - "X_preparer__feature_preprocessor___parallel_process__group" - ": 0__CategoricalEncoder__transformer__ohe": - get_transformer("OneHotEncoder")(), - "X_preparer__feature_preprocessor___parallel_process__group" - ": 0__CategoricalEncoder__transformer__ohe__drop_invariant": - [True, False], - }, - { - "X_preparer__feature_preprocessor___parallel_process__group" - ": 0__CategoricalEncoder__transformer__ohe": - 
get_transformer("HashingEncoder")() - }, - ] + { + "X_preparer__feature_preprocessor___parallel_process__group" + ": 0__CategoricalEncoder__transformer__ohe": get_transformer( + "OneHotEncoder" + )(), + "X_preparer__feature_preprocessor___parallel_process__group" + ": 0__CategoricalEncoder__transformer__ohe__drop_invariant": [ + True, + False, + ], + }, + { + "X_preparer__feature_preprocessor___parallel_process__group" + ": 0__CategoricalEncoder__transformer__ohe": get_transformer( + "HashingEncoder" + )() + }, +] __all__ = ["ParamSpec", "Tuner", "RandomSearchCV", "test_params"] - diff --git a/foreshadow/optimizers/param_distribution.py b/foreshadow/optimizers/param_distribution.py index ab174dd..e574bc5 100644 --- a/foreshadow/optimizers/param_distribution.py +++ b/foreshadow/optimizers/param_distribution.py @@ -1,8 +1,9 @@ """Classes to be configured by user for customizing parameter tuning.""" -import foreshadow.serializers as ser from collections import MutableMapping +import foreshadow.serializers as ser + """ 2. cases: @@ -72,16 +73,19 @@ class ParamSpec(MutableMapping, ser.ConcreteSerializerMixin): Here, the dicts are used to tell the optimizer where to values to set are. The lists showcase the different values that are possible. """ + def __init__(self, fs_pipeline=None, X_df=None, y_df=None): if not (fs_pipeline is None) == (X_df is None) == (y_df is None): - raise ValueError("Either all kwargs are None or all are set. To " - "use automatic param determination, pass all " - "kwargs. Otherwise, manual setting can be " - "accomplished using set_params.") + raise ValueError( + "Either all kwargs are None or all are set. To " + "use automatic param determination, pass all " + "kwargs. Otherwise, manual setting can be " + "accomplished using set_params." + ) self._param_set = False self.param_distributions = [] if not (fs_pipeline is None) and (X_df is None) and (y_df) is None: - raise NotImplementedError('Automatic param spec not implemented') + raise NotImplementedError("Automatic param spec not implemented") # automatic pipelining. 
# params = fs_pipeline.get_params() # for kwarg in kwargs: @@ -95,7 +99,7 @@ def get_params(self, deep=True): return self.param_distributions def set_params(self, **params): - self.param_distributions = params['param_distributions'] + self.param_distributions = params["param_distributions"] self._param_set = True def __call__(self): @@ -115,4 +119,4 @@ def __len__(self): def __delitem__(self, key): # overriding abstract method but should not # be called - raise NotImplementedError('') + raise NotImplementedError("") diff --git a/foreshadow/optimizers/random_search.py b/foreshadow/optimizers/random_search.py index 7a234c5..d918d33 100644 --- a/foreshadow/optimizers/random_search.py +++ b/foreshadow/optimizers/random_search.py @@ -1,18 +1,18 @@ """Random optimization of params.""" +import hyperopt.pyll.stochastic as stoch import six +from hyperopt import hp from sklearn.model_selection._search import BaseSearchCV from sklearn.utils import check_random_state -import hyperopt.pyll.stochastic as stoch -from hyperopt import hp - from .tuner import _replace_list class HyperOptRandomSampler(object): - def __init__(self, param_distributions, n_iter, random_state=None, - max_tries=100): + def __init__( + self, param_distributions, n_iter, random_state=None, max_tries=100 + ): param_distributions = _replace_list( None, param_distributions.param_distributions, hp.choice ) diff --git a/foreshadow/optimizers/tuner.py b/foreshadow/optimizers/tuner.py index 258fdee..9145c3b 100644 --- a/foreshadow/optimizers/tuner.py +++ b/foreshadow/optimizers/tuner.py @@ -1,18 +1,19 @@ """Classes for optimizing Foreshadow given a param_distribution.""" +import importlib +import inspect + +import hyperopt.pyll.stochastic as stoch import six +from hyperopt import hp +from sklearn.exceptions import NotFittedError from sklearn.model_selection._search import BaseSearchCV from sklearn.utils import check_random_state +from sklearn.utils.validation import check_is_fitted import foreshadow as fs import foreshadow.serializers as ser -import hyperopt.pyll.stochastic as stoch from foreshadow.base import BaseEstimator, TransformerMixin -from hyperopt import hp -from sklearn.utils.validation import check_is_fitted -from sklearn.exceptions import NotFittedError -import importlib -import inspect """ @@ -92,7 +93,7 @@ def _replace_list(key, obj, replace_with=hp.choice): def get(optimizer, **optimizer_kwargs): if isinstance(optimizer, str): - mod = importlib.import_module('foreshadow.optimizers') + mod = importlib.import_module("foreshadow.optimizers") return getattr(mod, optimizer)(**optimizer_kwargs) elif inspect.isclass(optimizer): return optimizer(**optimizer_kwargs) @@ -101,25 +102,33 @@ def get(optimizer, **optimizer_kwargs): class Tuner(BaseEstimator, TransformerMixin): """Tunes the Foreshadow object using a ParamSpec and Optimizer.""" - def __init__(self, pipeline=None, params=None, optimizer=None, - optimizer_kwargs={}): + + def __init__( + self, pipeline=None, params=None, optimizer=None, optimizer_kwargs={} + ): if pipeline is None: - raise ValueError("'pipeline' is a required arg and is only set to " - "None due to sklearn get_params requirements.") + raise ValueError( + "'pipeline' is a required arg and is only set to " + "None due to sklearn get_params requirements." + ) if params is None: - raise ValueError("'params' is a required arg and is only set to " - "None due to sklearn get_params requirements.") + raise ValueError( + "'params' is a required arg and is only set to " + "None due to sklearn get_params requirements." 
+ ) self.pipeline = pipeline self.params = params self.optimizer_kwargs = optimizer_kwargs - self.optimizer = get(optimizer, - estimator=self.pipeline, - param_distributions=self.params, - **self.optimizer_kwargs) + self.optimizer = get( + optimizer, + estimator=self.pipeline, + param_distributions=self.params, + **self.optimizer_kwargs + ) def _reset(self): try: - check_is_fitted(self, 'best_pipeline') + check_is_fitted(self, "best_pipeline") del self.best_pipeline del self.best_params except NotFittedError: @@ -132,5 +141,5 @@ def fit(self, X, y, **fit_params): self.best_params = self.optimizer.best_params_ def transform(self, pipeline): - check_is_fitted(self, 'best_pipeline') + check_is_fitted(self, "best_pipeline") return self.best_pipeline diff --git a/foreshadow/preparer.py b/foreshadow/preparer.py index dfc6ea9..0cab057 100644 --- a/foreshadow/preparer.py +++ b/foreshadow/preparer.py @@ -99,20 +99,14 @@ def __init__( "feature_engineerer", FeatureEngineererMapper(**engineerer_kwargs_), ), - ( - "feature_preprocessor", - Preprocessor(**preprocessor_kwargs_), - ), - ( - "feature_reducer", - FeatureReducerMapper(**reducer_kwargs_), - ), + ("feature_preprocessor", Preprocessor(**preprocessor_kwargs_)), + ("feature_reducer", FeatureReducerMapper(**reducer_kwargs_)), ] else: steps = [("output", NoTransform())] - if 'steps' in kwargs: # needed for sklearn estimator clone, + if "steps" in kwargs: # needed for sklearn estimator clone, # which will try to init the object using get_params. - steps = kwargs.pop('steps') + steps = kwargs.pop("steps") self.column_sharer = column_sharer self.y_var = y_var diff --git a/foreshadow/steps/preparerstep.py b/foreshadow/steps/preparerstep.py index d9abca5..6a64fc4 100644 --- a/foreshadow/steps/preparerstep.py +++ b/foreshadow/steps/preparerstep.py @@ -492,19 +492,17 @@ def fit_transform(self, X, y=None, **fit_params): try: return self._parallel_process.fit_transform(X, y=y, **fit_params) except AttributeError: - if getattr(self, '_parallel_process', None) is None: + if getattr(self, "_parallel_process", None) is None: self.check_process(X) except KeyError as e: - if str(e).find ('not in index') != -1: + if str(e).find("not in index") != -1: # This indicates that a transformation step was changed and # now does not correctly reflect the generated DataFrame as # this step. We will thus reinitialize the _parallel_process # so that the best pipeline for this step will be found. self.check_process(X) finally: - return self._parallel_process.fit_transform(X, y=y, - **fit_params) - + return self._parallel_process.fit_transform(X, y=y, **fit_params) def transform(self, X, *args, **kwargs): """Transform X using this PreparerStep. 
@@ -520,8 +518,8 @@ def transform(self, X, *args, **kwargs): result from .transform() """ - if getattr(self, '_parallel_process', None) is None: - raise ValueError('not fitted.') + if getattr(self, "_parallel_process", None) is None: + raise ValueError("not fitted.") return self._parallel_process.transform(X, *args, **kwargs) def inverse_transform(self, X, *args, **kwargs): @@ -558,8 +556,8 @@ def _get_param_names(cls): while cls.__name__ != PreparerStep.__name__: cls = cls.__mro__[1] params += cls._get_param_names() - if '_parallel_process' not in params: - params += ['_parallel_process'] + if "_parallel_process" not in params: + params += ["_parallel_process"] return params def get_params(self, deep=True): diff --git a/foreshadow/tests/test_foreshadow.py b/foreshadow/tests/test_foreshadow.py index 741a341..25f3dfc 100644 --- a/foreshadow/tests/test_foreshadow.py +++ b/foreshadow/tests/test_foreshadow.py @@ -13,12 +13,12 @@ def test_foreshadow_defaults(): foreshadow = Foreshadow() # defaults assert ( - isinstance(foreshadow.X_preparer, DataPreparer) - and isinstance(foreshadow.y_preparer, DataPreparer) - and isinstance(foreshadow.estimator, AutoEstimator) - and foreshadow.optimizer is None - and foreshadow.pipeline is None - and foreshadow.data_columns is None + isinstance(foreshadow.X_preparer, DataPreparer) + and isinstance(foreshadow.y_preparer, DataPreparer) + and isinstance(foreshadow.estimator, AutoEstimator) + and foreshadow.optimizer is None + and foreshadow.pipeline is None + and foreshadow.data_columns is None ) diff --git a/foreshadow/tests/test_optimizers/test_tuner.py b/foreshadow/tests/test_optimizers/test_tuner.py index a0e0a8e..1c7b879 100644 --- a/foreshadow/tests/test_optimizers/test_tuner.py +++ b/foreshadow/tests/test_optimizers/test_tuner.py @@ -1,5 +1,3 @@ """Test tuner.py""" import pytest - - diff --git a/foreshadow/tests/test_transformers/test_concrete/test_cleaners/test_data_cleaner.py b/foreshadow/tests/test_transformers/test_concrete/test_cleaners/test_data_cleaner.py index 8253789..3206c12 100644 --- a/foreshadow/tests/test_transformers/test_concrete/test_cleaners/test_data_cleaner.py +++ b/foreshadow/tests/test_transformers/test_concrete/test_cleaners/test_data_cleaner.py @@ -178,8 +178,7 @@ def test_numerical_input_fittransform(): # dc.fit(data) # from pprint import pprint # print(pprint(dc.get_params())) - # print(dc._parallel_process) - +# print(dc._parallel_process) # TODO test graph, could be implemented very wrong. 
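
[Editor's note] The final hunk of this commit deletes searcher_script.py.
For reference, the end-to-end flow that script exercised, condensed from the
removed file (illustrative; it targets the in-flux API of this branch):

    import pandas as pd
    import sklearn.datasets as dt
    from sklearn.linear_model import LogisticRegression

    import foreshadow
    from foreshadow.optimizers import RandomSearchCV

    data = dt.load_iris()
    X_data = pd.DataFrame(data.data, columns=data.feature_names).iloc[:, 0]
    y_data = pd.DataFrame(data.target, columns=["target"])["target"]

    fs = foreshadow.Foreshadow(
        estimator=LogisticRegression(),
        optimizer=RandomSearchCV,
        optimizer_kwargs={
            "iid": True,
            "scoring": "accuracy",
            "n_iter": 2,
            "return_train_score": True,
        },
    )
    fs.fit(X_data, y_data)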
diff --git a/searcher_script.py b/searcher_script.py deleted file mode 100644 index 7b0f82f..0000000 --- a/searcher_script.py +++ /dev/null @@ -1,224 +0,0 @@ -import hyperopt.pyll.stochastic as stoch -import numpy as np -import pandas as pd -import six -import sklearn.datasets as dt -from hpsklearn import HyperoptEstimator, extra_trees -from hyperopt import hp, tpe -from sklearn.linear_model import LogisticRegression -from sklearn.metrics import accuracy_score -from sklearn.model_selection import ParameterSampler, RandomizedSearchCV -from sklearn.model_selection._search import BaseSearchCV -from sklearn.pipeline import Pipeline -from sklearn.utils import check_random_state -from sklearn.utils.fixes import _Mapping as Mapping, _Sequence as Sequence - -from foreshadow.columnsharer import ColumnSharer -from foreshadow.preparer import DataPreparer -from foreshadow.smart import Scaler -from foreshadow.steps import CleanerMapper, IntentMapper, Preprocessor -from foreshadow.utils.testing import debug -from foreshadow.optimizers import RandomSearchCV - -debug() - -data = dt.load_iris() - -X_data = pd.DataFrame(data.data, columns=data.feature_names).iloc[:, 0] -y_data = pd.DataFrame(data.target, columns=["target"])['target'] - -# cs = ColumnSharer() -# p = Preprocessor(column_sharer=cs) -s = Scaler() -lr = LogisticRegression() - -pipe = Pipeline([("s", s), ("lr", lr)]) - -pipe.fit(X_data, y_data) - -param_distributions = hp.choice( - "s__transformer", - [ - { - "s__transformer": "StandardScaler", - "s__transformer__with_mean": hp.choice("with_mean", [False, - True]), - }, - { - "s__transformer": "MinMaxScaler", - "s__transformer__feature_range": hp.choice( - "feature_range", [(0, 1), (0, 0.5)] - ), - }, - ], -) - -test = [ - { - "s__transformer": "StandardScaler", - "s__transformer__with_mean": [False, True], - }, - { - "s__transformer": "MinMaxScaler", - "s__transformer__feature_range": [(0, 1), (0, 0.5)], - }, -] - -from foreshadow.optimizers.tuner import _replace_list - - -# print(_replace_list(None, test)) -# print(param_distributions) - - -class HyperOptSampler(object): - def __init__(self, param_distributions, n_iter, random_state=None): - self.param_distributions = param_distributions - self.n_iter = n_iter - self.random_state = random_state - - def __iter__(self): - # check if all distributions are given as lists - # in this case we want to sample without replacement - rng = check_random_state(self.random_state) - for _ in six.moves.range(self.n_iter): - # import pdb; pdb.set_trace() - yield stoch.sample(self.param_distributions, rng=rng) - - def __len__(self): - """Number of points that will be sampled.""" - return self.n_iter - - -class ShadowSearchCV(BaseSearchCV): - def __init__( - self, - estimator, - param_distributions, - n_iter=10, - scoring=None, - fit_params=None, - n_jobs=1, - iid=True, - refit=True, - cv=None, - verbose=0, - pre_dispatch="2*n_jobs", - random_state=None, - error_score="raise", - return_train_score="warn", - ): - self.param_distributions = param_distributions - self.n_iter = n_iter - self.random_state = random_state - super().__init__( - estimator=estimator, - scoring=scoring, - fit_params=fit_params, - n_jobs=n_jobs, - iid=iid, - refit=refit, - cv=cv, - verbose=verbose, - pre_dispatch=pre_dispatch, - error_score=error_score, - return_train_score=return_train_score, - ) - - def _get_param_iterator(self): - """Return ParameterSampler instance for the given distributions""" - out = HyperOptSampler( - self.param_distributions, - self.n_iter, - 
random_state=self.random_state, - ) - return out - - -# combinations.yaml -""" -combinations: - X_preparer.cleaner.CHAS: - Cleaner: - - date: - - p1 - - p2 - - financial - IntentMapper: - - Something - - X_preparer.cleaner.CHAS.CleanerMapper: - -Something - - X_preparer.cleaner.CHAS.IntentMapper: - -Something - - - X_preparer: - cleaner: - CHAS: - Cleaner: - date: - -p1 - -p2 - -""" - -# rscv = ShadowSearchCV( -# pipe, param_distributions, iid=True, scoring="accuracy", n_iter=10 -# ) -# -# # print("Train Accuracy: {}".format(accuracy_score(y_data, pipe.predict(X_data)))) -# -# rscv.fit(X_data, y_data) -# results = pd.DataFrame(rscv.cv_results_) -# results = results[ -# [c for c in results.columns if all(s not in c for s in ["time", "params"])] -# ] -# -# print(rscv.best_params_) -# print(rscv.best_estimator_) -# # print(results) - -############### - -# from foreshadow.optimizers import RandomSearchCV, Tuner, ParamSpec -# -# print("simpletest") -# ps = ParamSpec() -# test = [ -# { -# "s__transformer": "StandardScaler", -# "s__transformer__with_mean": [False,True], -# }, -# { -# "s__transformer": "MinMaxScaler", -# "s__transformer__feature_range": [(0, 1), (0, 0.5)], -# }, -# ] -# ps.set_params(param_distributions=test) -# t = Tuner(pipe, ps, RandomSearchCV, optimizer_kwargs={'iid': True, -# "scoring": "accuracy", -# "n_iter": 2, -# 'return_train_score': -# True}) -# t.fit(X_data, y_data) -# print(t.best_pipeline) - -############### - -print("foreshadow") -import foreshadow - -t = {'iid': True, - "scoring": "accuracy", - "n_iter": 2, - 'return_train_score': - True} - -fs = foreshadow.Foreshadow( - optimizer=RandomSearchCV, - optimizer_kwargs=t, - estimator=lr, -) -fs.fit(X_data, y_data) From 2ba97aadc7808a79df52a0cbbcd71550e87232e1 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Mon, 19 Aug 2019 13:00:46 -0400 Subject: [PATCH 29/37] flake8 --- foreshadow/base.py | 2 +- foreshadow/foreshadow.py | 4 +- foreshadow/optimizers/param_distribution.py | 105 ++++++++++++++++-- foreshadow/optimizers/random_search.py | 37 +++++- foreshadow/optimizers/tuner.py | 43 +++++-- foreshadow/preparer.py | 6 +- foreshadow/serializers.py | 31 ++++++ foreshadow/steps/preparerstep.py | 5 +- foreshadow/tests/test_foreshadow.py | 9 +- .../test_optimizers/test_random_search.py | 33 +++--- .../tests/test_optimizers/test_tuner.py | 2 - pyproject.toml | 2 +- 12 files changed, 225 insertions(+), 54 deletions(-) diff --git a/foreshadow/base.py b/foreshadow/base.py index 55f6973..89b593d 100644 --- a/foreshadow/base.py +++ b/foreshadow/base.py @@ -9,7 +9,7 @@ _set_params, """@@ -30,6 +30,9 @@ setattr(self, key, value) - + for key, sub_params in nested_params.items(): - valid_params[key].set_params(**sub_params) + try: diff --git a/foreshadow/foreshadow.py b/foreshadow/foreshadow.py index 6b73d06..7ad863c 100644 --- a/foreshadow/foreshadow.py +++ b/foreshadow/foreshadow.py @@ -173,7 +173,7 @@ def optimizer(self, o): if o is None or (inspect.isclass(o) and issubclass(o, BaseSearchCV)): self._optimizer = o else: - raise ValueError("Invalid optimizer passed.") + raise ValueError("Invalid optimizer: '{}' passed.".format(o)) def _reset(self): try: @@ -182,7 +182,7 @@ def _reset(self): check_is_fitted(self, "tuner") del self.tuner del self.opt_instance - except: + except AttributeError: pass def fit(self, data_df, y_df): diff --git a/foreshadow/optimizers/param_distribution.py b/foreshadow/optimizers/param_distribution.py index e574bc5..2a586d0 100644 --- a/foreshadow/optimizers/param_distribution.py +++ 
b/foreshadow/optimizers/param_distribution.py
@@ -10,21 +10,21 @@
 
 1. Apply override to initial columns
 
-In this case, we simply need to override the get_mapping result. This is 
+In this case, we simply need to override the get_mapping result. This is
 hard to do because it is computed at .fit() time, not __init__ time. We need to
-compute it at .fit() time because we need access to the dataset. Instead, 
-we will pass overrides to the __init__ and handle the errors if users choose 
+compute it at .fit() time because we need access to the dataset. Instead,
+we will pass overrides to the __init__ and handle the errors if users choose
 wrong columns.
 
 2. apply override to a dynamically created transformer
 
-In this case, the output from a previous step in the PreparerStep's pipeline 
-created new columns. Thesee will not be available at get_mapping() time. If 
-we pass in these columns to ParallelProcessor, it will try to slice then out 
-which will break. We do however know the initial column and, knowing 
-DynamicPipeline's naming scheme, the new column's name. We can enable an 
-override on a per column level by passing in the eventual columns to be 
+In this case, the output from a previous step in the PreparerStep's pipeline
+created new columns. These will not be available at get_mapping() time. If
+we pass in these columns to ParallelProcessor, it will try to slice them out,
+which will break. We do, however, know the initial column and, knowing
+DynamicPipeline's naming scheme, the new column's name. We can enable an
+override on a per-column level by passing in the eventual columns to be
 overridden to that group process.
@@ -75,6 +75,22 @@ class ParamSpec(MutableMapping, ser.ConcreteSerializerMixin):
     """
 
     def __init__(self, fs_pipeline=None, X_df=None, y_df=None):
+        """Initialize, and if args are passed, auto-create param distribution.
+
+        Only pass the init arguments if automatic param spec determination
+        is desired.
+
+        Args:
+            fs_pipeline: Foreshadow.pipeline
+            X_df: input DataFrame of data points
+            y_df: input DataFrame of labels
+
+        Raises:
+            ValueError: if some kwargs are passed while others are
+                omitted.
+            NotImplementedError: if all kwargs are passed.
+
+        """
         if not (fs_pipeline is None) == (X_df is None) == (y_df is None):
             raise ValueError(
                 "Either all kwargs are None or all are set. To "
@@ -96,27 +112,92 @@ def __init__(self, fs_pipeline=None, X_df=None, y_df=None):
         # self._param_set = True
 
     def get_params(self, deep=True):
+        """Get the params for this object. Used for serialization.
+
+        Args:
+            deep: Does nothing. Here for sklearn compatibility.
+
+        Returns:
+            Members that need to be set for this object.
+
+        """
         return self.param_distributions
 
     def set_params(self, **params):
+        """Set the params for this object. Used for serialization.
+
+        Also used to init this object when automatic tuning is not used.
+
+        Args:
+            **params: Members to set from get_params.
+
+        Returns:
+            self.
+
+        """
         self.param_distributions = params["param_distributions"]
         self._param_set = True
+        return self
 
     def __call__(self):
+        """Overridden for MutableMapping.
+
+        Returns:
+            self.param_distributions
+
+        """
         return self.param_distributions
 
     def __iter__(self):
+        """Iterate over self.param_distributions.
+
+        Returns:
+            iter(self.param_distributions)
+
+        """
         return iter(self.param_distributions)
 
     def __getitem__(self, item):
+        """Return value at index item from internal list of params.
+
+        Args:
+            item: index in list.
+
+        Returns:
+            item at index from self.param_distributions.
+
+        """
         return self.param_distributions[item]
 
     def __setitem__(self, key, value):
+        """Set value at index key from internal list of params.
+
+        Args:
+            key: index
+            value: value
+
+        """
         self.param_distributions[key] = value
 
     def __len__(self):
+        """Length of self.param_distributions list.
+
+        Returns:
+            len(self.param_distributions)
+
+        """
         return len(self.param_distributions)
 
-    def __delitem__(self, key):  # overriding abstract method but should not
-        # be called
-        raise NotImplementedError("")
+    def __delitem__(self, key):  # overriding abstract method, not to be used.
+        """Not implemented, only overridden because it is an abstract method.
+
+        Args:
+            key: not used.
+
+        Raises:
+            NotImplementedError: if called.
+
+        """
+        raise NotImplementedError(
+            "Abstract method not implemented. Should " "not be called."
+        )
diff --git a/foreshadow/optimizers/random_search.py b/foreshadow/optimizers/random_search.py
index d918d33..761367d 100644
--- a/foreshadow/optimizers/random_search.py
+++ b/foreshadow/optimizers/random_search.py
@@ -10,9 +10,19 @@
 
 
 class HyperOptRandomSampler(object):
+    """Sampler that is an iterable over param distribution."""
+
     def __init__(
         self, param_distributions, n_iter, random_state=None, max_tries=100
     ):
+        """Constructor.
+
+        Args:
+            param_distributions: Parameter distribution as nested list-dict.
+            n_iter: length of returned iterator.
+            random_state: random state.
+            max_tries: max attempts to try to get a new unique value.
+        """
         param_distributions = _replace_list(
             None, param_distributions.param_distributions, hp.choice
         )
@@ -22,6 +32,17 @@ def __init__(
         self.max_tries = max_tries
 
     def __iter__(self):
+        """Search parameter distribution for unique states.
+
+        As each state is defined using hp.choice, we don't explicitly know
+        each of the unique states that our estimator can be set to. We
+        sample the distribution of states up until max_tries times to get
+        these unique states and return an iterable of them.
+
+        Returns:
+            iterable of unique states.
+
+        """
         # check if all distributions are given as lists
         # in this case we want to sample without replacement
         rng = check_random_state(self.random_state)
@@ -39,11 +60,18 @@ def __iter__(self):
         return iter(prev_samples)
 
     def __len__(self):
-        """Number of points that will be sampled."""
+        """Get number of sampled points for optimization.
+
+        Returns:
+            Number of unique states to be returned.
+
+        """
         return self.n_iter
 
 
 class RandomSearchCV(BaseSearchCV):
+    """Optimize Foreshadow.pipeline and/or its sub-objects."""
+
     def __init__(
         self,
         estimator,
@@ -81,7 +109,12 @@ def __init__(
         )
 
     def _get_param_iterator(self):
-        """Return ParameterSampler instance for the given distributions"""
+        """Return ParameterSampler instance for the given distributions.
+
+        Returns:
+            iterable of unique states defined by HyperOptRandomSampler.
+ + """ out = HyperOptRandomSampler( self.param_distributions, self.n_iter, diff --git a/foreshadow/optimizers/tuner.py b/foreshadow/optimizers/tuner.py index 9145c3b..75b98a6 100644 --- a/foreshadow/optimizers/tuner.py +++ b/foreshadow/optimizers/tuner.py @@ -3,16 +3,10 @@ import importlib import inspect -import hyperopt.pyll.stochastic as stoch -import six from hyperopt import hp from sklearn.exceptions import NotFittedError -from sklearn.model_selection._search import BaseSearchCV -from sklearn.utils import check_random_state from sklearn.utils.validation import check_is_fitted -import foreshadow as fs -import foreshadow.serializers as ser from foreshadow.base import BaseEstimator, TransformerMixin @@ -41,10 +35,10 @@ date: -p1 -p2 - - + + Convention: - Column name is last. If a . is present, then applied across all + Column name is last. If a . is present, then applied across all columns. Things that may be swapped: @@ -92,6 +86,16 @@ def _replace_list(key, obj, replace_with=hp.choice): def get(optimizer, **optimizer_kwargs): + """Get optimizer from foreshadow.optimizers package. + + Args: + optimizer: optimizer name or class + **optimizer_kwargs: kwargs used in instantiation. + + Returns: + Corresponding instantiated optimizer using kwargs. + + """ if isinstance(optimizer, str): mod = importlib.import_module("foreshadow.optimizers") return getattr(mod, optimizer)(**optimizer_kwargs) @@ -135,11 +139,32 @@ def _reset(self): pass def fit(self, X, y, **fit_params): + """Optimize self.pipeline using self.optimizer. + + Args: + X: input points + y: input labels + **fit_params: params to optimizer fit method. + + Returns: + self + + """ self._reset() self.optimizer.fit(X, y, **fit_params) self.best_pipeline = self.optimizer.best_estimator_ self.best_params = self.optimizer.best_params_ + return self def transform(self, pipeline): + """Transform pipeline using best_pipeline. + + Args: + pipeline: input pipeline + + Returns: + best_pipeline. + + """ check_is_fitted(self, "best_pipeline") return self.best_pipeline diff --git a/foreshadow/preparer.py b/foreshadow/preparer.py index 0cab057..b9d921e 100644 --- a/foreshadow/preparer.py +++ b/foreshadow/preparer.py @@ -43,8 +43,7 @@ def _none_to_dict(name, val, column_sharer=None): class DataPreparer(Pipeline, PipelineSerializerMixin): - """Predefined pipeline for the foreshadow workflow. This Pipeline has 5 - steps: + """Predefined pipeline for foreshadow workflow. This Pipeline has 5 steps. 1. Cleaning 2. Intent selection (data type, one of Categorical, Numerical, and Text) @@ -119,6 +118,3 @@ def _get_params(self, attr, deep=True): out.update({"steps": steps}) # manually # adding steps to the get_params() return out - - def set_params(self, **kwargs): - return super().set_params(**kwargs) diff --git a/foreshadow/serializers.py b/foreshadow/serializers.py index b440d45..b5ff6b7 100644 --- a/foreshadow/serializers.py +++ b/foreshadow/serializers.py @@ -448,12 +448,43 @@ def dict_serialize(self, deep=False): class ParamSpecSerializerMixin(ConcreteSerializerMixin): + """Custom serialization for ParamSpec object.""" + def serialize(self, **kwargs): + """Serialize data as specified. + + If you would like to save the transformer parameters without saving + its state in a human readable form, use `dict`. If you would like to + save the transformer with its internal state use `inline` to + save it in its hex form in the json. 
If you would like a more space
+        efficient form, use `disk` to save it to a cache directory in
+        the root (~/.foreshadow/cache) that must be manually cleaned. Lastly,
+        if the transformer being serialized is custom, then the class itself
+        will be cached in pickle form and placed in the `pickle_class`
+        attribute.
+
+        Args:
+            **kwargs: The keyword arguments to pass to the serialization
+                method.
+
+        Returns:
+            str: The appropriate string representation of the serialization.
+
+        """
         full_ser = super().serialize(**kwargs)
         return full_ser
 
     @classmethod
     def deserialize(cls, data):
+        """Specify the method routing for a transformer deserialization.
+
+        Args:
+            data (dict): The counterpart to serialize that has all the required
+                args to build a transformer.
+
+        Returns:
+            object: The deserialized transformer.
+
+        """
         return super().deserialize(data)
diff --git a/foreshadow/steps/preparerstep.py b/foreshadow/steps/preparerstep.py
index 6a64fc4..5d85a96 100644
--- a/foreshadow/steps/preparerstep.py
+++ b/foreshadow/steps/preparerstep.py
@@ -1,8 +1,6 @@
 """General base classes used across Foreshadow."""
 from collections import MutableMapping, defaultdict, namedtuple
 
-from sklearn.utils.validation import check_is_fitted
-
 from foreshadow.base import BaseEstimator, TransformerMixin
 from foreshadow.concrete.internals.notransform import NoTransform
 from foreshadow.logging import logging
@@ -517,6 +515,9 @@ def transform(self, X, *args, **kwargs):
         Returns:
             result from .transform()
 
+        Raises:
+            ValueError: if not fitted.
+
         """
         if getattr(self, "_parallel_process", None) is None:
             raise ValueError("not fitted.")
diff --git a/foreshadow/tests/test_foreshadow.py b/foreshadow/tests/test_foreshadow.py
index 25f3dfc..04e8307 100644
--- a/foreshadow/tests/test_foreshadow.py
+++ b/foreshadow/tests/test_foreshadow.py
@@ -47,7 +47,7 @@ def test_foreshadow_X_preparer_error():
     with pytest.raises(ValueError) as e:
         _ = Foreshadow(X_preparer=preprocessor)
 
-    assert str(e.value) == "Invalid value passed as X_preparer"
+    assert str(e.value) == "Invalid value: 'Invalid' passed as X_preparer"
 
 
 def test_foreshadow_y_preparer_false():
@@ -116,7 +116,7 @@ def test_foreshadow_optimizer_error_invalid():
     with pytest.raises(ValueError) as e:
         _ = Foreshadow(optimizer=optimizer)
 
-    assert str(e.value) == "Invalid value passed as optimizer"
+    assert str(e.value) == "Invalid optimizer: 'Invalid' passed."
 
 
 def test_foreshadow_optimizer_error_wrongclass():
@@ -126,7 +126,10 @@ def test_foreshadow_optimizer_error_wrongclass():
     with pytest.raises(ValueError) as e:
         _ = Foreshadow(optimizer=optimizer)
 
-    assert str(e.value) == "Invalid value passed as optimizer"
+    assert (
+        str(e.value) == "Invalid optimizer: '' passed."
+ ) def test_foreshadow_warns_on_set_estimator_optimizer(): diff --git a/foreshadow/tests/test_optimizers/test_random_search.py b/foreshadow/tests/test_optimizers/test_random_search.py index c187d34..6fe6086 100644 --- a/foreshadow/tests/test_optimizers/test_random_search.py +++ b/foreshadow/tests/test_optimizers/test_random_search.py @@ -7,17 +7,18 @@ def simple_distribution(): """Simple parameter distribution for testing.""" from foreshadow.optimizers import ParamSpec + ps = ParamSpec() dist = [ - { - "s__transformer": "StandardScaler", - "s__transformer__with_mean": [False, True], - }, - { - "s__transformer": "MinMaxScaler", - "s__transformer__feature_range": [(0, 1), (0, 0.5)] - }, - ] + { + "s__transformer": "StandardScaler", + "s__transformer__with_mean": [False, True], + }, + { + "s__transformer": "MinMaxScaler", + "s__transformer__feature_range": [(0, 1), (0, 0.5)], + }, + ] ps.set_params(**{"param_distributions": dist}) yield ps @@ -27,9 +28,10 @@ def iris_data(): """Iris dataset.""" import sklearn.datasets as dt import pandas as pd + data = dt.load_iris() X_data = pd.DataFrame(data.data, columns=data.feature_names).iloc[:, 0] - y_data = pd.DataFrame(data.target, columns=["target"])['target'] + y_data = pd.DataFrame(data.target, columns=["target"])["target"] return X_data, y_data @@ -65,18 +67,18 @@ def test_random_search_simple(estimator, simple_distribution, iris_data): """ from foreshadow.optimizers import RandomSearchCV + estimator, counter = estimator dist = simple_distribution keys = {key: None for d in dist.param_distributions for key in d} estimator.keys = lambda x: keys estimator = estimator() X, y = iris_data - rs = RandomSearchCV(estimator=estimator, - param_distributions=dist) + rs = RandomSearchCV(estimator=estimator, param_distributions=dist) rs.fit(X, y) unique_samples = set() for sample in counter: - v = '' + v = "" for val in sample.values(): v += str(val) unique_samples.add(v) @@ -94,14 +96,15 @@ def test_random_param_list_simple(simple_distribution): """ from foreshadow.optimizers.random_search import HyperOptRandomSampler + dist = simple_distribution - Sampler = HyperOptRandomSampler(dist, 10) + Sampler = HyperOptRandomSampler(dist, 10, max_tries=100000) samples = [] for sample in Sampler: samples.append(sample) unique_samples = set() for sample in samples: - v = '' + v = "" for val in sample.values(): v += str(val) unique_samples.add(v) diff --git a/foreshadow/tests/test_optimizers/test_tuner.py b/foreshadow/tests/test_optimizers/test_tuner.py index 1c7b879..ff86c07 100644 --- a/foreshadow/tests/test_optimizers/test_tuner.py +++ b/foreshadow/tests/test_optimizers/test_tuner.py @@ -1,3 +1 @@ """Test tuner.py""" - -import pytest diff --git a/pyproject.toml b/pyproject.toml index e26b01f..e0718eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -123,7 +123,7 @@ exclude = ''' [tool.isort] known_first_party = 'foreshadow' -known_third_party = ["category_encoders", "jsonpickle", "marshmallow", "numpy", "pandas", "patchy", "pytest", "scipy", "six", "sklearn", "tpot", "yaml"] +known_third_party = ["category_encoders", "hyperopt", "jsonpickle", "marshmallow", "numpy", "pandas", "patchy", "pytest", "scipy", "six", "sklearn", "tpot", "yaml"] multi_line_output = 3 lines_after_imports = 2 force_grid_wrap = 0 From edac4872ce004c06a4bc135c2a078a9a0a9cb710 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Mon, 19 Aug 2019 14:20:18 -0400 Subject: [PATCH 30/37] Adding non_unique flag to HyperOptSampler --- foreshadow/optimizers/random_search.py | 12 ++++-- 
foreshadow/steps/preparerstep.py          | 37 +-------
 .../test_optimizers/test_random_search.py | 18 ++++++++-
 3 files changed, 27 insertions(+), 40 deletions(-)

diff --git a/foreshadow/optimizers/random_search.py b/foreshadow/optimizers/random_search.py
index 761367d..c4f00bc 100644
--- a/foreshadow/optimizers/random_search.py
+++ b/foreshadow/optimizers/random_search.py
@@ -21,7 +21,8 @@ def __init__(
             param_distributions: Parameter distribution as nested list-dict.
             n_iter: length of returned iterator.
             random_state: random state.
-            max_tries: max attempts to try to get a new unique value.
+            max_tries: max attempts to try to get a new unique value. If
+                None, will not attempt to get unique values.
         """
         param_distributions = _replace_list(
             None, param_distributions.param_distributions, hp.choice
         )
@@ -37,7 +38,9 @@ def __iter__(self):
         As each state is defined using hp.choice, we don't explicitly know
         each of the unique states that our estimator can be set to. We
         sample the distribution of states up until max_tries times to get
-        these unique states and return an iterable of them.
+        these unique states and return an iterable of them. If max_tries is
+        None (set in constructor), then we sample the search space and add each
+        sampled value.
 
         Returns:
             iterable of unique states.
@@ -47,12 +50,13 @@ def __iter__(self):
         # in this case we want to sample without replacement
         rng = check_random_state(self.random_state)
         prev_samples = []
+        max_tries = self.max_tries if self.max_tries is not None else 1
         for _ in six.moves.range(self.n_iter):
             # import pdb; pdb.set_trace()
             sample = stoch.sample(self.param_distributions, rng=rng)
             n_tries = 0
-            while sample not in prev_samples and n_tries < self.max_tries:
-                if sample not in prev_samples:
+            while sample not in prev_samples and n_tries < max_tries:
+                if sample not in prev_samples or self.max_tries is None:
                     prev_samples.append(sample)
                     break
                 sample = stoch.sample(self.param_distributions, rng=rng)
diff --git a/foreshadow/steps/preparerstep.py b/foreshadow/steps/preparerstep.py
index 5d85a96..837f68e 100644
--- a/foreshadow/steps/preparerstep.py
+++ b/foreshadow/steps/preparerstep.py
@@ -546,7 +546,8 @@ def _get_param_names(cls):
 
         This method is implemented as a convenience for any child. It will
         automatically climb the MRO for a child until it reaches this class
-        (the last parent who's __init__ params we care about).
+        (the last parent whose __init__ params we care about). Also adds
+        _parallel_process to the sklearn get_params API.
 
         Returns:
             params for all parents up to and including PreparerStep.
@@ -560,37 +561,3 @@ def _get_param_names(cls):
         if "_parallel_process" not in params:
             params += ["_parallel_process"]
         return params
-
-    def get_params(self, deep=True):
-        """See super.
-
-        Overridden to add this parent classes' params to children and to
-        include _parallel_process. _get_param_names holds the logic for
-        getting all parent params.
-
-        Args:
-            deep: See super.
-
-        Returns:
-            See super.
-
-        """
-        params = super().get_params(deep=deep)
-        # params.update(
-        #     {"_parallel_process": getattr(self, "_parallel_process", None)}
-        # )
-        return params
-
-    def set_params(self, **params):
-        """See super.
-
-        Overridden to afld this parent classes' params to children and to
-        include _parallel_process. _get_param_names holds the logic for
-        getting all parent params.
-
-        Args:
-            **params: see super.
- - """ - # self._parallel_process = params.pop("_parallel_process", None) - super().set_params(**params) diff --git a/foreshadow/tests/test_optimizers/test_random_search.py b/foreshadow/tests/test_optimizers/test_random_search.py index 6fe6086..f1cffd3 100644 --- a/foreshadow/tests/test_optimizers/test_random_search.py +++ b/foreshadow/tests/test_optimizers/test_random_search.py @@ -98,7 +98,7 @@ def test_random_param_list_simple(simple_distribution): from foreshadow.optimizers.random_search import HyperOptRandomSampler dist = simple_distribution - Sampler = HyperOptRandomSampler(dist, 10, max_tries=100000) + Sampler = HyperOptRandomSampler(dist, 10, max_tries=1000000) samples = [] for sample in Sampler: samples.append(sample) @@ -109,3 +109,19 @@ def test_random_param_list_simple(simple_distribution): v += str(val) unique_samples.add(v) assert len(unique_samples) == 4 # 4 unique samples. + + +def test_random_param_list_simple_non_unique(simple_distribution): + """Test that sampler properly gives non unique iterations. + + Args: + simple_distribution: fixture parameter distribution. + + Returns: + + """ + from foreshadow.optimizers.random_search import HyperOptRandomSampler + + dist = simple_distribution + Sampler = HyperOptRandomSampler(dist, 10, max_tries=None) + assert len(Sampler) == 10 # 10 non unique samples. From 470daa19581bf306fff5ce377cc7dea2ad7a8031 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Mon, 19 Aug 2019 16:55:02 -0400 Subject: [PATCH 31/37] Cleaning up implementation --- foreshadow/foreshadow.py | 29 +---------- foreshadow/optimizers/__init__.py | 36 +------------ foreshadow/optimizers/param_distribution.py | 52 +++++++++++++++---- foreshadow/optimizers/random_search.py | 10 ++-- .../test_optimizers/test_random_search.py | 3 +- 5 files changed, 51 insertions(+), 79 deletions(-) diff --git a/foreshadow/foreshadow.py b/foreshadow/foreshadow.py index 7ad863c..187a7b5 100644 --- a/foreshadow/foreshadow.py +++ b/foreshadow/foreshadow.py @@ -10,7 +10,7 @@ from foreshadow.columnsharer import ColumnSharer from foreshadow.estimators.auto import AutoEstimator from foreshadow.estimators.meta import MetaEstimator -from foreshadow.optimizers import ParamSpec, Tuner, test_params +from foreshadow.optimizers import ParamSpec, Tuner from foreshadow.pipeline import SerializablePipeline from foreshadow.preparer import DataPreparer from foreshadow.utils import check_df @@ -219,20 +219,7 @@ def fit(self, data_df, y_df): if self.optimizer is not None: self.pipeline.fit(X_df, y_df) - # self.pipeline.predict(X_df) - # print(self.pipeline.get_params(deep=True)) - # for key in self.pipeline.get_params().keys(): - # if key.find('feature_preprocessor') != -1: - # print(key) - # print(test_params[0]) - # print([x in self.pipeline.get_params().keys() for x in - # test_params[0].keys()]) - # self.pipeline.set_params(**test_params[0]) - # self.pipeline.fit(X_df, y_df) - # Calculate parameter search space - # param_ranges = param_mapping(deepcopy(self.pipeline), X_df, y_df) - params = ParamSpec() - params.set_params(param_distributions=test_params) + params = ParamSpec(self.pipeline, X_df, y_df) self.opt_instance = self.optimizer( estimator=self.pipeline, param_distributions=params, @@ -245,19 +232,7 @@ def fit(self, data_df, y_df): ) self.tuner = Tuner(self.pipeline, params, self.opt_instance) self.tuner.fit(X_df, y_df) - import pandas as pd - - results = pd.DataFrame(self.opt_instance.cv_results_) - results = results[ - [ - c - for c in results.columns - if all(s not in c for s in 
["time", "params"]) - ] - ] - print(results) self.pipeline = self.tuner.transform(self.pipeline) - print(self.pipeline) # extract trained preprocessors if self.X_preparer is not None: self.X_preparer = self.pipeline.steps[0][1] diff --git a/foreshadow/optimizers/__init__.py b/foreshadow/optimizers/__init__.py index 5d3b893..b144c3f 100644 --- a/foreshadow/optimizers/__init__.py +++ b/foreshadow/optimizers/__init__.py @@ -3,39 +3,7 @@ # from foreshadow.optimizers.param_mapping import param_mapping from foreshadow.optimizers.param_distribution import ParamSpec from foreshadow.optimizers.random_search import RandomSearchCV -from foreshadow.optimizers.tuner import Tuner -from foreshadow.utils import get_transformer +from foreshadow.optimizers.tuner import Tuner, get -test_params = [ - { - "s__transformer": "StandardScaler", - "s__transformer__with_mean": [False, True], - }, - { - "s__transformer": "MinMaxScaler", - "s__transformer__feature_range": [(0, 1), (0, 0.5)], - }, -] - -test_params = [ - { - "X_preparer__feature_preprocessor___parallel_process__group" - ": 0__CategoricalEncoder__transformer__ohe": get_transformer( - "OneHotEncoder" - )(), - "X_preparer__feature_preprocessor___parallel_process__group" - ": 0__CategoricalEncoder__transformer__ohe__drop_invariant": [ - True, - False, - ], - }, - { - "X_preparer__feature_preprocessor___parallel_process__group" - ": 0__CategoricalEncoder__transformer__ohe": get_transformer( - "HashingEncoder" - )() - }, -] - -__all__ = ["ParamSpec", "Tuner", "RandomSearchCV", "test_params"] +__all__ = ["ParamSpec", "Tuner", "RandomSearchCV", "get"] diff --git a/foreshadow/optimizers/param_distribution.py b/foreshadow/optimizers/param_distribution.py index 2a586d0..236f001 100644 --- a/foreshadow/optimizers/param_distribution.py +++ b/foreshadow/optimizers/param_distribution.py @@ -2,8 +2,9 @@ from collections import MutableMapping +import hyperopt.hp as hp import foreshadow.serializers as ser - +from .tuner import get, _replace_list """ 2. cases: @@ -100,16 +101,41 @@ def __init__(self, fs_pipeline=None, X_df=None, y_df=None): ) self._param_set = False self.param_distributions = [] + if not (fs_pipeline is None) and (X_df is None) and (y_df) is None: - raise NotImplementedError("Automatic param spec not implemented") - # automatic pipelining. - # params = fs_pipeline.get_params() - # for kwarg in kwargs: - # key, delim, subkey = kwarg.partition('__') - # self.param_distribution[key] = {} - # while delim != '': - # pass - # self._param_set = True + self.param_distributions = [ + { + "X_preparer__feature_preprocessor___parallel_process__group" + ": 0__CategoricalEncoder__transformer__ohe": get( + "OneHotEncoder" + ), + "X_preparer__feature_preprocessor___parallel_process__group" + ": 0__CategoricalEncoder__transformer__ohe__drop_invariant": [ + True, + False, + ], + }, + { + "X_preparer__feature_preprocessor___parallel_process__group" + ": 0__CategoricalEncoder__transformer__ohe": get( + "HashingEncoder" + ) + }, + ] + + def convert(self, key, replace_val=hp.choice): + """Convert internal self.param_distributions to valid distribution. + + Uses _replace_list to replace all lists with replace_val + + Args: + key: key to use for top level hp.choice name + replace_val: value to replace lists with. + + """ + self.param_distributions = _replace_list(key, + self.param_distributions, + replace_with=replace_val) def get_params(self, deep=True): """Get the params for this object. Used for serialization. 
@@ -188,6 +214,9 @@ def __len__(self): """ return len(self.param_distributions) + def __contains__(self, item): + return self.param_distributions.__contains__(item) + def __delitem__(self, key): # overriding abstract method, not to be used. """Not implemented, only overrode because it is an abstract method. @@ -201,3 +230,6 @@ def __delitem__(self, key): # overriding abstract method, not to be used. raise NotImplementedError( "Abstract method not implemented. Should " "not be called.fl" ) + + def __hash__(self): + return self.param_distributions.__hash__() diff --git a/foreshadow/optimizers/random_search.py b/foreshadow/optimizers/random_search.py index c4f00bc..d433125 100644 --- a/foreshadow/optimizers/random_search.py +++ b/foreshadow/optimizers/random_search.py @@ -6,8 +6,6 @@ from sklearn.model_selection._search import BaseSearchCV from sklearn.utils import check_random_state -from .tuner import _replace_list - class HyperOptRandomSampler(object): """Sampler that is an iterable over param distribution.""" @@ -24,9 +22,7 @@ def __init__( max_tries: max attempts to try to get a new unique value. If None, will not attempt to get unique values. """ - param_distributions = _replace_list( - None, param_distributions.param_distributions, hp.choice - ) + param_distributions.convert(None, hp.choice) self.param_distributions = param_distributions self.n_iter = n_iter self.random_state = random_state @@ -53,13 +49,13 @@ def __iter__(self): max_tries = self.max_tries if self.max_tries is not None else 1 for _ in six.moves.range(self.n_iter): # import pdb; pdb.set_trace() - sample = stoch.sample(self.param_distributions, rng=rng) + sample = stoch.sample(self.param_distributions(), rng=rng) n_tries = 0 while sample not in prev_samples and n_tries < max_tries: if sample not in prev_samples or self.max_tries is None: prev_samples.append(sample) break - sample = stoch.sample(self.param_distributions, rng=rng) + sample = stoch.sample(self.param_distributions(), rng=rng) n_tries += 1 return iter(prev_samples) diff --git a/foreshadow/tests/test_optimizers/test_random_search.py b/foreshadow/tests/test_optimizers/test_random_search.py index f1cffd3..8f2d285 100644 --- a/foreshadow/tests/test_optimizers/test_random_search.py +++ b/foreshadow/tests/test_optimizers/test_random_search.py @@ -98,7 +98,7 @@ def test_random_param_list_simple(simple_distribution): from foreshadow.optimizers.random_search import HyperOptRandomSampler dist = simple_distribution - Sampler = HyperOptRandomSampler(dist, 10, max_tries=1000000) + Sampler = HyperOptRandomSampler(dist, 10, max_tries=999999999999999999) samples = [] for sample in Sampler: samples.append(sample) @@ -108,6 +108,7 @@ def test_random_param_list_simple(simple_distribution): for val in sample.values(): v += str(val) unique_samples.add(v) + print(unique_samples) assert len(unique_samples) == 4 # 4 unique samples. 
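
Note: the unique-sampling mechanism that HyperOptRandomSampler.__iter__
describes, and that these tests exercise, can be reproduced with a minimal,
self-contained sketch. It uses only the public APIs the patches above already
rely on (hp.choice, hyperopt.pyll.stochastic.sample, sklearn's
check_random_state); the nested list-dict spec mirrors the simple_distribution
fixture, the inline dict comprehension is a hand-rolled stand-in for
ParamSpec.convert / _replace_list, and names like dist, space, and seen are
illustrative, not part of the patch:

import hyperopt.pyll.stochastic as stoch
from hyperopt import hp
from sklearn.utils import check_random_state

# Nested list-dict spec, mirroring the simple_distribution fixture.
dist = [
    {
        "s__transformer": "StandardScaler",
        "s__transformer__with_mean": [False, True],
    },
    {
        "s__transformer": "MinMaxScaler",
        "s__transformer__feature_range": [(0, 1), (0, 0.5)],
    },
]

# Stand-in for _replace_list / ParamSpec.convert: every list becomes an
# hp.choice node, including the top-level list of candidate dicts.
space = hp.choice(
    "root",
    [
        {
            key: hp.choice(key, val) if isinstance(val, list) else val
            for key, val in d.items()
        }
        for d in dist
    ],
)

rng = check_random_state(0)
samples, seen = [], set()
for _ in range(10):  # n_iter draws from the space
    sample = stoch.sample(space, rng=rng)
    key = "".join(str(val) for val in sample.values())  # dedupe as the tests do
    if key not in seen:  # "unique" mode, i.e. max_tries is not None
        seen.add(key)
        samples.append(sample)

print(len(samples))  # at most 4 distinct configurations

With two candidate scalers and two options each, only four distinct
configurations exist, so deduplication leaves at most four samples; that is
the invariant test_random_param_list_simple asserts, with a large max_tries
so the sampler keeps redrawing until it finds unseen configurations.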
From 3e788fa91fd876cf93c6a590537d9e402691fdf3 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Mon, 19 Aug 2019 16:57:54 -0400 Subject: [PATCH 32/37] flake --- foreshadow/optimizers/param_distribution.py | 49 +++++++++++++-------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/foreshadow/optimizers/param_distribution.py b/foreshadow/optimizers/param_distribution.py index 236f001..f127885 100644 --- a/foreshadow/optimizers/param_distribution.py +++ b/foreshadow/optimizers/param_distribution.py @@ -3,8 +3,11 @@ from collections import MutableMapping import hyperopt.hp as hp + import foreshadow.serializers as ser -from .tuner import get, _replace_list + +from .tuner import _replace_list, get + """ 2. cases: @@ -89,7 +92,6 @@ def __init__(self, fs_pipeline=None, X_df=None, y_df=None): Raises: ValueError: if either all kwargs are not passed or all aren't passed. - NotImplementedError: All kwargs passed. """ if not (fs_pipeline is None) == (X_df is None) == (y_df is None): @@ -105,21 +107,17 @@ def __init__(self, fs_pipeline=None, X_df=None, y_df=None): if not (fs_pipeline is None) and (X_df is None) and (y_df) is None: self.param_distributions = [ { - "X_preparer__feature_preprocessor___parallel_process__group" - ": 0__CategoricalEncoder__transformer__ohe": get( - "OneHotEncoder" - ), - "X_preparer__feature_preprocessor___parallel_process__group" - ": 0__CategoricalEncoder__transformer__ohe__drop_invariant": [ - True, - False, - ], + "X_preparer__feature_preprocessor___" + "parallel_process__group: 0__CategoricalEncoder__" + "transformer__ohe": get("OneHotEncoder"), + "X_preparer__feature_preprocessor" + "___parallel_process__group: 0__CategoricalEncoder__" + "transformer__ohe__drop_invariant": [True, False], }, { - "X_preparer__feature_preprocessor___parallel_process__group" - ": 0__CategoricalEncoder__transformer__ohe": get( - "HashingEncoder" - ) + "X_preparer__feature_preprocessor___" + "parallel_process__group: 0__CategoricalEncoder__" + "transformer__ohe": get("HashingEncoder") }, ] @@ -133,9 +131,9 @@ def convert(self, key, replace_val=hp.choice): replace_val: value to replace lists with. """ - self.param_distributions = _replace_list(key, - self.param_distributions, - replace_with=replace_val) + self.param_distributions = _replace_list( + key, self.param_distributions, replace_with=replace_val + ) def get_params(self, deep=True): """Get the params for this object. Used for serialization. @@ -215,6 +213,15 @@ def __len__(self): return len(self.param_distributions) def __contains__(self, item): + """Get if internal param distribution contains item. + + Args: + item: item to check + + Returns: + True if it contains the item. False else. + + """ return self.param_distributions.__contains__(item) def __delitem__(self, key): # overriding abstract method, not to be used. @@ -232,4 +239,10 @@ def __delitem__(self, key): # overriding abstract method, not to be used. ) def __hash__(self): + """Return unique hash from self.param_distributions. 
+ + Returns: + unique hash from internal param distribution + + """ return self.param_distributions.__hash__() From 55e6b22a673b7639ad37a29b0674586e5f0b8982 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Thu, 22 Aug 2019 19:24:26 -0400 Subject: [PATCH 33/37] CR --- foreshadow/concrete/internals/fancyimpute.py | 4 +- foreshadow/concrete/internals/labelencoder.py | 8 ++-- foreshadow/estimators/auto.py | 25 ----------- foreshadow/foreshadow.py | 9 +--- foreshadow/optimizers/param_distribution.py | 29 +----------- foreshadow/optimizers/random_search.py | 6 +-- foreshadow/preparer.py | 1 - foreshadow/serializers.py | 44 +------------------ foreshadow/smart/smart.py | 31 ------------- foreshadow/steps/preparerstep.py | 2 - .../test_optimizers/test_random_search.py | 12 ++--- .../test_cleaners/test_data_cleaner.py | 15 ------- foreshadow/wrapper.py | 41 ----------------- 13 files changed, 19 insertions(+), 208 deletions(-) diff --git a/foreshadow/concrete/internals/fancyimpute.py b/foreshadow/concrete/internals/fancyimpute.py index 067ae54..b9ca4d9 100644 --- a/foreshadow/concrete/internals/fancyimpute.py +++ b/foreshadow/concrete/internals/fancyimpute.py @@ -43,7 +43,7 @@ def _load_imputer(self): self.imputer = self.cls(**self.impute_kwargs) def get_params(self, deep=True): - """Get parameters for this estimator. + """Get parameters for this transformer. Args: deep (bool): If True, will return the parameters for this estimator @@ -56,7 +56,7 @@ def get_params(self, deep=True): return super().get_params(deep=deep) def set_params(self, **params): - """Set the parameters of this estimator. + """Set the parameters of this transformer. Valid parameter keys can be listed with :meth:`get_params()`. diff --git a/foreshadow/concrete/internals/labelencoder.py b/foreshadow/concrete/internals/labelencoder.py index d1af849..42dedf1 100644 --- a/foreshadow/concrete/internals/labelencoder.py +++ b/foreshadow/concrete/internals/labelencoder.py @@ -64,13 +64,13 @@ def inverse_transform(self, X): return self.encoder.inverse_transform(X) def get_params(self, deep=True): - """Get parameters for this estimator. See super. + """Get parameters for this transformer. See super. Args: deep: deep to super get_params Returns: - Params for this estimator. See super. + Params for this transformer. See super. """ params = super().get_params(deep=deep) @@ -81,10 +81,10 @@ def get_params(self, deep=True): return params def set_params(self, **params): - """Set parameters for this estimator. See super. + """Set parameters for this transformer. See super. Args: - **params: params to set on this estimator. + **params: params to set on this transformer. """ self.encoder = params.pop("encoder") diff --git a/foreshadow/estimators/auto.py b/foreshadow/estimators/auto.py index c5d96a1..77287fe 100644 --- a/foreshadow/estimators/auto.py +++ b/foreshadow/estimators/auto.py @@ -315,31 +315,6 @@ def score(self, X, y, sample_weight=None): y = check_df(y) return self.estimator.score(X, y) - def get_params(self, deep=True): - """Get params for this object. See super. - - Args: - deep: True to recursively call get_params, False to not. - - Returns: - params for this object. - - """ - params = super().get_params(deep=deep) - return params - - def set_params(self, **params): - """Set params for this object. See super. - - Args: - **params: params to set. - - Returns: - See super. - - """ - return super().set_params(**params) - def determine_problem_type(y): """Determine modeling problem type. 
diff --git a/foreshadow/foreshadow.py b/foreshadow/foreshadow.py index 187a7b5..0cad70f 100644 --- a/foreshadow/foreshadow.py +++ b/foreshadow/foreshadow.py @@ -4,7 +4,6 @@ import warnings from sklearn.model_selection._search import BaseSearchCV -from sklearn.utils.validation import check_is_fitted from foreshadow.base import BaseEstimator from foreshadow.columnsharer import ColumnSharer @@ -58,7 +57,6 @@ def __init__( self.data_columns = None if isinstance(self.estimator, AutoEstimator) and optimizer is not None: - # TODO implement V2 architecture here. warnings.warn( "An automatic estimator cannot be used with an optimizer." " Proceeding without use of optimizer" @@ -176,14 +174,11 @@ def optimizer(self, o): raise ValueError("Invalid optimizer: '{}' passed.".format(o)) def _reset(self): - try: - check_is_fitted(self, "pipeline") + if hasattr(self, "pipeline"): del self.pipeline - check_is_fitted(self, "tuner") + if hasattr(self, "tuner"): del self.tuner del self.opt_instance - except AttributeError: - pass def fit(self, data_df, y_df): """Fit the Foreshadow instance using the provided input data. diff --git a/foreshadow/optimizers/param_distribution.py b/foreshadow/optimizers/param_distribution.py index f127885..2c927b8 100644 --- a/foreshadow/optimizers/param_distribution.py +++ b/foreshadow/optimizers/param_distribution.py @@ -9,34 +9,6 @@ from .tuner import _replace_list, get -""" -2. cases: - -1. Apply override to initial columns - -In this case, we simply need to override the get_mapping result. This is -hard to do because it is computed at .fit() time, not __init__ time. We need to -compute it at .fit() time because we need access to the dataset. Instead, -we will pass overrides to the __init__ and handle the errors if users choose -wrong columns. - - -2. apply override to a dynamically created transformer - -In this case, the output from a previous step in the PreparerStep's pipeline -created new columns. Thesee will not be available at get_mapping() time. If -we pass in these columns to ParallelProcessor, it will try to slice then out -which will break. We do however know the initial column and, knowing -DynamicPipeline's naming scheme, the new column's name. We can enable an -override on a per column level by passing in the eventual columns to be -overridden to that group process. - - - -ParamSpec -""" - - class ParamSpec(MutableMapping, ser.ConcreteSerializerMixin): """Holds the specification of the parameter search space. @@ -104,6 +76,7 @@ def __init__(self, fs_pipeline=None, X_df=None, y_df=None): self._param_set = False self.param_distributions = [] + # automatic pipeline determination. 
if not (fs_pipeline is None) and (X_df is None) and (y_df) is None: self.param_distributions = [ { diff --git a/foreshadow/optimizers/random_search.py b/foreshadow/optimizers/random_search.py index d433125..2d7901d 100644 --- a/foreshadow/optimizers/random_search.py +++ b/foreshadow/optimizers/random_search.py @@ -1,7 +1,6 @@ """Random optimization of params.""" import hyperopt.pyll.stochastic as stoch -import six from hyperopt import hp from sklearn.model_selection._search import BaseSearchCV from sklearn.utils import check_random_state @@ -47,11 +46,10 @@ def __iter__(self): rng = check_random_state(self.random_state) prev_samples = [] max_tries = self.max_tries if self.max_tries is not None else 1 - for _ in six.moves.range(self.n_iter): - # import pdb; pdb.set_trace() + for _ in range(self.n_iter): sample = stoch.sample(self.param_distributions(), rng=rng) n_tries = 0 - while sample not in prev_samples and n_tries < max_tries: + while sample not in prev_samples or n_tries < max_tries: if sample not in prev_samples or self.max_tries is None: prev_samples.append(sample) break diff --git a/foreshadow/preparer.py b/foreshadow/preparer.py index b9d921e..13d4ab8 100644 --- a/foreshadow/preparer.py +++ b/foreshadow/preparer.py @@ -71,7 +71,6 @@ def __init__( engineerer_kwargs=None, preprocessor_kwargs=None, reducer_kwargs=None, - modeler_kwargs=None, y_var=None, **kwargs, ): diff --git a/foreshadow/serializers.py b/foreshadow/serializers.py index b5ff6b7..27f59a0 100644 --- a/foreshadow/serializers.py +++ b/foreshadow/serializers.py @@ -269,8 +269,7 @@ def dict_serialize(self, deep=True): """ return _make_serializable( - self.get_params(deep), - # serialize_args=self.serialize_params + self.get_params(deep), serialize_args=self.serialize_params ) @classmethod @@ -447,47 +446,6 @@ def dict_serialize(self, deep=False): return super().dict_serialize(deep=deep) -class ParamSpecSerializerMixin(ConcreteSerializerMixin): - """Custom serialization for ParamSpec object.""" - - def serialize(self, **kwargs): - """Serialize data as specified. - - If you would like to save the transformer parameters without saving - its state in a human readable form, use `dict`. If you would like to - save the transformer with its internal state use `inline` to - save it in its hex form in the json. If you would like a more space - efficient form save use `disk` to save it a cache directory in - the root (~/.foreshadow/cache) that must be manually cleaned. Lastly, - if the transformer being serialized is custom, then the class itself - will be cached in pickle form and placed in the `pickle_class` - attribute. - - Args: - **kwargs: The keyword arguments to pass to the serialization method - - Returns: - str: The appropriate string representation of the serialization. - - """ - full_ser = super().serialize(**kwargs) - return full_ser - - @classmethod - def deserialize(cls, data): - """Specify the method routing for a transformer deserialization. - - Args: - data (dict): The counterpart to serialize that has all the required - args to build a transformer. - - Returns: - object: The deserialized transformer - - """ - return super().deserialize(data) - - def deserialize(data): """Allow the deserialization of any transformer. 
diff --git a/foreshadow/smart/smart.py b/foreshadow/smart/smart.py index e67da4c..a268f03 100644 --- a/foreshadow/smart/smart.py +++ b/foreshadow/smart/smart.py @@ -127,37 +127,6 @@ def unset_resolve(self): self.should_resolve = False self.force_reresolve = False - def get_params(self, deep=True): - """Get parameters for this estimator. - - Note: self.name and self.keep_columns are provided by the wrapping - method - - Args: - deep (bool): If True, will return the parameters for this estimator - and contained sub-objects that are estimators. - - Returns: - Parameter names mapped to their values. - - """ - params = super().get_params(deep=deep) - return params - - def set_params(self, **params): - """Set the parameters of this estimator. - - Valid parameter keys can be listed with :meth:`get_params()`. - - Args: - **params (dict): any valid parameter of this estimator - - Returns: - see super. - - """ - return super().set_params(**params) - @abstractmethod def pick_transformer(self, X, y=None, **fit_params): """Pick the correct transformer object for implementations. diff --git a/foreshadow/steps/preparerstep.py b/foreshadow/steps/preparerstep.py index 837f68e..7b55a1f 100644 --- a/foreshadow/steps/preparerstep.py +++ b/foreshadow/steps/preparerstep.py @@ -454,8 +454,6 @@ def fit(self, X, *args, **kwargs): transformed data handled by Pipeline._fit """ - # TODO make fit remove a step if nothing is done, rather than a - # NoTransform Transformer. self.fit_transform(X, *args, **kwargs) return self diff --git a/foreshadow/tests/test_optimizers/test_random_search.py b/foreshadow/tests/test_optimizers/test_random_search.py index 8f2d285..f6ba22b 100644 --- a/foreshadow/tests/test_optimizers/test_random_search.py +++ b/foreshadow/tests/test_optimizers/test_random_search.py @@ -36,7 +36,7 @@ def iris_data(): @pytest.fixture() -def estimator(mocker): +def estimator_counter(mocker): """Mocked estimator. .keys method must be set to return all possible keys from the parameter distribution.""" counter = [] @@ -57,18 +57,20 @@ def get_params(self, deep=True): return Estimator, counter -def test_random_search_simple(estimator, simple_distribution, iris_data): +def test_random_search_simple( + estimator_counter, simple_distribution, iris_data +): """Test that random search finds all different parameter specifications. Args: - estimator: fixture estimator + estimator_counter: fixture estimator_counter simple_distribution: fixture distribution to parameter optimize on. iris_data: fixture dataset to use. 
""" from foreshadow.optimizers import RandomSearchCV - estimator, counter = estimator + estimator, counter = estimator_counter dist = simple_distribution keys = {key: None for d in dist.param_distributions for key in d} estimator.keys = lambda x: keys @@ -98,7 +100,7 @@ def test_random_param_list_simple(simple_distribution): from foreshadow.optimizers.random_search import HyperOptRandomSampler dist = simple_distribution - Sampler = HyperOptRandomSampler(dist, 10, max_tries=999999999999999999) + Sampler = HyperOptRandomSampler(dist, 10, max_tries=100) samples = [] for sample in Sampler: samples.append(sample) diff --git a/foreshadow/tests/test_transformers/test_concrete/test_cleaners/test_data_cleaner.py b/foreshadow/tests/test_transformers/test_concrete/test_cleaners/test_data_cleaner.py index 3206c12..3046e0e 100644 --- a/foreshadow/tests/test_transformers/test_concrete/test_cleaners/test_data_cleaner.py +++ b/foreshadow/tests/test_transformers/test_concrete/test_cleaners/test_data_cleaner.py @@ -166,19 +166,4 @@ def test_numerical_input_fittransform(): assert np.array_equal(transformed_data, data) -# def test_get_params(): -# import pandas as pd -# from foreshadow.preparer import CleanerMapper -# from foreshadow.columnsharer import ColumnSharer -# -# columns = ["financials"] -# data = pd.DataFrame({"financials": ["", "", "", ""]}, columns=columns) -# cs = ColumnSharer() -# dc = CleanerMapper(column_sharer=cs) -# dc.fit(data) -# from pprint import pprint -# print(pprint(dc.get_params())) -# print(dc._parallel_process) - - # TODO test graph, could be implemented very wrong. diff --git a/foreshadow/wrapper.py b/foreshadow/wrapper.py index b7b5893..bf032ea 100644 --- a/foreshadow/wrapper.py +++ b/foreshadow/wrapper.py @@ -109,47 +109,6 @@ def __init__(self, *args, name=None, keep_columns=False, **kwargs): self.is_wrapped = True - def get_params(self, deep=True): - """Override standard get_params to handle nonstandard init. - - BaseEstimator for sklearn gets and sets parameters based on the - init statement for that class. Since this class is used to wrap - a parent transformer (by OOP), we use the parent's init - statement and then this DFTransformer's additional arguments. - We must override _get_param_names so that this method captures - the parent's __init__. - - Args: - deep (bool): If True, will return the parameters for this - estimator and contained sub-objects that are estimators. - - Returns: - Parameter names mapped to their values for parent + - DFTransformer wrapper. - - """ - params = super().get_params(deep=deep) - return params - - def set_params(self, **params): - """Override standard set_params to handle nonstandard init. - - BaseEstimator for sklearn gets and sets parameters based on the - init statement for that class. Since this class is used to wrap - a parent transformer (by OOP), we use the parent's init - statement and then this DFTransformer's additional arguments. - We must override _get_param_names so that this method captures - the parent's __init__. - - Args: - **params: params to init. - - Returns: - See super. - - """ - return super().set_params(**params) - def fit(self, X, *args, **kwargs): """Fit the estimator or transformer, pandas enabled. From 3cd165fbc7c26e719cb124d6d51c260658b77e2c Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Fri, 23 Aug 2019 11:35:18 -0400 Subject: [PATCH 34/37] isort. 
--- foreshadow/parallelprocessor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/foreshadow/parallelprocessor.py b/foreshadow/parallelprocessor.py index cf9833f..1cc5382 100644 --- a/foreshadow/parallelprocessor.py +++ b/foreshadow/parallelprocessor.py @@ -9,9 +9,10 @@ _transform_one, ) -from .serializers import ConcreteSerializerMixin from foreshadow.base import BaseEstimator +from .serializers import ConcreteSerializerMixin + class ParallelProcessor(FeatureUnion, ConcreteSerializerMixin): """Class to support parallel operation on dataframes. From f866ce890f8175d54004f1ed89b038c4ce3b07bb Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Fri, 23 Aug 2019 14:45:07 -0400 Subject: [PATCH 35/37] updating lock --- poetry.lock | 89 +++++++++++++++++++++++++++-------------------------- 1 file changed, 45 insertions(+), 44 deletions(-) diff --git a/poetry.lock b/poetry.lock index a06eb87..3d29e0b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -65,7 +65,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" version = "1.3.0" [[package]] -category = "dev" +category = "main" description = "Classes Without Boilerplate" name = "attrs" optional = false @@ -180,7 +180,7 @@ description = "Code coverage measurement for Python" name = "coverage" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, <4" -version = "4.5.3" +version = "4.5.4" [[package]] category = "main" @@ -188,7 +188,7 @@ description = "A domain-specific language for modeling convex optimization probl name = "cvxpy" optional = false python-versions = "*" -version = "1.0.24" +version = "1.0.25" [package.dependencies] ecos = ">=2" @@ -205,7 +205,7 @@ description = "A utility for ensuring Google-style docstringsstay up to date wit name = "darglint" optional = false python-versions = ">=3.5" -version = "0.5.7" +version = "0.5.8" [[package]] category = "main" @@ -309,7 +309,7 @@ description = "Extension for flake8 which uses pydocstyle to check docstrings" name = "flake8-docstrings" optional = false python-versions = "*" -version = "1.3.0" +version = "1.3.1" [package.dependencies] flake8 = "*" @@ -357,7 +357,7 @@ description = "HTTP/2-based RPC framework" name = "grpcio" optional = false python-versions = "*" -version = "1.22.0" +version = "1.23.0" [package.dependencies] six = ">=1.5.2" @@ -380,7 +380,7 @@ description = "File identification library for Python" name = "identify" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "1.4.5" +version = "1.4.6" [[package]] category = "main" @@ -404,7 +404,7 @@ description = "Read metadata from Python packages" name = "importlib-metadata" optional = false python-versions = ">=2.7,!=3.0,!=3.1,!=3.2,!=3.3" -version = "0.18" +version = "0.19" [package.dependencies] zipp = ">=0.5" @@ -470,12 +470,12 @@ description = "Deep Learning for humans" name = "keras" optional = false python-versions = "*" -version = "2.2.4" +version = "2.2.5" [package.dependencies] h5py = "*" -keras-applications = ">=1.0.6" -keras-preprocessing = ">=1.0.5" +keras-applications = ">=1.0.8" +keras-preprocessing = ">=1.1.0" numpy = ">=1.9.1" pyyaml = "*" scipy = ">=0.14" @@ -542,7 +542,7 @@ description = "A lightweight library for converting complex datatypes to and fro name = "marshmallow" optional = false python-versions = "*" -version = "2.19.5" +version = "2.20.2" [[package]] category = "dev" @@ -607,8 +607,8 @@ category = "main" description = "NumPy is the fundamental package for array computing with Python." 
name = "numpy" optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" -version = "1.16.4" +python-versions = ">=3.5" +version = "1.17.0" [[package]] category = "main" @@ -629,9 +629,10 @@ description = "Core utilities for Python packages" name = "packaging" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "19.0" +version = "19.1" [package.dependencies] +attrs = "*" pyparsing = ">=2.0.2" six = "*" @@ -688,7 +689,7 @@ description = "A framework for managing and maintaining multi-language pre-commi name = "pre-commit" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "1.17.0" +version = "1.18.2" [package.dependencies] "aspy.yaml" = "*" @@ -711,7 +712,7 @@ description = "Protocol Buffers" name = "protobuf" optional = false python-versions = "*" -version = "3.9.0" +version = "3.9.1" [package.dependencies] setuptools = "*" @@ -767,7 +768,7 @@ description = "Python parsing module" name = "pyparsing" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" -version = "2.4.1.1" +version = "2.4.2" [[package]] category = "dev" @@ -827,7 +828,7 @@ description = "World timezone definitions, modern and historical" name = "pytz" optional = false python-versions = "*" -version = "2019.1" +version = "2019.2" [[package]] category = "main" @@ -835,7 +836,7 @@ description = "YAML parser and emitter for Python" name = "pyyaml" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "5.1.1" +version = "5.1.2" [[package]] category = "main" @@ -865,7 +866,7 @@ description = "SciPy: Scientific Library for Python" name = "scipy" optional = false python-versions = ">=3.5" -version = "1.3.0" +version = "1.3.1" [package.dependencies] numpy = ">=1.13.3" @@ -1115,7 +1116,7 @@ description = "Fast, Extensible Progress Meter" name = "tqdm" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*" -version = "4.32.2" +version = "4.34.0" [[package]] category = "main" @@ -1142,7 +1143,7 @@ description = "Virtual Python Environment builder" name = "virtualenv" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "16.7.1" +version = "16.7.4" [[package]] category = "main" @@ -1158,7 +1159,7 @@ description = "A built-package format for Python." 
name = "wheel" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "0.33.4" +version = "0.33.6" [[package]] category = "main" @@ -1213,9 +1214,9 @@ cfgv = ["edb387943b665bf9c434f717bf630fa78aecd53d5900d2e05da6ad6048553144", "fbd chardet = ["84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", "fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"] click = ["2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13", "5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7"] colorama = ["05eed71e2e327246ad6b38c540c4a3117230b19679b875190486ddd2d721422d", "f8ac84de7840f5b9c4e3347b3c1eaa50f7e49c2b07596221daec5edaabbd7c48"] -coverage = ["0c5fe441b9cfdab64719f24e9684502a59432df7570521563d7b1aff27ac755f", "2b412abc4c7d6e019ce7c27cbc229783035eef6d5401695dccba80f481be4eb3", "3684fabf6b87a369017756b551cef29e505cb155ddb892a7a29277b978da88b9", "39e088da9b284f1bd17c750ac672103779f7954ce6125fd4382134ac8d152d74", "3c205bc11cc4fcc57b761c2da73b9b72a59f8d5ca89979afb0c1c6f9e53c7390", "42692db854d13c6c5e9541b6ffe0fe921fe16c9c446358d642ccae1462582d3b", "465ce53a8c0f3a7950dfb836438442f833cf6663d407f37d8c52fe7b6e56d7e8", "48020e343fc40f72a442c8a1334284620f81295256a6b6ca6d8aa1350c763bbe", "4ec30ade438d1711562f3786bea33a9da6107414aed60a5daa974d50a8c2c351", "5296fc86ab612ec12394565c500b412a43b328b3907c0d14358950d06fd83baf", "5f61bed2f7d9b6a9ab935150a6b23d7f84b8055524e7be7715b6513f3328138e", "6899797ac384b239ce1926f3cb86ffc19996f6fa3a1efbb23cb49e0c12d8c18c", "68a43a9f9f83693ce0414d17e019daee7ab3f7113a70c79a3dd4c2f704e4d741", "6b8033d47fe22506856fe450470ccb1d8ba1ffb8463494a15cfc96392a288c09", "7ad7536066b28863e5835e8cfeaa794b7fe352d99a8cded9f43d1161be8e9fbd", "7bacb89ccf4bedb30b277e96e4cc68cd1369ca6841bde7b005191b54d3dd1034", "839dc7c36501254e14331bcb98b27002aa415e4af7ea039d9009409b9d2d5420", "8e679d1bde5e2de4a909efb071f14b472a678b788904440779d2c449c0355b27", "8f9a95b66969cdea53ec992ecea5406c5bd99c9221f539bca1e8406b200ae98c", "932c03d2d565f75961ba1d3cec41ddde00e162c5b46d03f7423edcb807734eab", "93f965415cc51604f571e491f280cff0f5be35895b4eb5e55b47ae90c02a497b", "988529edadc49039d205e0aa6ce049c5ccda4acb2d6c3c5c550c17e8c02c05ba", "998d7e73548fe395eeb294495a04d38942edb66d1fa61eb70418871bc621227e", "9de60893fb447d1e797f6bf08fdf0dbcda0c1e34c1b06c92bd3a363c0ea8c609", "9e80d45d0c7fcee54e22771db7f1b0b126fb4a6c0a2e5afa72f66827207ff2f2", "a545a3dfe5082dc8e8c3eb7f8a2cf4f2870902ff1860bd99b6198cfd1f9d1f49", "a5d8f29e5ec661143621a8f4de51adfb300d7a476224156a39a392254f70687b", "a9abc8c480e103dc05d9b332c6cc9fb1586330356fc14f1aa9c0ca5745097d19", "aca06bfba4759bbdb09bf52ebb15ae20268ee1f6747417837926fae990ebc41d", "bb23b7a6fd666e551a3094ab896a57809e010059540ad20acbeec03a154224ce", "bfd1d0ae7e292105f29d7deaa9d8f2916ed8553ab9d5f39ec65bcf5deadff3f9", "c22ab9f96cbaff05c6a84e20ec856383d27eae09e511d3e6ac4479489195861d", "c62ca0a38958f541a73cf86acdab020c2091631c137bd359c4f5bddde7b75fd4", "c709d8bda72cf4cd348ccec2a4881f2c5848fd72903c185f363d361b2737f773", "c968a6aa7e0b56ecbd28531ddf439c2ec103610d3e2bf3b75b813304f8cb7723", "ca58eba39c68010d7e87a823f22a081b5290e3e3c64714aac3c91481d8b34d22", "df785d8cb80539d0b55fd47183264b7002077859028dfe3070cf6359bf8b2d9c", "f406628ca51e0ae90ae76ea8398677a921b36f0bd71aab2099dfed08abd0322f", "f46087bbd95ebae244a0eda01a618aff11ec7a069b15a3ef8f6b520db523dcf1", "f8019c5279eb32360ca03e9fac40a12667715546eed5c5eb59eb381f2f501260", "fc5f4d209733750afd2714e9109816a29500718b32dd9a5db01c0cb3a019b96a"] -cvxpy = 
["13fd80967d306c0c9959304fd633d3e494fa3b82f01e455bf18d7ceeb7f5b6c7", "1b2d3717919841b3a155db462923847a279fcf270a2895145fe43873e41fe6ad", "4aa7fc03707fccc673bd793572cc5b950ebd304c478cd9c0b6d53ccf7186a3f1", "645054acbbcc39a9bd851582224c38db141a98a1386bcc28d88019e95c920ccd", "7a37f30bf62bf2d521bbfd934aa38af718638960c837afa051b088c059e23e88", "d2297643a9223decaed6ea12b3913cf01c4aa659ac4b046a76360d7752447cbe", "d3643b915a195ef20c90aaf7a974e9df6a6831467f1aa7e64bb0bb3c9cb6df41", "faee66f3da014226829ab9b724674c6378a6e7bf57b6223bda1acf6878ef9e32"] -darglint = ["651a6029f02715e9b5af0287c1e3787518fe71478d295e4a1c3334a35a9e82a0", "c0a617f42fa196d4e0a2f8246d98b4d16c3fe9728937524a3c35fd2dbef986f6"] +coverage = ["08907593569fe59baca0bf152c43f3863201efb6113ecb38ce7e97ce339805a6", "0be0f1ed45fc0c185cfd4ecc19a1d6532d72f86a2bac9de7e24541febad72650", "141f08ed3c4b1847015e2cd62ec06d35e67a3ac185c26f7635f4406b90afa9c5", "19e4df788a0581238e9390c85a7a09af39c7b539b29f25c89209e6c3e371270d", "23cc09ed395b03424d1ae30dcc292615c1372bfba7141eb85e11e50efaa6b351", "245388cda02af78276b479f299bbf3783ef0a6a6273037d7c60dc73b8d8d7755", "331cb5115673a20fb131dadd22f5bcaf7677ef758741312bee4937d71a14b2ef", "386e2e4090f0bc5df274e720105c342263423e77ee8826002dcffe0c9533dbca", "3a794ce50daee01c74a494919d5ebdc23d58873747fa0e288318728533a3e1ca", "60851187677b24c6085248f0a0b9b98d49cba7ecc7ec60ba6b9d2e5574ac1ee9", "63a9a5fc43b58735f65ed63d2cf43508f462dc49857da70b8980ad78d41d52fc", "6b62544bb68106e3f00b21c8930e83e584fdca005d4fffd29bb39fb3ffa03cb5", "6ba744056423ef8d450cf627289166da65903885272055fb4b5e113137cfa14f", "7494b0b0274c5072bddbfd5b4a6c6f18fbbe1ab1d22a41e99cd2d00c8f96ecfe", "826f32b9547c8091679ff292a82aca9c7b9650f9fda3e2ca6bf2ac905b7ce888", "93715dffbcd0678057f947f496484e906bf9509f5c1c38fc9ba3922893cda5f5", "9a334d6c83dfeadae576b4d633a71620d40d1c379129d587faa42ee3e2a85cce", "af7ed8a8aa6957aac47b4268631fa1df984643f07ef00acd374e456364b373f5", "bf0a7aed7f5521c7ca67febd57db473af4762b9622254291fbcbb8cd0ba5e33e", "bf1ef9eb901113a9805287e090452c05547578eaab1b62e4ad456fcc049a9b7e", "c0afd27bc0e307a1ffc04ca5ec010a290e49e3afbe841c5cafc5c5a80ecd81c9", "dd579709a87092c6dbee09d1b7cfa81831040705ffa12a1b248935274aee0437", "df6712284b2e44a065097846488f66840445eb987eb81b3cc6e4149e7b6982e1", "e07d9f1a23e9e93ab5c62902833bf3e4b1f65502927379148b6622686223125c", "e2ede7c1d45e65e209d6093b762e98e8318ddeff95317d07a27a2140b80cfd24", "e4ef9c164eb55123c62411f5936b5c2e521b12356037b6e1c2617cef45523d47", "eca2b7343524e7ba246cab8ff00cab47a2d6d54ada3b02772e908a45675722e2", "eee64c616adeff7db37cc37da4180a3a5b6177f5c46b187894e633f088fb5b28", "ef824cad1f980d27f26166f86856efe11eff9912c4fed97d3804820d43fa550c", "efc89291bd5a08855829a3c522df16d856455297cf35ae827a37edac45f466a7", "fa964bae817babece5aa2e8c1af841bebb6d0b9add8e637548809d040443fee0", "ff37757e068ae606659c28c3bd0d923f9d29a85de79bf25b2b34b148473b5025"] +cvxpy = ["49e38e033a51f7df3a48b6b2c2879d56ae11bc628defbf1dc11e02836ceda1c5", "8535529ddb807067b0d59661dce1d9a6ddb2a218398a38ea7772328ad8a6ea13", "957e0c0e65d9f2cf25eb9e7aca0c518158047a27de2aaa69e9177e2ea23dbd68", "c2400927cceb170f96c1c97ad7faacaf92e941bbddb06b837d4d51fedbd7b192"] +darglint = ["4cc9f03930337112ec6c6ec2508162142140500fae84990c6ee09b8f85cebab0", "dea82e11971f9bef1710ec100703c8172e82149176d27da67a4a100cbc7d7042"] deap = ["01ab6067af3c86bd3a00a0d5e0c9860220c7cf412031f9cce18a6d08ec25b808", "0dc11a5521f661a7c7f475466d932b056fbfee8447ad73b007d69ef75c924355", "11162ae0343a25f5a8625f683ce16bad5757812d71db6b80a5ac7c25799a1a88", 
"1873f5e2a55ff61dac965b55cc042b2fe5529edbd54fc0fc1061664ddb4b75db", "19f6a60c91313cb1f39a9687bc54efab8abc599e5f81b87faedf583efc388602", "21787af1e4a56345bbffa6d1b07f5611d3ef7b299e5e832e6ab28dbab5c5c10e", "2f50f38ae0c82554a476d6c6013c85da4a8d7cac102edc4ec460a658200bd832", "31ed6220068e703d3f54c53046b8f25b85a9225c64e1e50172c2172c4bd8a7fe", "34594ba2f417ccb622b0ff54c25850fde80e12ca89fde6f242b15029e846be29", "3603c91779c276588884321637212511962b2f0668cec56b2b5664d28f28eee7", "5ee3cee4eac683237915bf570ede65047224ac6f392970fed029e3404935647c", "600e95e745cee25fda8c9a67219c9f46c4661da636a5af9f5e924230e7a3aeac", "6102d8bca425ff5d704f7631b69c22e33782e33020ce059cc88085746444ebfe", "669840720da9c4571efd9d0efdf90267009686b7a4c43dd4ab124e33e9cc153a", "6c5ef3b6c387cd28c7aab0297b05a9994b9c88dfd8a89236866c14703d55f9dc", "a0a0e56bd52a262ee12f84fa883b7ec5367532b784e2a6e83b1f7126b69d2300", "a1cc5fc4a2735ec5560ddef84f80beb84540d3221a147b53bba5e6a8718c8a55", "b05f607041c3f8aac5364055cb9632714bc62fe93e53283fbafea9ba91e13a69", "cd0fd7bccf7837b9e6a666b75e1c3a629fa3f5bc346cb90a9edd8cd56f085980", "cf1e53c822526bbc418333c47f668f394b00b51fddb4f15c54d5f190b2b88f17", "e648e1d76d5c8ecbce7f312bd174e4d2613debddd81f2a614b9023f7ad0331a0", "f146a9a0957510b57a2b5c669a26f0b84b2d219000b5684f4827884a75ad2ea7", "f1a0d1390e0b4f9edd4cbf2903c7d60865f43bad00de239aa066ffeda4ad7ee0", "fe789aa74ba78549030037dc9580510ff1763ef12fdf05cb92dda74237110565"] dill = ["993409439ebf7f7902d9de93eaa2a395e0446ff773d29f13dc46646482f76906"] docutils = ["54a349c622ff31c91cbec43b0b512f113b5b24daf00e2ea530bb1bd9aac14849", "ba4584f9107571ced0d2c7f56a5499c696215ba90797849c92d395979da68521", "d2ddba74835cb090a1b627d3de4e7835c628d07ee461f7b4480f51af2fe4d448"] @@ -1225,60 +1226,60 @@ enum34 = ["2d81cbbe0e73112bdfe6ef8576f2238f2ba27dd0d55752a776c41d38b7da2850", "6 fancyimpute = ["5db78d27e046e56cb8f8e6a61101ca3bdc1d1a62cde3af123b146c77200d47ae", "d49c44e878e6c9a73b3083ec84f4ee3b5ffaa80c1009ac0c966be6e5b9cd9891"] filelock = ["18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59", "929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"] flake8 = ["19241c1cbc971b9962473e4438a2ca19749a7dd002dd1a946eaba171b4114548", "8e9dfa3cecb2400b3738a42c54c3043e821682b9c840b0448c0503f781130696"] -flake8-docstrings = ["4e0ce1476b64e6291520e5570cf12b05016dd4e8ae454b8a8a9a48bc5f84e1cd", "8436396b5ecad51a122a2c99ba26e5b4e623bf6e913b0fea0cb6c2c4050f91eb"] +flake8-docstrings = ["3ad372b641f4c8e70c7465f067aed4ff8bf1e9347fce14f9eb71ed816db36257", "d8d72ccd5807c1ab9ff1466cb9bece0c4d94b8669e9bc4f472abc80dbc5d399e"] flake8-polyfill = ["12be6a34ee3ab795b19ca73505e7b55826d5f6ad7230d31b18e106400169b9e9", "e44b087597f6da52ec6393a709e7108b2905317d0c0b744cdca6208e670d8eda"] future = ["67045236dcfd6816dc439556d009594abf643e5eb48992e36beac09c2ca659b8"] gast = ["fe939df4583692f0512161ec1c880e0a10e71e6a232da045ab8edd3756fbadf0"] google-pasta = ["40b4f55ba7b44823eac96d055000572c84ce48cacb3e91c100869844064b2d07", "79d1ce28b381d68e98ef7707d19909adb58912f8dae8734402454424fc76b8fe", "7ca8afc4cfeebf4a079cdf586333d5447cecd19a997475136138fc83c3351bc4"] -grpcio = ["03b78b4e7dcdfe3e257bb528cc93923f9cbbab6d5babf15a60d21e9a4a70b1a2", "1ce0ccfbdfe84387dbcbf44adb4ae16ec7ae70e166ffab478993eb1ea1cba3ce", "22e167a9406d73dd19ffe8ed6a485f17e6eac82505be8c108897f15e68badcbb", "31d0aeca8d8ee2301c62c5c340e0889d653b1280d68f9fa203982cb6337b050e", "44c7f99ca17ebbcc96fc54ed00b454d8313f1eac28c563098d8b901025aff941", "5471444f53f9db6a1f1f11f5dbc173228881df8446380b6b98f90afb8fd8348e", 
"561bca3b1bde6d6564306eb05848fd155136e9c3a25d2961129b1e2edba22fce", "5bf58e1d2c2f55365c06e8cb5abe067b88ca2e5550fb62009c41df4b54505acf", "6b7163d1e85d76b0815df63fcc310daec02b44532bb433f743142d4febcb181f", "766d79cddad95f5f6020037fe60ea8b98578afdf0c59d5a60c106c1bdd886303", "770b7372d5ca68308ff66d7baee53369fa5ce985f84bcb6aa1948c1f2f7b02f2", "7ab178da777fc0f55b6aef5a755f99726e8e4b75e3903954df07b27059b54fcf", "8078305e77c2f6649d36b24d8778096413e474d9d7892c6f92cfb589c9d71b2e", "85600b63a386d860eeaa955e9335e18dd0d7e5477e9214825abf2c2884488369", "857d9b939ae128be1c0c792eb885c7ff6a386b9dea899ac4b06f4d90a31f9d87", "87a41630c90c179fa5c593400f30a467c498972c702f348d41e19dafeb1d319e", "8805d486c6128cc0fcc8ecf16c4095d99a8693a541ef851429ab334e028a4a97", "8d71b7a89c306a41ccc7741fc9409b14f5b86727455c2a1c0c7cfcb0f784e1f2", "9e1b80bd65f8f160880cb4dad7f55697f6d37b2d7f251fc0c2128e811928f369", "9e290c84a145ae2411ee0ec9913c41cd7500e2e7485fe93632434d84ef4fda67", "9ec9f88b5bc94bd99372f27cdd53af1c92ba06717380b127733b953cfb181174", "a0a02a8b4ba6deadf706d5f849539b3685b72b186a3c9ef5d43e8972ed60fb6f", "a4059c59519f5940e01a071f74ae2a60ea8f6185b03d22a09d40c7959a36b16b", "a6e028c2a6da2ebfa2365a5b32531d311fbfec0e3600fc27e901b64f0ff7e54e", "adcdebf9f8463df4120c427cf6c9aed39258bccd03ed37b6939e7a145d64d6e0", "bdec982610259d07156a58f80b8c3e69be7751a9208bc577b059c5193d087fad", "cefc4d4251ffb73feb303d4b7e9d6c367cb60f2db16d259ea28b114045f965aa", "d4145c8aa6afbac10ad27e408f7ce15992fe89ba5d0b4abca31c0c2729864c03", "da76dc5ad719ee99de5ea28a5629ff92172cbb4a70d8a6ae3a5b7a53c7382ce1", "dde2452c08ef8b6426ccab6b5b6de9f06d836d9937d6870e68153cbf8cb49348", "e3d88091d2539a4868750914a6fe7b9ec50e42b913851fc1b77423b5bd918530", "f9c67cfe6278499d7f83559dc6322a8bbb108e307817a3d7acbfea807b3603cc"] +grpcio = ["1303578092f1f6e4bfbc354c04ac422856c393723d3ffa032fff0f7cb5cfd693", "229c6b313cd82bec8f979b059d87f03cc1a48939b543fe170b5a9c5cf6a6bc69", "3cd3d99a8b5568d0d186f9520c16121a0f2a4bcad8e2b9884b76fb88a85a7774", "41cfb222db358227521f9638a6fbc397f310042a4db5539a19dea01547c621cd", "43330501660f636fd6547d1e196e395cd1e2c2ae57d62219d6184a668ffebda0", "45d7a2bd8b4f25a013296683f4140d636cdbb507d94a382ea5029a21e76b1648", "47dc935658a13b25108823dabd010194ddea9610357c5c1ef1ad7b3f5157ebee", "480aa7e2b56238badce0b9413a96d5b4c90c3bfbd79eba5a0501e92328d9669e", "4a0934c8b0f97e1d8c18e76c45afc0d02d33ab03125258179f2ac6c7a13f3626", "5624dab19e950f99e560400c59d87b685809e4cfcb2c724103f1ab14c06071f7", "60515b1405bb3dadc55e6ca99429072dad3e736afcf5048db5452df5572231ff", "610f97ebae742a57d336a69b09a9c7d7de1f62aa54aaa8adc635b38f55ba4382", "64ea189b2b0859d1f7b411a09185028744d494ef09029630200cc892e366f169", "686090c6c1e09e4f49585b8508d0a31d58bc3895e4049ea55b197d1381e9f70f", "7745c365195bb0605e3d47b480a2a4d1baa8a41a5fd0a20de5fa48900e2c886a", "79491e0d2b77a1c438116bf9e5f9e2e04e78b78524615e2ce453eff62db59a09", "825177dd4c601c487836b7d6b4ba268db59787157911c623ba59a7c03c8d3adc", "8a060e1f72fb94eee8a035ed29f1201ce903ad14cbe27bda56b4a22a8abda045", "90168cc6353e2766e47b650c963f21cfff294654b10b3a14c67e26a4e3683634", "94b7742734bceeff6d8db5edb31ac844cb68fc7f13617eca859ff1b78bb20ba1", "962aebf2dd01bbb2cdb64580e61760f1afc470781f9ecd5fe8f3d8dcd8cf4556", "9c8d9eacdce840b72eee7924c752c31b675f8aec74790e08cff184a4ea8aa9c1", "af5b929debc336f6bab9b0da6915f9ee5e41444012aed6a79a3c7e80d7662fdf", "b9cdb87fc77e9a3eabdc42a512368538d648fa0760ad30cf97788076985c790a", "c5e6380b90b389454669dc67d0a39fb4dc166416e01308fcddd694236b8329ef", "d60c90fe2bfbee735397bf75a2f2c4e70c5deab51cd40c6e4fa98fae018c8db6", 
"d8582c8b1b1063249da1588854251d8a91df1e210a328aeb0ece39da2b2b763b", "ddbf86ba3aa0ad8fed2867910d2913ee237d55920b55f1d619049b3399f04efc", "e46bc0664c5c8a0545857aa7a096289f8db148e7f9cca2d0b760113e8994bddc", "f6437f70ec7fed0ca3a0eef1146591bb754b418bb6c6b21db74f0333d624e135", "f71693c3396530c6b00773b029ea85e59272557e9bd6077195a6593e4229892a", "f79f7455f8fbd43e8e9d61914ecf7f48ba1c8e271801996fef8d6a8f3cc9f39f"] h5py = ["05750b91640273c69989c657eaac34b091abdd75efc8c4824c82aaf898a2da0a", "082a27208aa3a2286e7272e998e7e225b2a7d4b7821bd840aebf96d50977abbb", "08e2e8297195f9e813e894b6c63f79372582787795bba2014a2db6a2de95f713", "0dd2adeb2e9de5081eb8dcec88874e7fd35dae9a21557be3a55a3c7d491842a4", "0f94de7a10562b991967a66bbe6dda9808e18088676834c0a4dcec3fdd3bcc6f", "106e42e2e01e486a3d32eeb9ba0e3a7f65c12fa8998d63625fa41fb8bdc44cdb", "1606c66015f04719c41a9863c156fc0e6b992150de21c067444bcb82e7d75579", "1854c4beff9961e477e133143c5e5e355dac0b3ebf19c52cf7cc1b1ef757703c", "1e9fb6f1746500ea91a00193ce2361803c70c6b13f10aae9a33ad7b5bd28e800", "2cca17e80ddb151894333377675db90cd0279fa454776e0a4f74308376afd050", "30e365e8408759db3778c361f1e4e0fe8e98a875185ae46c795a85e9bafb9cdf", "3206bac900e16eda81687d787086f4ffd4f3854980d798e191a9868a6510c3ae", "3c23d72058647cee19b30452acc7895621e2de0a0bd5b8a1e34204b9ea9ed43c", "407b5f911a83daa285bbf1ef78a9909ee5957f257d3524b8606be37e8643c5f0", "4162953714a9212d373ac953c10e3329f1e830d3c7473f2a2e4f25dd6241eef0", "5fc7aba72a51b2c80605eba1c50dbf84224dcd206279d30a75c154e5652e1fe4", "713ac19307e11de4d9833af0c4bd6778bde0a3d967cafd2f0f347223711c1e31", "71b946d80ef3c3f12db157d7778b1fe74a517ca85e94809358b15580983c2ce2", "8cc4aed71e20d87e0a6f02094d718a95252f11f8ed143bc112d22167f08d4040", "9d41ca62daf36d6b6515ab8765e4c8c4388ee18e2a665701fef2b41563821002", "a744e13b000f234cd5a5b2a1f95816b819027c57f385da54ad2b7da1adace2f3", "b087ee01396c4b34e9dc41e3a6a0442158206d383c19c7d0396d52067b17c1cb", "b0f03af381d33306ce67d18275b61acb4ca111ced645381387a02c8a5ee1b796", "b9e4b8dfd587365bdd719ae178fa1b6c1231f81280b1375eef8626dfd8761bf3", "c5dd4ec75985b99166c045909e10f0534704d102848b1d9f0992720e908928e7", "d2b82f23cd862a9d05108fe99967e9edfa95c136f532a71cb3d28dc252771f50", "e58a25764472af07b7e1c4b10b0179c8ea726446c7141076286e41891bf3a563", "f3b49107fbfc77333fc2b1ef4d5de2abcd57e7ea3a1482455229494cf2da56ce"] -identify = ["0a11379b46d06529795442742a043dc2fa14cd8c995ae81d1febbc5f1c014c87", "43a5d24ffdb07bc7e21faf68b08e9f526a1f41f0056073f480291539ef961dfd"] +identify = ["9aba2d08a82aa8e6f58810d4887ed3cf103a1befeb1eaf632d9c6fd2d6642542", "b50ffad180b3a93b33a58b42597ef22493240d406ba07cc5058daf70f44b8d7c"] idna = ["c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", "ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c"] imagesize = ["3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8", "f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5"] -importlib-metadata = ["6dfd58dfe281e8d240937776065dd3624ad5469c835248219bd16cf2e12dbeb7", "cb6ee23b46173539939964df59d3d72c3e0c1b5d54b84f1d8a7e912fe43612db"] +importlib-metadata = ["23d3d873e008a513952355379d93cbcab874c58f4f034ff657c7a87422fa64e8", "80d2de76188eabfbfcf27e6a37342c2827801e59c4cc14b0371c56fed43820e3"] importlib-resources = ["6e2783b2538bd5a14678284a3962b0660c715e5a0f10243fd5e00a4b5974f50b", "d3279fd0f6f847cced9f7acc19bd3e5df54d34f93a2e7bb5f238f81545787078"] incremental = ["717e12246dddf231a349175f48d74d93e2897244939173b01974ab6661406b9f", "7b751696aaf36eebfab537e458929e194460051ccad279c72b755a167eebd4b3"] isort = 
["54da7e92468955c4fceacd0c86bd0ec997b0e1ee80d97f67c35a78b719dccab1", "6e811fcb295968434526407adb8796944f1988c5b65e8139058f2014cbe100fd"] jinja2 = ["065c4f02ebe7f7cf559e49ee5a95fb800a9e4528727aec6f24402a5374c65013", "14dd6caf1527abb21f08f86c784eac40853ba93edb79552aa1e4b8aef1b61c7b"] joblib = ["21e0c34a69ad7fde4f2b1f3402290e9ec46f545f15f1541c582edfe05d87b63a", "315d6b19643ec4afd4c41c671f9f2d65ea9d787da093487a81ead7b0bac94524"] jsonpickle = ["d0c5a4e6cb4e58f6d5406bdded44365c2bcf9c836c4f52910cc9ba7245a59dc2", "d3e922d781b1d0096df2dad89a2e1f47177d7969b596aea806a9d91b4626b29b"] -keras = ["794d0c92c6c4122f1f0fcf3a7bc2f49054c6a54ddbef8d8ffafca62795d760b6", "90b610a3dbbf6d257b20a079eba3fdf2eed2158f64066a7c6f7227023fd60bc9"] +keras = ["0fb448b95643a708d25d2394183a2f3a84eefb55fb64917152a46826990113ea", "5a75cfdf69c6cb9de81a82aa19542ac69a5c2e78a48a58c1649fc5cdb55c917c"] keras-applications = ["5579f9a12bcde9748f4a12233925a59b93b73ae6947409ff34aa2ba258189fe5", "df4323692b8c1174af821bf906f1e442e63fa7589bf0f1230a0b6bdc5a810c95"] keras-preprocessing = ["44aee5f2c4d80c3b29f208359fcb336df80f293a0bb6b1c738da43ca206656fb", "5a8debe01d840de93d49e05ccf1c9b81ae30e210d34dacbcc47aeb3049b528e5"] knnimpute = ["4a05857be4b883a6d690c4c9e07e2920b324c30c97b7ed6241e0bdfb0bcb7af0"] markdown = ["2e50876bcdd74517e7b71f3e7a76102050edec255b3983403f1a63e7c8a41e7a", "56a46ac655704b91e5b7e6326ce43d5ef72411376588afa1dd90e881b83c7e8c"] markupsafe = ["00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473", "09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161", "09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", "1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", "24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", "29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", "43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", "46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", "500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", "535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66", "62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1", "6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", "717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", "79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", "7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", "88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", "8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d", "98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e", "9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d", "9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c", "ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21", "b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2", "b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5", "b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b", "ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6", "c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f", "cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f", "e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7"] -marshmallow = ["9cedfc5b6f568d57e8a2cf3d293fbd81b05e5ef557854008d03e25660a39ccfd", "a4d99922116a76e5abd8f997ec0519086e24814b7e1e1344bebe2a312ba50235"] +marshmallow = 
["8a1ee88594c983336acba749d1788950d095f96538ba193fda882844c5d35129", "a339159e422a055269f5625df51fbdc7fb20512cfffa08451cd5727783ddca39"] mccabe = ["ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", "dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"] mock = ["83657d894c90d5681d62155c82bda9c1187827525880eda8ff5df4ec813437c3", "d157e52d4e5b938c550f39eb2fd15610db062441a9c2747d3dbfa9298211d0f8"] more-itertools = ["409cd48d4db7052af495b09dec721011634af3753ae1ef92d2b32f73a745f832", "92b8c4b06dac4f0611c0729b2f2ede52b2e1bac1ab48f089c7ddc12e26bb60c4"] multiprocess = ["1b7084eac782f50eb21da2ea571e062bbf504851212cdb4c7ad1cad7bed036c6", "20c3715959d45a902ae425ea5b275288093588da5826d281846912f520818712", "3ac3bad735f67407678ed2b6b682ed115b9983165f4c7cd5929afba9f1f6fe6c", "649d69115b5e93b4a5c77dee6a861a28c74105cc1d8a420da308afa99914b4ee", "64b415e0cb746bc9e9620b92a300b2d37f111ff1780ba953c899bf56cea16ce8", "786bd6684c8d339a56bf8f32fb846da4ccb5924f7c2679c946d64d2684e552f7", "8f69e705f7aabfbb8dc17252daca34331f0ce5c1b91bae0ddd0f3ee03f90a580", "b2026f00a358a3ac00f41c83d5d7f05ae15cd4b1e061fc00f672abd98cf322ae", "fc6b2d8f33e7d437a82c6d1c2f1673ae20a271152a1ac6a18571d10308de027d"] nodeenv = ["ad8259494cf1c9034539f6cced78a1da4840a4b157e23640bc4a0c0546b0cb7a"] np-utils = ["6a63bdf5c2326183e8e0086abe2a27b4029f48150b4ced8a948af72d863d6379"] -numpy = ["0778076e764e146d3078b17c24c4d89e0ecd4ac5401beff8e1c87879043a0633", "141c7102f20abe6cf0d54c4ced8d565b86df4d3077ba2343b61a6db996cefec7", "14270a1ee8917d11e7753fb54fc7ffd1934f4d529235beec0b275e2ccf00333b", "27e11c7a8ec9d5838bc59f809bfa86efc8a4fd02e58960fa9c49d998e14332d5", "2a04dda79606f3d2f760384c38ccd3d5b9bb79d4c8126b67aff5eb09a253763e", "3c26010c1b51e1224a3ca6b8df807de6e95128b0908c7e34f190e7775455b0ca", "52c40f1a4262c896420c6ea1c6fda62cf67070e3947e3307f5562bd783a90336", "6e4f8d9e8aa79321657079b9ac03f3cf3fd067bf31c1cca4f56d49543f4356a5", "7242be12a58fec245ee9734e625964b97cf7e3f2f7d016603f9e56660ce479c7", "7dc253b542bfd4b4eb88d9dbae4ca079e7bf2e2afd819ee18891a43db66c60c7", "94f5bd885f67bbb25c82d80184abbf7ce4f6c3c3a41fbaa4182f034bba803e69", "a89e188daa119ffa0d03ce5123dee3f8ffd5115c896c2a9d4f0dbb3d8b95bfa3", "ad3399da9b0ca36e2f24de72f67ab2854a62e623274607e37e0ce5f5d5fa9166", "b0348be89275fd1d4c44ffa39530c41a21062f52299b1e3ee7d1c61f060044b8", "b5554368e4ede1856121b0dfa35ce71768102e4aa55e526cb8de7f374ff78722", "cbddc56b2502d3f87fda4f98d948eb5b11f36ff3902e17cb6cc44727f2200525", "d79f18f41751725c56eceab2a886f021d70fd70a6188fd386e29a045945ffc10", "dc2ca26a19ab32dc475dbad9dfe723d3a64c835f4c23f625c2b6566ca32b9f29", "dd9bcd4f294eb0633bb33d1a74febdd2b9018b8b8ed325f861fffcd2c7660bb8", "e8baab1bc7c9152715844f1faca6744f2416929de10d7639ed49555a85549f52", "ec31fe12668af687b99acf1567399632a7c47b0e17cfb9ae47c098644ef36797", "f12b4f7e2d8f9da3141564e6737d79016fe5336cc92de6814eba579744f65b0a", "f58ac38d5ca045a377b3b377c84df8175ab992c970a53332fa8ac2373df44ff7"] +numpy = ["03e311b0a4c9f5755da7d52161280c6a78406c7be5c5cc7facfbcebb641efb7e", "0cdd229a53d2720d21175012ab0599665f8c9588b3b8ffa6095dd7b90f0691dd", "312bb18e95218bedc3563f26fcc9c1c6bfaaf9d453d15942c0839acdd7e4c473", "464b1c48baf49e8505b1bb754c47a013d2c305c5b14269b5c85ea0625b6a988a", "5adfde7bd3ee4864536e230bcab1c673f866736698724d5d28c11a4d63672658", "7724e9e31ee72389d522b88c0d4201f24edc34277999701ccd4a5392e7d8af61", "8d36f7c53ae741e23f54793ffefb2912340b800476eb0a831c6eb602e204c5c4", "910d2272403c2ea8a52d9159827dc9f7c27fb4b263749dca884e2e4a8af3b302", 
"951fefe2fb73f84c620bec4e001e80a80ddaa1b84dce244ded7f1e0cbe0ed34a", "9588c6b4157f493edeb9378788dcd02cb9e6a6aeaa518b511a1c79d06cbd8094", "9ce8300950f2f1d29d0e49c28ebfff0d2f1e2a7444830fbb0b913c7c08f31511", "be39cca66cc6806652da97103605c7b65ee4442c638f04ff064a7efd9a81d50a", "c3ab2d835b95ccb59d11dfcd56eb0480daea57cdf95d686d22eff35584bc4554", "eb0fc4a492cb896346c9e2c7a22eae3e766d407df3eb20f4ce027f23f76e4c54", "ec0c56eae6cee6299f41e780a0280318a93db519bbb2906103c43f3e2be1206c", "f4e4612de60a4f1c4d06c8c2857cdcb2b8b5289189a12053f37d3f41f06c60d0"] osqp = ["02dbcd69dbe07204142909a4bf99df374dae993583f3bc766b2bf8871ae2536f", "1f604f4927b375778570aa6d758c38ff61117fcbd8478fa0563a96662acb1a0b", "2cfa4eba7f92ad6996c3ace35fd82587686521cfbb23f82841636e64fe5b56f0", "39bd29fd23bbdd0a7766e4f90d330ff3cb76a7c2737519a2a307c3dfe3603015", "3a6cb649373f9c7179ba5645656d4eed804d555d87908006c4ae4db413ab3f9c", "3c49c3fd8fcda226407f1deb5326cd6d3951abe64ff86e7fca65c4c533993158", "50c2f70ec4cda87d21f18eb7e6e75f61102daf218c276e4e7bbba951be20481d", "6aa0b91c50ad5d7ab0403031552221e1a1f51fcdb5860ffddbd9c40627299846", "7439a1318f6509be5f49bd0249c15f3ad3b1754f729d4e00c379417058fc5357", "86417db99dc6cac26d7cc3ee53a7936fbe7387b87309b5e2fa6e4fff2445c9c9", "86d58b5a9f8f4dc6fd1dcb02fbb29be8c3bcae59b85620d174c88125d953707d", "86f448a71d35e3156c46efb8d9ccde8253e5858c0c483c1454df473546b60496", "8c46d87410eb4bacdd3211899be2b03fd318751c65de8d9e8ad15bcfe7ee9faf", "9cb4809fbe1afeb9cba17e7f62028de47dae22360abf55e8a29663f856c23e2a", "a7d8e28dc4c5490d4b35a88c17167c5b75ccd2e495dd5ac99fa3d510c1cfccef", "b17c6b28455bf7e3e52209c1ce48b74f86a5bc781104492d344ff93c41930b10", "c5dc13677ecd6def58c0c95f5e0afef9254531ba8c5d65efad940013622b45dd", "cc7cc1a99c54c6f192bdccba0fb263cf8cef3084a2918d3d67109db92e5148e9", "da0424e542a137629a863f71158f8a2fec1def9983c06d969891ef5ae170fc9f", "ded0027a08e2c9572a280fcd6cca9d4ed2f1c50fb80585453be4290062a6e909", "e801b30a8f7c6d51723e8275b6c143b2154d946423b6cb2b973db027b7a9955b", "eda282626fee5e9050cdb09934a91726ec39070da07d5f92b888a4d56404539b", "f14800074b44b54237ff3b892c5ccd7788e739774b739a11cfed9b44466c0471"] -packaging = ["0c98a5d0be38ed775798ece1b9727178c4469d9c3b4ada66e8e6b7849f8732af", "9e1cbf8c12b1f1ce0bb5344b8d7ecf66a6f8a6e91bcb0c84593ed6d3ab5c4ab3"] +packaging = ["a7ac867b97fdc07ee80a8058fe4435ccd274ecc3b0ed61d852d7d53055528cf9", "c491ca87294da7cc01902edbe30a5bc6c4c28172b5138ab4e4aa1b9d7bfaeafe"] pandas = ["11975fad9edbdb55f1a560d96f91830e83e29bed6ad5ebf506abda09818eaf60", "12e13d127ca1b585dd6f6840d3fe3fa6e46c36a6afe2dbc5cb0b57032c902e31", "1c87fcb201e1e06f66e23a61a5fea9eeebfe7204a66d99df24600e3f05168051", "242e9900de758e137304ad4b5663c2eff0d798c2c3b891250bd0bd97144579da", "26c903d0ae1542890cb9abadb4adcb18f356b14c2df46e4ff657ae640e3ac9e7", "2e1e88f9d3e5f107b65b59cd29f141995597b035d17cc5537e58142038942e1a", "31b7a48b344c14691a8e92765d4023f88902ba3e96e2e4d0364d3453cdfd50db", "4fd07a932b4352f8a8973761ab4e84f965bf81cc750fb38e04f01088ab901cb8", "5b24ca47acf69222e82530e89111dd9d14f9b970ab2cd3a1c2c78f0c4fbba4f4", "647b3b916cc8f6aeba240c8171be3ab799c3c1b2ea179a3be0bd2712c4237553", "66b060946046ca27c0e03e9bec9bba3e0b918bafff84c425ca2cc2e157ce121e", "6efa9fa6e1434141df8872d0fa4226fc301b17aacf37429193f9d70b426ea28f", "be4715c9d8367e51dbe6bc6d05e205b1ae234f0dc5465931014aa1c4af44c1ba", "bea90da782d8e945fccfc958585210d23de374fa9294a9481ed2abcef637ebfc", "d318d77ab96f66a59e792a481e2701fba879e1a453aefeebdb17444fe204d1ed", "d785fc08d6f4207437e900ffead930a61e634c5e4f980ba6d3dc03c9581748c7", 
"de9559287c4fe8da56e8c3878d2374abc19d1ba2b807bfa7553e912a8e5ba87c", "f4f98b190bb918ac0bc0e3dd2ab74ff3573da9f43106f6dba6385406912ec00f", "f71f1a7e2d03758f6e957896ed696254e2bc83110ddbc6942018f1a232dd9dad", "fb944c8f0b0ab5c1f7846c686bc4cdf8cde7224655c12edcd59d5212cd57bec0"] patchy = ["21609acb2e7d6b5375c605ae1a0f13469c50569db817ee4e62336b0aff103d75", "aae8ad17484b94498c2e4232a3e419cebf526e2ad8a80282f77447e9fd4d8a5c"] patsy = ["5465be1c0e670c3a965355ec09e9a502bf2c4cbe4875e8528b0221190a8a5d40", "f115cec4201e1465cd58b9866b0b0e7b941caafec129869057405bfe5b5e3991"] pluggy = ["0825a152ac059776623854c1543d65a4ad408eb3d33ee114dff91e57ec6ae6fc", "b9817417e95936bf75d85d3f8767f7df6cdde751fc40aed3bb3074cbcb77757c"] -pre-commit = ["92e406d556190503630fd801958379861c94884693a032ba66629d0351fdccd4", "cccc39051bc2457b0c0f7152a411f8e05e3ba2fe1a5613e4ee0833c1c1985ce3"] -protobuf = ["05c36022fef3c7d3562ac22402965c0c2b9fe8421f459bb377323598996e407f", "139b7eadcca0a861d60b523cb37d9475505e0dfb07972436b15407c2b968d87e", "15f683006cb77fb849b1f561e509b03dd2b7dcc749086b8dd1831090d0ba4740", "2ad566b7b7cdd8717c7af1825e19f09e8fef2787b77fcb979588944657679604", "35cfcf97642ef62108e10a9431c77733ec7eaab8e32fe4653de20403429907cb", "387822859ecdd012fdc25ec879f7f487da6e1d5b1ae6115e227e6be208836f71", "4df14cbe1e7134afcfdbb9f058949e31c466de27d9b2f7fb4da9e0b67231b538", "586c4ca37a7146d4822c700059f150ac3445ce0aef6f3ea258640838bb892dc2", "58b11e530e954d29ab3180c48dc558a409f705bf16739fd4e0d3e07924ad7add", "63c8c98ccb8c95f41c18fb829aeeab21c6249adee4ed75354125bdc44488f30e", "72edcbacd0c73eef507d2ff1af99a6c27df18e66a3ff4351e401182e4de62b03", "83dc8a561b3b954fd7002c690bb83278b8d1742a1e28abba9aaef28b0c8b437d", "913171ecc84c2726b86574e40549a0ea619d569657c5a5ff782a3be7d81401a5", "aabb7c741d3416671c3e6fe7c52970a226e6a8274417a97d7d795f953fadef36", "b3452bbda12b1cbe2187d416779de07b2ab4c497d83a050e43c344778763721d", "c5d5b8d4a9212338297fa1fa44589f69b470c0ba1d38168b432d577176b386a8", "d86ee389c2c4fc3cebabb8ce83a8e97b6b3b5dc727b7419c1ccdc7b6e545a233", "df82a0a3494fa2100c6f94aacaa8a618281dc3535f410f3decce941aea400a50", "f2db8c754de788ab8be5e108e1e967c774c0942342b4f8aaaf14063889a6cfdc"] +pre-commit = ["21ce389ea3a480170804208baff8ceaac815ecf6b9bd6c6797de5584ad69cff8", "3b0e901f442b966444833f1924e9bf9a7c10c79741b21520f68bc87639220f5e"] +protobuf = ["00a1b0b352dc7c809749526d1688a64b62ea400c5b05416f93cfb1b11a036295", "01acbca2d2c8c3f7f235f1842440adbe01bbc379fa1cbdd80753801432b3fae9", "0a795bca65987b62d6b8a2d934aa317fd1a4d06a6dd4df36312f5b0ade44a8d9", "0ec035114213b6d6e7713987a759d762dd94e9f82284515b3b7331f34bfaec7f", "31b18e1434b4907cb0113e7a372cd4d92c047ce7ba0fa7ea66a404d6388ed2c1", "32a3abf79b0bef073c70656e86d5bd68a28a1fbb138429912c4fc07b9d426b07", "55f85b7808766e5e3f526818f5e2aeb5ba2edcc45bcccede46a3ccc19b569cb0", "64ab9bc971989cbdd648c102a96253fdf0202b0c38f15bd34759a8707bdd5f64", "64cf847e843a465b6c1ba90fb6c7f7844d54dbe9eb731e86a60981d03f5b2e6e", "917c8662b585470e8fd42f052661fc66d59fccaae450a60044307dcbf82a3335", "afed9003d7f2be2c3df20f64220c30faec441073731511728a2cb4cab4cd46a6", "b883d7eb129b1b57c5128146bc7c2d1f15de457e96a549827fbee6f26eeedc46", "bf8e05d638b585d1752c5a84247134a0350d3a8b73d3632489a014a9f6f1e758", "d831b047bd69becaf64019a47179eb22118a50dd008340655266a906c69c6417", "de2760583ed28749ff885789c1cbc6c9c06d6de92fc825740ab99deb2f25ea4d", "eabc4cf1bc19689af8022ba52fd668564a8d96e0d08f3b4732d26a64255216a4", "fcff6086c86fb1628d94ea455c7b9de898afc50378042927a59df8065a79a549"] py = 
["64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa", "dc639b046a6e2cff5bbe40194ad65936d6ba360b52b3c3fe1d08a82dd50b5e53"] pycodestyle = ["95a2219d12372f05704562a14ec30bc76b05a5b297b21a5dfe3f6fac3491ae56", "e40a936c9a450ad81df37f549d676d127b1b66000a6c500caa2b085bc0ca976c"] pydocstyle = ["2258f9b0df68b97bf3a6c29003edc5238ff8879f1efb6f1999988d934e432bd8", "5741c85e408f9e0ddf873611085e819b809fca90b619f5fd7f34bd4959da3dd4", "ed79d4ec5e92655eccc21eb0c6cf512e69512b4a97d215ace46d17e4990f2039"] pyflakes = ["17dbeb2e3f4d772725c777fabc446d5634d1038f234e77343108ce445ea69ce0", "d976835886f8c5b31d47970ed689944a0262b5f3afa00a5a7b4dc81e5449f8a2"] pygments = ["71e430bc85c88a430f000ac1d9b331d2407f681d6f6aec95e8bcfbc3df5b0127", "881c4c157e45f30af185c1ffe8d549d48ac9127433f2c380c24b84572ad66297"] -pyparsing = ["43c5486cefefa536c9aab528881c992328f020eefe4f6d06332449c365218580", "d6c5ffe9d0305b9b977f7a642d36b9370954d1da7ada4c62393382cbadad4265"] +pyparsing = ["6f98a7b9397e206d78cc01df10131398f1c8b8510a2f4d97d9abd82e1aacdd80", "d9338df12903bbf5d65a0e4e87c2161968b10d2e489652bb47001d82a9b028b4"] pytest = ["3f193df1cfe1d1609d4c583838bea3d532b18d6160fd3f55c9447fdca30848ec", "e246cf173c01169b9617fc07264b7b1316e78d7a650055235d6d897bc80d9660"] pytest-cov = ["2b097cde81a302e1047331b48cadacf23577e431b61e9c6f49a1170bbe3d3da6", "e00ea4fdde970725482f1f35630d12f074e121a23801aabf2ae154ec6bdd343a"] pytest-mock = ["43ce4e9dd5074993e7c021bb1c22cbb5363e612a2b5a76bc6d956775b10758b7", "5bf5771b1db93beac965a7347dc81c675ec4090cb841e49d9d34637a25c30568"] python-dateutil = ["7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb", "c89805f6f4d64db21ed966fda138f8a5ed7a4fdbc1a8ee329ce1b74e3c74da9e"] -pytz = ["303879e36b721603cc54604edcac9d20401bdbe31e1e4fdee5b9f98d5d31dfda", "d747dd3d23d77ef44c6a3526e274af6efeb0a6f1afd5a69ba4d5be4098c8e141"] -pyyaml = ["57acc1d8533cbe51f6662a55434f0dbecfa2b9eaf115bede8f6fd00115a0c0d3", "588c94b3d16b76cfed8e0be54932e5729cc185caffaa5a451e7ad2f7ed8b4043", "68c8dd247f29f9a0d09375c9c6b8fdc64b60810ebf07ba4cdd64ceee3a58c7b7", "70d9818f1c9cd5c48bb87804f2efc8692f1023dac7f1a1a5c61d454043c1d265", "86a93cccd50f8c125286e637328ff4eef108400dd7089b46a7be3445eecfa391", "a0f329125a926876f647c9fa0ef32801587a12328b4a3c741270464e3e4fa778", "a3c252ab0fa1bb0d5a3f6449a4826732f3eb6c0270925548cac342bc9b22c225", "b4bb4d3f5e232425e25dda21c070ce05168a786ac9eda43768ab7f3ac2770955", "cd0618c5ba5bda5f4039b9398bb7fb6a317bb8298218c3de25c47c4740e4b95e", "ceacb9e5f8474dcf45b940578591c7f3d960e82f926c707788a570b51ba59190", "fe6a88094b64132c4bb3b631412e90032e8cfe9745a58370462240b8cb7553cd"] +pytz = ["26c0b32e437e54a18161324a2fca3c4b9846b74a8dccddd843113109e1116b32", "c894d57500a4cd2d5c71114aaab77dbab5eabd9022308ce5ac9bb93a60a6f0c7"] +pyyaml = ["0113bc0ec2ad727182326b61326afa3d1d8280ae1122493553fd6f4397f33df9", "01adf0b6c6f61bd11af6e10ca52b7d4057dd0be0343eb9283c878cf3af56aee4", "5124373960b0b3f4aa7df1707e63e9f109b5263eca5976c66e08b1c552d4eaf8", "5ca4f10adbddae56d824b2c09668e91219bb178a1eee1faa56af6f99f11bf696", "7907be34ffa3c5a32b60b95f4d95ea25361c951383a894fec31be7252b2b6f34", "7ec9b2a4ed5cad025c2278a1e6a19c011c80a3caaac804fd2d329e9cc2c287c9", "87ae4c829bb25b9fe99cf71fbb2140c448f534e24c998cc60f39ae4f94396a73", "9de9919becc9cc2ff03637872a440195ac4241c80536632fffeb6a1e25a74299", "a5a85b10e450c66b49f98846937e8cfca1db3127a9d5d1e31ca45c3d0bef4c5b", "b0997827b4f6a7c286c01c5f60384d218dca4ed7d9efa945c3e1aa623d5709ae", "b631ef96d3222e62861443cc89d6563ba3eeb816eeb96b2629345ab795e53681", 
"bf47c0607522fdbca6c9e817a6e81b08491de50f3766a7a0e6a5be7905961b41", "f81025eddd0327c7d4cfe9b62cf33190e1e736cc6e97502b3ec425f574b3e7a8"] requests = ["11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", "9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31"] scikit-learn = ["0a718b5ffbd5053fb3f9e1a2e20b7c4f256dd8035e246b907d3117d20bac0260", "1725540b754a9967778e9385e1ee2c8db50d5ab70ed835c9f5e36002ffabc169", "3e3ce307d7c5c5811658ba8686b24b571a8244eaafe707665ad601f400d5ce98", "42ad71502237c9fe300ecf157f5a394df717789a2dde541dd7034b539c70bdcc", "42cba716db197e0d1670e2fc13c4cc4a86d5c5358120ccfee6ec427b154e74ff", "47b4090b7686642e41176becb7c42ef3cc665d7ee0db5e7ea5d307ec9779327e", "51d99a08c8bf689cf60c9d8dca6e3d3e5f6d762def85ad735dcea11fb528a89b", "5f7577fbb2399a4712e96cf0e786638168940a876c33735a1b5d5a86ba4b1370", "66bfc2b6b15db1725d03ea657ec9184ff09dcbf1ecd834ef85f2edc2c9cbba97", "69a34d389d9ca4687ad00af4e11d53686771f484c37366f68617ef656bab16ab", "75297f3dd6685f01555f1bb75846995d45650af417280b69c81bf11b6987aed5", "9ebb38ab1d0ee143982aed561811903ac6c1abb512ae2b9019b3b65bde63ffb9", "a402c1484fe65df42d5dbc22a58e0695fe3afe2b0b229aee2a09c6d60ba8e5c2", "aad6b9aac1617bd7efa0450643888bbd3410679a94bc8680d9863825686ef369", "ad4db28d3dc16c01df75ed6efb72524537de3839a5d179fcf94094359fc72ec5", "b276739a5f863ccacb61999a3067d0895ee291c95502929b2ae56ea1f882e888", "b3dc88c4d2bcb26ffc5afe16d053ae28317d7d1de083651defcd5453a04f1563", "b3e4681253e95da5aa5c231889a32b084fd997962bf8beda6f796bf422f734b2", "c3d852d49d6c1710089d4513702099fa6f8e1aebfedf222319d80c47b0a195f8", "c6612e7e43988b8b5e1957150449493a55f9c059de641083df7a964f86f2d1e7", "c69e5c6051366a6ac9600d730276db939b1a205e42504ec0b8371f154b0058db", "ce121baa8e85ec27c3065281657dcd78adaab7dcb046c7fe96ad4e5a9dcb6610", "ed2a9a9bea6ec443b7effe5695c9c168b7bf9a67df6d880729760feda871b6a3", "efd842d70b87e3ef3429c3149840b9189d4441ca951ab0cec62c94a964e219d9", "f1428af5c381f6eef30ffbc7e047b7c713d4efa5d7bf5e57b62b3fc8d387044b", "f6c7bf8cd4de1640b760b47f4d28deb26dbbf9acbe0194cdff54a898e190d872", "f8329ac2160ad8bbbac6a507374685ceca3f24ca427fa9ee61a501280e1972d9", "fefba2a43b92f8393366093b60efbe984a72a2b41cce16b4002005e4104ef938"] -scipy = ["03b1e0775edbe6a4c64effb05fff2ce1429b76d29d754aa5ee2d848b60033351", "09d008237baabf52a5d4f5a6fcf9b3c03408f3f61a69c404472a16861a73917e", "10325f0ffac2400b1ec09537b7e403419dcd25d9fee602a44e8a32119af9079e", "1db9f964ed9c52dc5bd6127f0dd90ac89791daa690a5665cc01eae185912e1ba", "409846be9d6bdcbd78b9e5afe2f64b2da5a923dd7c1cd0615ce589489533fdbb", "4907040f62b91c2e170359c3d36c000af783f0fa1516a83d6c1517cde0af5340", "6c0543f2fdd38dee631fb023c0f31c284a532d205590b393d72009c14847f5b1", "826b9f5fbb7f908a13aa1efd4b7321e36992f5868d5d8311c7b40cf9b11ca0e7", "a7695a378c2ce402405ea37b12c7a338a8755e081869bd6b95858893ceb617ae", "a84c31e8409b420c3ca57fd30c7589378d6fdc8d155d866a7f8e6e80dec6fd06", "adadeeae5500de0da2b9e8dd478520d0a9945b577b2198f2462555e68f58e7ef", "b283a76a83fe463c9587a2c88003f800e08c3929dfbeba833b78260f9c209785", "c19a7389ab3cd712058a8c3c9ffd8d27a57f3d84b9c91a931f542682bb3d269d", "c3bb4bd2aca82fb498247deeac12265921fe231502a6bc6edea3ee7fe6c40a7a", "c5ea60ece0c0c1c849025bfc541b60a6751b491b6f11dd9ef37ab5b8c9041921", "db61a640ca20f237317d27bc658c1fc54c7581ff7f6502d112922dc285bdabee"] +scipy = ["0baa64bf42592032f6f6445a07144e355ca876b177f47ad8d0612901c9375bef", "243b04730d7223d2b844bda9500310eecc9eda0cba9ceaf0cde1839f8287dfa8", "2643cfb46d97b7797d1dbdb6f3c23fe3402904e3c90e6facfe6a9b98d808c1b5", 
"396eb4cdad421f846a1498299474f0a3752921229388f91f60dc3eda55a00488", "3ae3692616975d3c10aca6d574d6b4ff95568768d4525f76222fb60f142075b9", "435d19f80b4dcf67dc090cc04fde2c5c8a70b3372e64f6a9c58c5b806abfa5a8", "46a5e55850cfe02332998b3aef481d33f1efee1960fe6cfee0202c7dd6fc21ab", "75b513c462e58eeca82b22fc00f0d1875a37b12913eee9d979233349fce5c8b2", "7ccfa44a08226825126c4ef0027aa46a38c928a10f0a8a8483c80dd9f9a0ad44", "89dd6a6d329e3f693d1204d5562dd63af0fd7a17854ced17f9cbc37d5b853c8d", "a81da2fe32f4eab8b60d56ad43e44d93d392da228a77e229e59b51508a00299c", "a9d606d11eb2eec7ef893eb825017fbb6eef1e1d0b98a5b7fc11446ebeb2b9b1", "ac37eb652248e2d7cbbfd89619dce5ecfd27d657e714ed049d82f19b162e8d45", "cbc0611699e420774e945f6a4e2830f7ca2b3ee3483fca1aa659100049487dd5", "d02d813ec9958ed63b390ded463163685af6025cb2e9a226ec2c477df90c6957", "dd3b52e00f93fd1c86f2d78243dfb0d02743c94dd1d34ffea10055438e63b99d"] scs = ["1d67a34b36ef31fc23eeb29c97d3b126d8c70f6420e4da600d674f478d1985d4", "d3760446df7265ddd9c277759a2975250d3cd017703f1a0543ad55a222ac07ed"] seed-isort-config = ["072c665a64bca56b23a942f79acf6542a0ef07c5a3c965fdbb9d23a64d026d81", "cbb88394edd403e9da23ee24b10e26dbc4724805c477daef143acb67020e9684"] six = ["3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", "d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"] @@ -1297,12 +1298,12 @@ toml = ["229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c", "235 towncrier = ["48251a1ae66d2cf7e6fa5552016386831b3e12bb3b2d08eb70374508c17a8196", "de19da8b8cb44f18ea7ed3a3823087d2af8fcf497151bb9fd1e1b092ff56ed8d"] tox = ["dab0b0160dd187b654fc33d690ee1d7bf328bd5b8dc6ef3bb3cc468969c659ba", "ee35ffce74933a6c6ac10c9a0182e41763140a5a5070e21b114feca56eaccdcd"] tpot = ["276b58ac5b5dc67faf1340c36ac6135f2e717c0a8e04ab6ae382b56c34addfba", "9c9a13c82bb3ad63de4ecc5c8e1d8f615fe13f14340ac0ac3e9426b839b29b99"] -tqdm = ["14a285392c32b6f8222ecfbcd217838f88e11630affe9006cd0e94c7eff3cb61", "25d4c0ea02a305a688e7e9c2cdc8f862f989ef2a4701ab28ee963295f5b109ab"] +tqdm = ["438d6a735167099d75e5fd9a55175c6727c4dbba345ae406b2886c2728fe3e80", "ebc205051d79b49989140f5f6c73ec23fce5f590cbc4d9cd6e4c47f168fa0f10"] update-checker = ["59cfad7f9a0ee99f95f1dfc60f55bf184937bcab46a7270341c2c33695572453", "70e39446fccf77b21192cf7a8214051fa93a636dc3b5c8b602b589d100a168b8"] urllib3 = ["b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1", "dbe59173209418ae49d485b87d1681aefa36252ee85884c31346debd19463232"] -virtualenv = ["5fe3719f0185cc6d4e7ebb2d3b73bfadbebaaf0afa73fb312ec8b193ef8335b3", "d0158c9784570aab78cbb1e4dc59938de128d38e20e5271d9997ada4b417012c"] +virtualenv = ["94a6898293d07f84a98add34c4df900f8ec64a570292279f6d91c781d37fd305", "f6fc312c031f2d2344f885de114f1cb029dfcffd26aa6e57d2ee2296935c4e7d"] werkzeug = ["87ae4e5b5366da2347eb3116c0e6c681a0e939a33b2805e2c0cbd282664932c4", "a13b74dd3c45f758d4ebdb224be8f1ab8ef58b3c0ffc1783a8c7d9f4f50227e6"] -wheel = ["5e79117472686ac0c4aef5bad5172ea73a1c2d1646b808c35926bd26bdfb0c08", "62fcfa03d45b5b722539ccbc07b190e4bfff4bb9e3a4d470dd9f6a0981002565"] +wheel = ["10c9da68765315ed98850f8e048347c3eb06dd81822dc2ab1d4fde9dc9702646", "f4da1763d3becf2e2cd92a14a7c920f0f00eca30fdde9ea992c836685b9faf28"] wrapt = ["565a021fd19419476b9362b05eeaa094178de64f8361e44468f9e9d7843901e1"] xdoctest = ["d8e0b03fb76d9ea84a814cd574803f9a11bf0de2c47aceaf253ef0268cc7a809"] zipp = ["4970c3758f4e89a7857a973b1e2a5d75bcdc47794442f2e2dd4fe8e0466e809a", "8a5712cfd3bb4248015eb3b0b3c54a5f6ee3f2425963ef2a0125b8bc40aafaec"] From cf34e1e2a3d14052ec5dee8b40d8444b723e8d6f Mon 
Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Fri, 23 Aug 2019 15:05:21 -0400 Subject: [PATCH 36/37] Upgrading dependencies --- poetry.lock | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + setup.cfg | 4 ++-- 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 3d29e0b..6b23b7a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -218,6 +218,14 @@ version = "1.3.0" [package.dependencies] numpy = "*" +[[package]] +category = "main" +description = "Better living through Python with decorators" +name = "decorator" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*" +version = "4.4.0" + [[package]] category = "main" description = "serialize all of python" @@ -374,6 +382,23 @@ version = "2.9.0" numpy = ">=1.7" six = "*" +[[package]] +category = "main" +description = "Distributed Asynchronous Hyperparameter Optimization" +name = "hyperopt" +optional = false +python-versions = "*" +version = "0.1.2" + +[package.dependencies] +future = "*" +networkx = "*" +numpy = "*" +pymongo = "*" +scipy = "*" +six = "*" +tqdm = "*" + [[package]] category = "dev" description = "File identification library for Python" @@ -582,6 +607,17 @@ version = "0.70.8" [package.dependencies] dill = ">=0.3.0" +[[package]] +category = "main" +description = "Python package for creating and manipulating graphs and networks" +name = "networkx" +optional = false +python-versions = ">=3.5" +version = "2.3" + +[package.dependencies] +decorator = ">=4.3.0" + [[package]] category = "dev" description = "Node.js virtual environment builder" @@ -762,6 +798,14 @@ optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" version = "2.4.2" +[[package]] +category = "main" +description = "Python driver for MongoDB " +name = "pymongo" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" +version = "3.9.0" + [[package]] category = "main" description = "Python parsing module" @@ -1192,7 +1236,7 @@ version = "0.5.2" doc = ["sphinx", "sphinx_rtd_theme", "sphinxcontrib-plantuml", "docutils"] [metadata] -content-hash = "21268141db69de2b5ae77d0306654321f5028dbdd7dad505b2a3dedce4152845" +content-hash = "1dd71219e90b3022c40a666e6257091d78d5d82f1072627cff13a0b04ee17390" python-versions = "^3.6" [metadata.hashes] @@ -1218,6 +1262,7 @@ coverage = ["08907593569fe59baca0bf152c43f3863201efb6113ecb38ce7e97ce339805a6", cvxpy = ["49e38e033a51f7df3a48b6b2c2879d56ae11bc628defbf1dc11e02836ceda1c5", "8535529ddb807067b0d59661dce1d9a6ddb2a218398a38ea7772328ad8a6ea13", "957e0c0e65d9f2cf25eb9e7aca0c518158047a27de2aaa69e9177e2ea23dbd68", "c2400927cceb170f96c1c97ad7faacaf92e941bbddb06b837d4d51fedbd7b192"] darglint = ["4cc9f03930337112ec6c6ec2508162142140500fae84990c6ee09b8f85cebab0", "dea82e11971f9bef1710ec100703c8172e82149176d27da67a4a100cbc7d7042"] deap = ["01ab6067af3c86bd3a00a0d5e0c9860220c7cf412031f9cce18a6d08ec25b808", "0dc11a5521f661a7c7f475466d932b056fbfee8447ad73b007d69ef75c924355", "11162ae0343a25f5a8625f683ce16bad5757812d71db6b80a5ac7c25799a1a88", "1873f5e2a55ff61dac965b55cc042b2fe5529edbd54fc0fc1061664ddb4b75db", "19f6a60c91313cb1f39a9687bc54efab8abc599e5f81b87faedf583efc388602", "21787af1e4a56345bbffa6d1b07f5611d3ef7b299e5e832e6ab28dbab5c5c10e", "2f50f38ae0c82554a476d6c6013c85da4a8d7cac102edc4ec460a658200bd832", "31ed6220068e703d3f54c53046b8f25b85a9225c64e1e50172c2172c4bd8a7fe", "34594ba2f417ccb622b0ff54c25850fde80e12ca89fde6f242b15029e846be29", "3603c91779c276588884321637212511962b2f0668cec56b2b5664d28f28eee7", 
"5ee3cee4eac683237915bf570ede65047224ac6f392970fed029e3404935647c", "600e95e745cee25fda8c9a67219c9f46c4661da636a5af9f5e924230e7a3aeac", "6102d8bca425ff5d704f7631b69c22e33782e33020ce059cc88085746444ebfe", "669840720da9c4571efd9d0efdf90267009686b7a4c43dd4ab124e33e9cc153a", "6c5ef3b6c387cd28c7aab0297b05a9994b9c88dfd8a89236866c14703d55f9dc", "a0a0e56bd52a262ee12f84fa883b7ec5367532b784e2a6e83b1f7126b69d2300", "a1cc5fc4a2735ec5560ddef84f80beb84540d3221a147b53bba5e6a8718c8a55", "b05f607041c3f8aac5364055cb9632714bc62fe93e53283fbafea9ba91e13a69", "cd0fd7bccf7837b9e6a666b75e1c3a629fa3f5bc346cb90a9edd8cd56f085980", "cf1e53c822526bbc418333c47f668f394b00b51fddb4f15c54d5f190b2b88f17", "e648e1d76d5c8ecbce7f312bd174e4d2613debddd81f2a614b9023f7ad0331a0", "f146a9a0957510b57a2b5c669a26f0b84b2d219000b5684f4827884a75ad2ea7", "f1a0d1390e0b4f9edd4cbf2903c7d60865f43bad00de239aa066ffeda4ad7ee0", "fe789aa74ba78549030037dc9580510ff1763ef12fdf05cb92dda74237110565"] +decorator = ["86156361c50488b84a3f148056ea716ca587df2f0de1d34750d35c21312725de", "f069f3a01830ca754ba5258fde2278454a0b5b79e0d7f5c13b3b97e57d4acff6"] dill = ["993409439ebf7f7902d9de93eaa2a395e0446ff773d29f13dc46646482f76906"] docutils = ["54a349c622ff31c91cbec43b0b512f113b5b24daf00e2ea530bb1bd9aac14849", "ba4584f9107571ced0d2c7f56a5499c696215ba90797849c92d395979da68521", "d2ddba74835cb090a1b627d3de4e7835c628d07ee461f7b4480f51af2fe4d448"] ecos = ["1f811e7244a58a7037474b3dead85c7ffd524cf631c90584cc544e438f114cf9", "30e7e9c5ad8a012ba1f69aa6827beb99f208323f678a82fcccc30e4ab8090aad", "361758a3568eb5a3a9b37c00a22b36f9548bbd0cd21f1da904a3627285bb4274", "4b3068ef023c4f39f6c0d6fd73d27d8a8008f8b14e1f4ddd8d0f4876e841b986", "4f775caf828597d094cddae54f7ed12b4a5aa760f535a92ccabec47741e4d61c", "4fdfee011853cd07d494ef58a9299b1f0ccf0268c375109cb2b60727f746ea74", "54f7c480029fbfa738ddb8e538388868b2a17c9189a426d924ca0c36e4cbbbd5", "574fa26661d192e48551a30e217296875020fdb7eebf9a072a14d68a0b9de03a", "6b0829b76ba49f6ebf8b9512673a0b702b756704927cbb106043e1b2273dadea", "72657189f71dbde01d1df841d2139e04da17071c8d3270919c8501f239d8c8e4", "7c871b7a49a5855e7df0d60a06d0e2148a4b183d612f7db5e155988629c3ec21", "831acb6bac205025ffe87002d8f425d2764a70db3d9d053a1f7e0e50bc2a18b9", "83e90f42b3f32e2a93f255c3cfad2da78dbd859119e93844c45d2fca20bdc758", "845455f99cd579ee0cdfbcea675b4e4f7674b563e6a54a225977fc0819cda7e0", "912b17f72476aff33b32e92decd7c02ddb929db28227563b3750948783cff6f4", "94dd0f82a18550232e4924e6c42730c46d7cdc03c4e2dc889e98ec97c0f24061", "96ddc1c4e440820bb343c44785da480a64a9c468ec467b997e40f0c7d3236226", "db7433051f6072d4821ebc582e9ff853d7d631ed98770550d248eae70b29dd26", "dd9f01e28fe58894fb394931804884122606fb4e2a59d4514b803e9cd11b7d2b", "e002df0f4b6777be68c73756e60f3cf76cb5f2f7d36c4f1a482c5538aac1a287", "fb64fb29aef26474f807df4f0c198a6d192291edc9faa3bb05e3bc9b1e2b960a", "feda86ddd191b1ae34d5ea615743894d1baa800f2dbb552cb7c7095a87037831"] @@ -1233,6 +1278,7 @@ gast = ["fe939df4583692f0512161ec1c880e0a10e71e6a232da045ab8edd3756fbadf0"] google-pasta = ["40b4f55ba7b44823eac96d055000572c84ce48cacb3e91c100869844064b2d07", "79d1ce28b381d68e98ef7707d19909adb58912f8dae8734402454424fc76b8fe", "7ca8afc4cfeebf4a079cdf586333d5447cecd19a997475136138fc83c3351bc4"] grpcio = ["1303578092f1f6e4bfbc354c04ac422856c393723d3ffa032fff0f7cb5cfd693", "229c6b313cd82bec8f979b059d87f03cc1a48939b543fe170b5a9c5cf6a6bc69", "3cd3d99a8b5568d0d186f9520c16121a0f2a4bcad8e2b9884b76fb88a85a7774", "41cfb222db358227521f9638a6fbc397f310042a4db5539a19dea01547c621cd", 
"43330501660f636fd6547d1e196e395cd1e2c2ae57d62219d6184a668ffebda0", "45d7a2bd8b4f25a013296683f4140d636cdbb507d94a382ea5029a21e76b1648", "47dc935658a13b25108823dabd010194ddea9610357c5c1ef1ad7b3f5157ebee", "480aa7e2b56238badce0b9413a96d5b4c90c3bfbd79eba5a0501e92328d9669e", "4a0934c8b0f97e1d8c18e76c45afc0d02d33ab03125258179f2ac6c7a13f3626", "5624dab19e950f99e560400c59d87b685809e4cfcb2c724103f1ab14c06071f7", "60515b1405bb3dadc55e6ca99429072dad3e736afcf5048db5452df5572231ff", "610f97ebae742a57d336a69b09a9c7d7de1f62aa54aaa8adc635b38f55ba4382", "64ea189b2b0859d1f7b411a09185028744d494ef09029630200cc892e366f169", "686090c6c1e09e4f49585b8508d0a31d58bc3895e4049ea55b197d1381e9f70f", "7745c365195bb0605e3d47b480a2a4d1baa8a41a5fd0a20de5fa48900e2c886a", "79491e0d2b77a1c438116bf9e5f9e2e04e78b78524615e2ce453eff62db59a09", "825177dd4c601c487836b7d6b4ba268db59787157911c623ba59a7c03c8d3adc", "8a060e1f72fb94eee8a035ed29f1201ce903ad14cbe27bda56b4a22a8abda045", "90168cc6353e2766e47b650c963f21cfff294654b10b3a14c67e26a4e3683634", "94b7742734bceeff6d8db5edb31ac844cb68fc7f13617eca859ff1b78bb20ba1", "962aebf2dd01bbb2cdb64580e61760f1afc470781f9ecd5fe8f3d8dcd8cf4556", "9c8d9eacdce840b72eee7924c752c31b675f8aec74790e08cff184a4ea8aa9c1", "af5b929debc336f6bab9b0da6915f9ee5e41444012aed6a79a3c7e80d7662fdf", "b9cdb87fc77e9a3eabdc42a512368538d648fa0760ad30cf97788076985c790a", "c5e6380b90b389454669dc67d0a39fb4dc166416e01308fcddd694236b8329ef", "d60c90fe2bfbee735397bf75a2f2c4e70c5deab51cd40c6e4fa98fae018c8db6", "d8582c8b1b1063249da1588854251d8a91df1e210a328aeb0ece39da2b2b763b", "ddbf86ba3aa0ad8fed2867910d2913ee237d55920b55f1d619049b3399f04efc", "e46bc0664c5c8a0545857aa7a096289f8db148e7f9cca2d0b760113e8994bddc", "f6437f70ec7fed0ca3a0eef1146591bb754b418bb6c6b21db74f0333d624e135", "f71693c3396530c6b00773b029ea85e59272557e9bd6077195a6593e4229892a", "f79f7455f8fbd43e8e9d61914ecf7f48ba1c8e271801996fef8d6a8f3cc9f39f"] h5py = ["05750b91640273c69989c657eaac34b091abdd75efc8c4824c82aaf898a2da0a", "082a27208aa3a2286e7272e998e7e225b2a7d4b7821bd840aebf96d50977abbb", "08e2e8297195f9e813e894b6c63f79372582787795bba2014a2db6a2de95f713", "0dd2adeb2e9de5081eb8dcec88874e7fd35dae9a21557be3a55a3c7d491842a4", "0f94de7a10562b991967a66bbe6dda9808e18088676834c0a4dcec3fdd3bcc6f", "106e42e2e01e486a3d32eeb9ba0e3a7f65c12fa8998d63625fa41fb8bdc44cdb", "1606c66015f04719c41a9863c156fc0e6b992150de21c067444bcb82e7d75579", "1854c4beff9961e477e133143c5e5e355dac0b3ebf19c52cf7cc1b1ef757703c", "1e9fb6f1746500ea91a00193ce2361803c70c6b13f10aae9a33ad7b5bd28e800", "2cca17e80ddb151894333377675db90cd0279fa454776e0a4f74308376afd050", "30e365e8408759db3778c361f1e4e0fe8e98a875185ae46c795a85e9bafb9cdf", "3206bac900e16eda81687d787086f4ffd4f3854980d798e191a9868a6510c3ae", "3c23d72058647cee19b30452acc7895621e2de0a0bd5b8a1e34204b9ea9ed43c", "407b5f911a83daa285bbf1ef78a9909ee5957f257d3524b8606be37e8643c5f0", "4162953714a9212d373ac953c10e3329f1e830d3c7473f2a2e4f25dd6241eef0", "5fc7aba72a51b2c80605eba1c50dbf84224dcd206279d30a75c154e5652e1fe4", "713ac19307e11de4d9833af0c4bd6778bde0a3d967cafd2f0f347223711c1e31", "71b946d80ef3c3f12db157d7778b1fe74a517ca85e94809358b15580983c2ce2", "8cc4aed71e20d87e0a6f02094d718a95252f11f8ed143bc112d22167f08d4040", "9d41ca62daf36d6b6515ab8765e4c8c4388ee18e2a665701fef2b41563821002", "a744e13b000f234cd5a5b2a1f95816b819027c57f385da54ad2b7da1adace2f3", "b087ee01396c4b34e9dc41e3a6a0442158206d383c19c7d0396d52067b17c1cb", "b0f03af381d33306ce67d18275b61acb4ca111ced645381387a02c8a5ee1b796", "b9e4b8dfd587365bdd719ae178fa1b6c1231f81280b1375eef8626dfd8761bf3", 
"c5dd4ec75985b99166c045909e10f0534704d102848b1d9f0992720e908928e7", "d2b82f23cd862a9d05108fe99967e9edfa95c136f532a71cb3d28dc252771f50", "e58a25764472af07b7e1c4b10b0179c8ea726446c7141076286e41891bf3a563", "f3b49107fbfc77333fc2b1ef4d5de2abcd57e7ea3a1482455229494cf2da56ce"] +hyperopt = ["2832a9f321ae1167dbd39a7382721933c6885b682ea90165dd90cd64938aa226", "d95ac8a8dd14a56a1d52c0b0e8501c16463e99d3a9571e297058a6909131878c", "df8c48a62bc1614bdc37f5cc570064a93a4b81a16559621db9acee3f6536b658"] identify = ["9aba2d08a82aa8e6f58810d4887ed3cf103a1befeb1eaf632d9c6fd2d6642542", "b50ffad180b3a93b33a58b42597ef22493240d406ba07cc5058daf70f44b8d7c"] idna = ["c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", "ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c"] imagesize = ["3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8", "f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5"] @@ -1254,6 +1300,7 @@ mccabe = ["ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", "d mock = ["83657d894c90d5681d62155c82bda9c1187827525880eda8ff5df4ec813437c3", "d157e52d4e5b938c550f39eb2fd15610db062441a9c2747d3dbfa9298211d0f8"] more-itertools = ["409cd48d4db7052af495b09dec721011634af3753ae1ef92d2b32f73a745f832", "92b8c4b06dac4f0611c0729b2f2ede52b2e1bac1ab48f089c7ddc12e26bb60c4"] multiprocess = ["1b7084eac782f50eb21da2ea571e062bbf504851212cdb4c7ad1cad7bed036c6", "20c3715959d45a902ae425ea5b275288093588da5826d281846912f520818712", "3ac3bad735f67407678ed2b6b682ed115b9983165f4c7cd5929afba9f1f6fe6c", "649d69115b5e93b4a5c77dee6a861a28c74105cc1d8a420da308afa99914b4ee", "64b415e0cb746bc9e9620b92a300b2d37f111ff1780ba953c899bf56cea16ce8", "786bd6684c8d339a56bf8f32fb846da4ccb5924f7c2679c946d64d2684e552f7", "8f69e705f7aabfbb8dc17252daca34331f0ce5c1b91bae0ddd0f3ee03f90a580", "b2026f00a358a3ac00f41c83d5d7f05ae15cd4b1e061fc00f672abd98cf322ae", "fc6b2d8f33e7d437a82c6d1c2f1673ae20a271152a1ac6a18571d10308de027d"] +networkx = ["8311ddef63cf5c5c5e7c1d0212dd141d9a1fe3f474915281b73597ed5f1d4e3d"] nodeenv = ["ad8259494cf1c9034539f6cced78a1da4840a4b157e23640bc4a0c0546b0cb7a"] np-utils = ["6a63bdf5c2326183e8e0086abe2a27b4029f48150b4ced8a948af72d863d6379"] numpy = ["03e311b0a4c9f5755da7d52161280c6a78406c7be5c5cc7facfbcebb641efb7e", "0cdd229a53d2720d21175012ab0599665f8c9588b3b8ffa6095dd7b90f0691dd", "312bb18e95218bedc3563f26fcc9c1c6bfaaf9d453d15942c0839acdd7e4c473", "464b1c48baf49e8505b1bb754c47a013d2c305c5b14269b5c85ea0625b6a988a", "5adfde7bd3ee4864536e230bcab1c673f866736698724d5d28c11a4d63672658", "7724e9e31ee72389d522b88c0d4201f24edc34277999701ccd4a5392e7d8af61", "8d36f7c53ae741e23f54793ffefb2912340b800476eb0a831c6eb602e204c5c4", "910d2272403c2ea8a52d9159827dc9f7c27fb4b263749dca884e2e4a8af3b302", "951fefe2fb73f84c620bec4e001e80a80ddaa1b84dce244ded7f1e0cbe0ed34a", "9588c6b4157f493edeb9378788dcd02cb9e6a6aeaa518b511a1c79d06cbd8094", "9ce8300950f2f1d29d0e49c28ebfff0d2f1e2a7444830fbb0b913c7c08f31511", "be39cca66cc6806652da97103605c7b65ee4442c638f04ff064a7efd9a81d50a", "c3ab2d835b95ccb59d11dfcd56eb0480daea57cdf95d686d22eff35584bc4554", "eb0fc4a492cb896346c9e2c7a22eae3e766d407df3eb20f4ce027f23f76e4c54", "ec0c56eae6cee6299f41e780a0280318a93db519bbb2906103c43f3e2be1206c", "f4e4612de60a4f1c4d06c8c2857cdcb2b8b5289189a12053f37d3f41f06c60d0"] @@ -1270,6 +1317,7 @@ pycodestyle = ["95a2219d12372f05704562a14ec30bc76b05a5b297b21a5dfe3f6fac3491ae56 pydocstyle = ["2258f9b0df68b97bf3a6c29003edc5238ff8879f1efb6f1999988d934e432bd8", "5741c85e408f9e0ddf873611085e819b809fca90b619f5fd7f34bd4959da3dd4", 
"ed79d4ec5e92655eccc21eb0c6cf512e69512b4a97d215ace46d17e4990f2039"] pyflakes = ["17dbeb2e3f4d772725c777fabc446d5634d1038f234e77343108ce445ea69ce0", "d976835886f8c5b31d47970ed689944a0262b5f3afa00a5a7b4dc81e5449f8a2"] pygments = ["71e430bc85c88a430f000ac1d9b331d2407f681d6f6aec95e8bcfbc3df5b0127", "881c4c157e45f30af185c1ffe8d549d48ac9127433f2c380c24b84572ad66297"] +pymongo = ["09f8196e1cb081713aa3face08d1806dc0a5dd64cb9f67fefc568519253a7ff2", "1be549c0ce2ba8242c149156ae2064b12a5d4704448d49f630b4910606efd474", "1f9fe869e289210250cba4ea20fbd169905b1793e1cd2737f423e107061afa98", "3653cea82d1e35edd0a2355150daf8a27ebf12cf55182d5ad1046bfa288f5140", "4249c6ba45587b959292a727532826c5032d59171f923f7f823788f413c2a5a3", "4ff8f5e7c0a78983c1ee07894fff1b21c0e0ad3a122d9786cc3745fd60e4a2ce", "56b29c638ab924716b48a3e94e3d7ac00b04acec1daa8190c36d61fc714c3629", "56ec9358bbfe5ae3b25e785f8a14619d6799c855a44734c9098bb457174019bf", "5b59bbde4eb417f3f9379f7b1a9de3669894f2bae9de933a836e2bffea2bbfa1", "5dca250cbf1183c3e7b7b18c882c2b2199bfb20c74c4c68dbf11596808a296da", "61101d1cc92881fac1f9ac7e99b033062f4c210178dc33193c8f5567feecb069", "7b4aea184e4868ebd4f9f786ffee14a1121bda5436ad04f6bcbacfa2147f8386", "86624c0205a403fb4fbfedef79c5b4ab27e21fd018fdb6a27cf03b3c32a9e2b9", "88ac09e1b197c3b4531e43054d49c022a3ea1281431b2f4980abafa35d2a5ce2", "8b0339809b12ea292d468524dd1777f1a9637d9bdc0353a9261b88f82537d606", "93dbf7388f6bf9af48dbb32f265b75b3dbc743a7a2ce98e44c88c049c58d85d3", "9b705daec636c560dd2d63935f428a6b3cddfe903fffc0f349e0e91007c893d6", "a090a819fe6fefadc2901d3911c07c76c0935ec5c790a50e9f3c3c47bacd5978", "a102b346f1921237eaa9a31ee89eda57ad3c3973d79be3a456d92524e7df8fec", "a13363869f2f36291d6367069c65d51d7b8d1b2fb410266b0b6b1f3c90d6deb0", "a409a43c76da50881b70cc9ee70a1744f882848e8e93a68fb434254379777fa3", "a76475834a978058425b0163f1bad35a5f70e45929a543075633c3fc1df564c5", "ad474e93525baa6c58d75d63a73143af24c9f93c8e26e8d382f32c4da637901a", "b268c7fa03ac77a8662fab3b2ab0be4beecb82f60f4c24b584e69565691a107f", "b67ec339b180acdbebcd03807ae4b1764a43e7069340fe860a60ac310b9d38be", "cca4e1ab5ba0cd7877d3938167ee8ae9c2986cc0e10d3dcc3243d664d3a83fec", "cef61de3f0f4441ec40266ff2ab42e5c16eaba1dc1fc6e1036f274621c52adc1", "e28153b5d5ca33d4ba0c3bbc0e1ff161b9016e5e5f3f8ca10d6fa49106eb9e04", "f30d7b37804daf0bab1143abc71666c630d7e270f5c14c5a7c300a6699c21108", "f70f0133301cccf9bfd68fd20f67184ef991be578b646e78441106f9e27cc44d", "fa75c21c1d82f20cce62f6fc4a68c2b0f33572ab406df1b17cd77a947d0b2993"] pyparsing = ["6f98a7b9397e206d78cc01df10131398f1c8b8510a2f4d97d9abd82e1aacdd80", "d9338df12903bbf5d65a0e4e87c2161968b10d2e489652bb47001d82a9b028b4"] pytest = ["3f193df1cfe1d1609d4c583838bea3d532b18d6160fd3f55c9447fdca30848ec", "e246cf173c01169b9617fc07264b7b1316e78d7a650055235d6d897bc80d9660"] pytest-cov = ["2b097cde81a302e1047331b48cadacf23577e431b61e9c6f49a1170bbe3d3da6", "e00ea4fdde970725482f1f35630d12f074e121a23801aabf2ae154ec6bdd343a"] diff --git a/pyproject.toml b/pyproject.toml index e0718eb..be6f6b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,7 @@ sphinx_rtd_theme = { version ="^0.4.1", optional = true } sphinxcontrib-plantuml = { version ="^0.16.1", optional = true } docutils = { version ="<0.15.1", optional= true } # hot fix: https://github.com/sdispater/poetry/issues/1194 patchy = "^1.5" +hyperopt = "^0.1.2" [tool.poetry.dev-dependencies] # Linting diff --git a/setup.cfg b/setup.cfg index b943875..dc8ffcd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -33,8 +33,8 @@ per-file-ignores = # pytest [tool:pytest] -addopts = -v -x --xdoc 
--cov=foreshadow --cov-config=setup.cfg --cov-report=term --cov-report=html -;addopts = -s -vv --xdoc +;addopts = -v -x --xdoc --cov=foreshadow --cov-config=setup.cfg --cov-report=term --cov-report=html +addopts = -s -vv --xdoc # above is good for pycharm environments. filterwarnings = ignore:the matrix subclass:PendingDeprecationWarning From bdf6751f681aee3c1d47875c449a1f9a253cf92b Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Fri, 23 Aug 2019 15:07:40 -0400 Subject: [PATCH 37/37] fixing setup.cfg --- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index dc8ffcd..b943875 100644 --- a/setup.cfg +++ b/setup.cfg @@ -33,8 +33,8 @@ per-file-ignores = # pytest [tool:pytest] -;addopts = -v -x --xdoc --cov=foreshadow --cov-config=setup.cfg --cov-report=term --cov-report=html -addopts = -s -vv --xdoc +addopts = -v -x --xdoc --cov=foreshadow --cov-config=setup.cfg --cov-report=term --cov-report=html +;addopts = -s -vv --xdoc # above is good for pycharm environments. filterwarnings = ignore:the matrix subclass:PendingDeprecationWarning
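The net effect of PATCH 36 and PATCH 37 on setup.cfg is a no-op: PATCH 36 accidentally committed a local pytest toggle along with the dependency upgrades, commenting out the CI default (addopts = -v -x --xdoc with the --cov coverage flags) in favor of addopts = -s -vv --xdoc, which disables output capture (-s) and raises verbosity for interactive debugging in PyCharm. PATCH 37 swaps the comment markers back, restoring coverage reporting as the default.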
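For context on the hyperopt pin added in PATCH 36 (and the transitive additions decorator, networkx, and pymongo visible in the poetry.lock entries above): what follows is a minimal sketch of the standard hyperopt 0.1.2 search loop, illustrative only and not code from this repository; the toy objective and the parameter name "x" are assumptions for the example.

# Sketch only: public hyperopt 0.1.2 API (fmin, tpe, hp, Trials).
from hyperopt import Trials, fmin, hp, tpe

def objective(x):
    # Hypothetical toy objective; a real search would score a fitted pipeline.
    return (x - 1.0) ** 2

trials = Trials()  # in-memory trial store; MongoTrials (hence the pymongo dependency) supports distributed workers
best = fmin(
    fn=objective,                      # function to minimize
    space=hp.uniform("x", -2.0, 2.0),  # search space built from hp.* primitives
    algo=tpe.suggest,                  # Tree-structured Parzen Estimator proposals
    max_evals=50,                      # evaluation budget; hyperopt 0.1.2 reports progress via tqdm
    trials=trials,
)
print(best)  # best parameters found, e.g. {'x': 0.99...}

networkx enters the lock file because hyperopt lists it under its [package.dependencies] block above; decorator comes in the same way as a dependency of networkx.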