This repository has been archived by the owner on Mar 10, 2024. It is now read-only.

Commit

[fix] Fix the task inference issue mentioned in automl#352
Since sklearn's task inference regards integer targets as a
classification task, I modified target_validator so that regression
targets are always cast to float.
This workaround is mentioned in the reference below:
scikit-learn/scikit-learn#8952
nabenabe0928 committed Feb 22, 2022
1 parent b5c1757 commit 1d14de3
Showing 5 changed files with 88 additions and 84 deletions.
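To see the behaviour the commit message refers to, sklearn's own inference helper can be called directly. The snippet below is a standalone illustration (it is not part of this commit) using sklearn.utils.multiclass.type_of_target:

import numpy as np
from sklearn.utils.multiclass import type_of_target

# Integer-valued targets are inferred as a classification problem,
# even when they were meant as regression targets.
print(type_of_target(np.array([10, 20, 30, 40])))    # 'multiclass'

# Targets with a fractional part are recognised as continuous.
print(type_of_target(np.array([10.5, 20.1, 30.7])))  # 'continuous'

# An all-integral float array is still reported as 'multiclass', so a
# validator that already knows the task is regression must treat the
# targets as continuous regardless of what this helper reports.
print(type_of_target(np.array([10.0, 20.0, 30.0])))  # 'multiclass'
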
18 changes: 9 additions & 9 deletions autoPyTorch/data/base_feature_validator.py
@@ -12,7 +12,7 @@
from autoPyTorch.utils.logging_ import PicklableClientLogger


- SUPPORTED_FEAT_TYPES = Union[
+ SupportedFeatTypes = Union[
List,
pd.DataFrame,
np.ndarray,
@@ -68,19 +68,19 @@ def __init__(

def fit(
self,
- X_train: SUPPORTED_FEAT_TYPES,
- X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
+ X_train: SupportedFeatTypes,
+ X_test: Optional[SupportedFeatTypes] = None,
) -> BaseEstimator:
"""
Validates and fit a categorical encoder (if needed) to the features.
The supported data types are List, numpy arrays and pandas DataFrames.
CSR sparse data types are also supported
Args:
- X_train (SUPPORTED_FEAT_TYPES):
+ X_train (SupportedFeatTypes):
A set of features that are going to be validated (type and dimensionality
checks) and a encoder fitted in the case the data needs encoding
- X_test (Optional[SUPPORTED_FEAT_TYPES]):
+ X_test (Optional[SupportedFeatTypes]):
A hold out set of data used for checking
"""

@@ -109,11 +109,11 @@ def fit(

def _fit(
self,
- X: SUPPORTED_FEAT_TYPES,
+ X: SupportedFeatTypes,
) -> BaseEstimator:
"""
Args:
- X (SUPPORTED_FEAT_TYPES):
+ X (SupportedFeatTypes):
A set of features that are going to be validated (type and dimensionality
checks) and a encoder fitted in the case the data needs encoding
Returns:
@@ -124,11 +124,11 @@ def _fit(

def transform(
self,
- X: SUPPORTED_FEAT_TYPES,
+ X: SupportedFeatTypes,
) -> np.ndarray:
"""
Args:
- X_train (SUPPORTED_FEAT_TYPES):
+ X_train (SupportedFeatTypes):
A set of features, whose categorical features are going to be
transformed
24 changes: 12 additions & 12 deletions autoPyTorch/data/base_target_validator.py
@@ -12,7 +12,7 @@
from autoPyTorch.utils.logging_ import PicklableClientLogger


- SUPPORTED_TARGET_TYPES = Union[
+ SupportedTargetTypes = Union[
List,
pd.Series,
pd.DataFrame,
@@ -69,17 +69,17 @@ def __init__(self,

def fit(
self,
- y_train: SUPPORTED_TARGET_TYPES,
- y_test: Optional[SUPPORTED_TARGET_TYPES] = None,
+ y_train: SupportedTargetTypes,
+ y_test: Optional[SupportedTargetTypes] = None,
) -> BaseEstimator:
"""
Validates and fit a categorical encoder (if needed) to the targets
The supported data types are List, numpy arrays and pandas DataFrames.
Args:
- y_train (SUPPORTED_TARGET_TYPES)
+ y_train (SupportedTargetTypes)
A set of targets set aside for training
- y_test (Union[SUPPORTED_TARGET_TYPES])
+ y_test (Union[SupportedTargetTypes])
A hold out set of data used of the targets. It is also used to fit the
categories of the encoder.
"""
@@ -128,26 +128,26 @@ def fit(

def _fit(
self,
- y_train: SUPPORTED_TARGET_TYPES,
- y_test: Optional[SUPPORTED_TARGET_TYPES] = None,
+ y_train: SupportedTargetTypes,
+ y_test: Optional[SupportedTargetTypes] = None,
) -> BaseEstimator:
"""
Args:
- y_train (SUPPORTED_TARGET_TYPES)
+ y_train (SupportedTargetTypes)
The labels of the current task. They are going to be encoded in case
of classification
- y_test (Optional[SUPPORTED_TARGET_TYPES])
+ y_test (Optional[SupportedTargetTypes])
A holdout set of labels
"""
raise NotImplementedError()

def transform(
self,
- y: Union[SUPPORTED_TARGET_TYPES],
+ y: Union[SupportedTargetTypes],
) -> np.ndarray:
"""
Args:
- y (SUPPORTED_TARGET_TYPES)
+ y (SupportedTargetTypes)
A set of targets that are going to be encoded if the current task
is classification
Returns:
@@ -158,7 +158,7 @@ def transform(

def inverse_transform(
self,
- y: SUPPORTED_TARGET_TYPES,
+ y: SupportedTargetTypes,
) -> np.ndarray:
"""
Revert any encoding transformation done on a target array
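The cast described in the commit message is applied in the tabular target validator, whose diff is not rendered on this page. As a rough, hypothetical sketch only (the function name and the is_classification flag are assumptions, not the project's code), the idea is:

import numpy as np

def cast_regression_targets(y: np.ndarray, is_classification: bool) -> np.ndarray:
    # Hand regression targets to the pipeline as floats so that
    # downstream type inference cannot mistake them for class labels.
    if not is_classification and not np.issubdtype(y.dtype, np.floating):
        return y.astype(np.float64)
    return y
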
28 changes: 14 additions & 14 deletions autoPyTorch/data/base_validator.py
@@ -7,8 +7,8 @@
from sklearn.base import BaseEstimator
from sklearn.exceptions import NotFittedError

- from autoPyTorch.data.base_feature_validator import SUPPORTED_FEAT_TYPES
- from autoPyTorch.data.base_target_validator import SUPPORTED_TARGET_TYPES
+ from autoPyTorch.data.base_feature_validator import SupportedFeatTypes
+ from autoPyTorch.data.base_target_validator import SupportedTargetTypes


class BaseInputValidator(BaseEstimator):
@@ -40,10 +40,10 @@ def __init__(

def fit(
self,
- X_train: SUPPORTED_FEAT_TYPES,
- y_train: SUPPORTED_TARGET_TYPES,
- X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
- y_test: Optional[SUPPORTED_TARGET_TYPES] = None,
+ X_train: SupportedFeatTypes,
+ y_train: SupportedTargetTypes,
+ X_test: Optional[SupportedFeatTypes] = None,
+ y_test: Optional[SupportedTargetTypes] = None,
) -> BaseEstimator:
"""
Validates and fit a categorical encoder (if needed) to the features, and
@@ -59,15 +59,15 @@ def fit(
+ If performing a classification task, the data is going to be encoded
Args:
- X_train (SUPPORTED_FEAT_TYPES):
+ X_train (SupportedFeatTypes):
A set of features that are going to be validated (type and dimensionality
checks). If this data contains categorical columns, an encoder is going to
be instantiated and trained with this data.
- y_train (SUPPORTED_TARGET_TYPES):
+ y_train (SupportedTargetTypes):
A set of targets that are going to be encoded if the task is for classification
- X_test (Optional[SUPPORTED_FEAT_TYPES]):
+ X_test (Optional[SupportedFeatTypes]):
A hold out set of features used for checking
- y_test (SUPPORTED_TARGET_TYPES):
+ y_test (SupportedTargetTypes):
A hold out set of targets used for checking. Additionally, if the current task
is a classification task, this y_test categories are also going to be used to
fit a pre-processing encoding (to prevent errors on unseen classes).
@@ -96,16 +96,16 @@ def fit(

def transform(
self,
- X: SUPPORTED_FEAT_TYPES,
- y: Optional[SUPPORTED_TARGET_TYPES] = None,
+ X: SupportedFeatTypes,
+ y: Optional[SupportedTargetTypes] = None,
) -> Tuple[np.ndarray, Optional[np.ndarray]]:
"""
Transform the given target or features to a numpy array
Args:
- X (SUPPORTED_FEAT_TYPES):
+ X (SupportedFeatTypes):
A set of features to transform
- y (Optional[SUPPORTED_TARGET_TYPES]):
+ y (Optional[SupportedTargetTypes]):
A set of targets to transform
Returns:
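For orientation, a typical call sequence against this interface is sketched below. TabularInputValidator and its is_classification argument are taken as assumptions about a concrete subclass; they do not appear in the lines above.

import numpy as np
from autoPyTorch.data.tabular_validator import TabularInputValidator  # assumed concrete subclass

X_train = np.random.rand(100, 4)
y_train = np.random.rand(100)  # regression targets

validator = TabularInputValidator(is_classification=False)  # assumed signature
validator.fit(X_train=X_train, y_train=y_train)

# transform() returns numpy arrays ready for the rest of the pipeline
X_arr, y_arr = validator.transform(X_train, y_train)
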
22 changes: 11 additions & 11 deletions autoPyTorch/data/tabular_feature_validator.py
@@ -16,7 +16,7 @@
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline

- from autoPyTorch.data.base_feature_validator import BaseFeatureValidator, SUPPORTED_FEAT_TYPES
+ from autoPyTorch.data.base_feature_validator import BaseFeatureValidator, SupportedFeatTypes


def _create_column_transformer(
@@ -117,15 +117,15 @@ def _comparator(cmp1: str, cmp2: str) -> int:

def _fit(
self,
- X: SUPPORTED_FEAT_TYPES,
+ X: SupportedFeatTypes,
) -> BaseEstimator:
"""
In case input data is a pandas DataFrame, this utility encodes the user provided
features (from categorical for example) to a numerical value that further stages
will be able to use
Args:
- X (SUPPORTED_FEAT_TYPES):
+ X (SupportedFeatTypes):
A set of features that are going to be validated (type and dimensionality
checks) and an encoder fitted in the case the data needs encoding
@@ -204,14 +204,14 @@ def _fit(

def transform(
self,
- X: SUPPORTED_FEAT_TYPES,
+ X: SupportedFeatTypes,
) -> np.ndarray:
"""
Validates and fit a categorical encoder (if needed) to the features.
The supported data types are List, numpy arrays and pandas DataFrames.
Args:
- X_train (SUPPORTED_FEAT_TYPES):
+ X_train (SupportedFeatTypes):
A set of features, whose categorical features are going to be
transformed
@@ -276,13 +276,13 @@ def transform(

def _check_data(
self,
- X: SUPPORTED_FEAT_TYPES,
+ X: SupportedFeatTypes,
) -> None:
"""
Feature dimensionality and data type checks
Args:
- X (SUPPORTED_FEAT_TYPES):
+ X (SupportedFeatTypes):
A set of features that are going to be validated (type and dimensionality
checks) and an encoder fitted in the case the data needs encoding
"""
@@ -429,19 +429,19 @@ def _get_columns_to_encode(

def list_to_dataframe(
self,
- X_train: SUPPORTED_FEAT_TYPES,
- X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
+ X_train: SupportedFeatTypes,
+ X_test: Optional[SupportedFeatTypes] = None,
) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
"""
Converts a list to a pandas DataFrame. In this process, column types are inferred.
If test data is provided, we proactively match it to train data
Args:
- X_train (SUPPORTED_FEAT_TYPES):
+ X_train (SupportedFeatTypes):
A set of features that are going to be validated (type and dimensionality
checks) and a encoder fitted in the case the data needs encoding
- X_test (Optional[SUPPORTED_FEAT_TYPES]):
+ X_test (Optional[SupportedFeatTypes]):
A hold out set of data used for checking
Returns:
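The column-type inference that list_to_dataframe relies on is pandas' own DataFrame construction; a standalone example of that behaviour (not taken from this file):

import pandas as pd

# pandas infers one dtype per column when building a DataFrame from rows
rows = [[1, "a", 0.5], [2, "b", 1.5]]
df = pd.DataFrame(rows)
print(df.dtypes)  # int64, object, float64

# A column of whole numbers stays integer, which is exactly the situation
# that makes integer regression targets look like class labels.
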
