Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFC add metadata_columns to RAIInsights #1207

Closed
wants to merge 12 commits into from
Closed
Original file line number Diff line number Diff line change
Expand Up @@ -130,23 +130,31 @@ def compute_error_tree(analyzer,
pred_y = filtered_df[PRED_Y]
dropped_cols.append(PRED_Y)
input_data = filtered_df.drop(columns=dropped_cols)

is_pandas = isinstance(analyzer.dataset, pd.DataFrame)
if is_pandas:
true_y = true_y.to_numpy()
else:
input_data = input_data.to_numpy()

if is_model_analyzer:
pred_y = analyzer.model.predict(input_data)
pred_y = analyzer.model.predict(
input_data.drop(columns=analyzer._metadata_columns))

if analyzer.model_task == ModelTask.CLASSIFICATION:
diff = pred_y != true_y
else:
diff = pred_y - true_y

if not isinstance(diff, np.ndarray):
diff = np.array(diff)

if not isinstance(pred_y, np.ndarray):
pred_y = np.array(pred_y)

if not isinstance(true_y, np.ndarray):
true_y = np.array(true_y)

indexes = []
for feature in features:
indexes.append(analyzer.feature_names.index(feature))
Expand Down
26 changes: 24 additions & 2 deletions erroranalysis/erroranalysis/analyzer/error_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ class BaseAnalyzer(ABC):
:type feature_names: numpy.ndarray or list[]
:param categorical_features: The categorical feature names.
:type categorical_features: list[str]
:param metadata_columns: The set of columns that are not passed
to the model or explainers. These columns can be used for
other analyses.
romanlutz marked this conversation as resolved.
Show resolved Hide resolved
:type metadata_columns: list[str]
:param model_task: Optional parameter to specify whether the model
is a classification or regression model. In most cases, the
type of the model can be inferred based on the shape of the
Expand Down Expand Up @@ -68,12 +72,14 @@ def __init__(self,
true_y,
feature_names,
categorical_features,
metadata_columns,
model_task,
metric,
classes):
self._dataset = self._make_pandas_copy(dataset)
self._true_y = true_y
self._categorical_features = categorical_features
self._metadata_columns = metadata_columns
if isinstance(feature_names, np.ndarray):
feature_names = feature_names.tolist()
self._feature_names = feature_names
Expand Down Expand Up @@ -445,6 +451,10 @@ class and for the regression case a method of predict()
:type feature_names: numpy.ndarray or list[]
:param categorical_features: The categorical feature names.
:type categorical_features: list[str]
:param metadata_columns: The set of columns that are not passed
to the model or explainers. These columns can be used for
other analyses.
:type metadata_columns: list[str]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be Optional[list[str]], if None is a valid value?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although we haven't done that on the others

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm actually taking the unrelated doc adjustments out of this PR into a separate one. Perhaps I can do that there!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#1214 !

Also, the optional[list[str]] annotations are only used for type annotations on the args in code, not in the docstring, right? I may be wrong...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added the annotations, but there will probably be a few merge conflicts to resolve once #1214 is merged.

:param model_task: Optional parameter to specify whether the model
is a classification or regression model. In most cases, the
type of the model can be inferred based on the shape of the
Expand Down Expand Up @@ -474,6 +484,7 @@ def __init__(self,
true_y,
feature_names,
categorical_features,
metadata_columns=None,
model_task=ModelTask.UNKNOWN,
metric=None,
classes=None):
Expand All @@ -489,6 +500,7 @@ def __init__(self,
true_y,
feature_names,
categorical_features,
metadata_columns,
model_task,
metric,
classes)
Expand Down Expand Up @@ -520,9 +532,13 @@ def get_diff(self):
:rtype: numpy.ndarray
"""
if self._model_task == ModelTask.CLASSIFICATION:
return self.model.predict(self.dataset) != self.true_y
return self.model.predict(
self.dataset.drop(columns=self._metadata_columns)) \
!= self.true_y
else:
return self.model.predict(self.dataset) - self.true_y
return self.model.predict(
self.dataset.drop(columns=self._metadata_columns)) \
- self.true_y


class PredictionsAnalyzer(BaseAnalyzer):
Expand All @@ -543,6 +559,10 @@ class PredictionsAnalyzer(BaseAnalyzer):
:type feature_names: numpy.ndarray or list[]
:param categorical_features: The categorical feature names.
:type categorical_features: list[str]
:param metadata_columns: The set of columns that are not passed
to the model or explainers. These columns can be used for
other analyses.
:type metadata_columns: list[str]
:param model_task: Optional parameter to specify whether the model
is a classification or regression model. In most cases, the
type of the model can be inferred based on the shape of the
Expand Down Expand Up @@ -572,6 +592,7 @@ def __init__(self,
true_y,
feature_names,
categorical_features,
metadata_columns,
model_task=ModelTask.CLASSIFICATION,
metric=None,
classes=None):
Expand All @@ -583,6 +604,7 @@ def __init__(self,
true_y,
feature_names,
categorical_features,
metadata_columns,
model_task,
metric,
classes)
Expand Down
1 change: 1 addition & 0 deletions responsibleai/responsibleai/_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class Dataset:
class_names: List[str]
categorical_features: List[str]
target_column: str
metadata_columns: List[List]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is perhaps a place where it should be Optional[List[str]]?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This inspired me to create #1214. Once that's merged I'll add the same annotations here, too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually it's List[str] (updated just now). It doesn't need to be optional for this. It can be set as [].



class BoundedCoordinates:
Expand Down
1 change: 1 addition & 0 deletions responsibleai/responsibleai/managers/causal_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def __init__(
):
"""Construct a CausalManager for generating causal analyses
from a dataset.

:param train: Dataset on which to compute global causal effects
(#samples x #features).
:type train: pandas.DataFrame
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -667,8 +667,10 @@ def _load(path, rai_insights):

# Rehydrate model analysis data
inst.__dict__[CounterfactualManager._MODEL] = rai_insights.model
inst.__dict__[CounterfactualManager._TRAIN] = rai_insights.train
inst.__dict__[CounterfactualManager._TEST] = rai_insights.test
inst.__dict__[CounterfactualManager._TRAIN] = \
rai_insights.train.drop(columns=rai_insights.metadata_columns)
romanlutz marked this conversation as resolved.
Show resolved Hide resolved
inst.__dict__[CounterfactualManager._TEST] = \
rai_insights.test.drop(columns=rai_insights.metadata_columns)
inst.__dict__[CounterfactualManager._TARGET_COLUMN] = \
rai_insights.target_column
inst.__dict__[CounterfactualManager._TASK_TYPE] = \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ class ErrorAnalysisManager(BaseManager):
"""Defines the ErrorAnalysisManager for discovering errors in a model."""

def __init__(self, model, dataset, target_column, classes=None,
categorical_features=None):
categorical_features=None, metadata_columns=None):
"""Creates an ErrorAnalysisManager object.

:param model: The model to analyze errors on.
Expand All @@ -172,19 +172,25 @@ def __init__(self, model, dataset, target_column, classes=None,
:type classes: list
:param categorical_features: The categorical feature names.
:type categorical_features: list[str]
:param metadata_columns: The set of columns that are not passed
romanlutz marked this conversation as resolved.
Show resolved Hide resolved
to the model or explainers. These columns can be used for
other analyses.
:type metadata_columns: list[str]
"""
self._true_y = dataset[target_column]
self._dataset = dataset.drop(columns=[target_column])
self._feature_names = list(self._dataset.columns)
self._classes = classes
self._categorical_features = categorical_features
self._metadata_columns = metadata_columns or []
self._ea_config_list = []
self._ea_report_list = []
self._analyzer = ModelAnalyzer(model,
self._dataset,
self._true_y,
self._feature_names,
self._categorical_features,
self._metadata_columns,
classes=self._classes)

def add(self, max_depth=3, num_leaves=31,
Expand Down
9 changes: 5 additions & 4 deletions responsibleai/responsibleai/managers/explainer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@


class ExplainerManager(BaseManager):

"""Defines the ExplainerManager for explaining a model."""

def __init__(self, model, initialization_examples, evaluation_examples,
Expand Down Expand Up @@ -343,9 +342,11 @@ def _load(path, rai_insights):
inst.__dict__['_' + CLASSES] = rai_insights._classes
inst.__dict__['_' + CATEGORICAL_FEATURES] = \
rai_insights.categorical_features
target_column = rai_insights.target_column
train = rai_insights.train.drop(columns=[target_column])
test = rai_insights.test.drop(columns=[target_column])
exclude_columns = [rai_insights.target_column]
if rai_insights.metadata_columns:
exclude_columns += rai_insights.metadata_columns
train = rai_insights.train.drop(columns=exclude_columns)
test = rai_insights.test.drop(columns=exclude_columns)
inst.__dict__[U_INITIALIZATION_EXAMPLES] = train
inst.__dict__[U_EVALUATION_EXAMPLES] = test
inst.__dict__['_' + FEATURES] = list(train.columns)
Expand Down
Loading