diff --git a/erroranalysis/erroranalysis/_internal/cohort_filter.py b/erroranalysis/erroranalysis/_internal/cohort_filter.py
index dc011ff56a..a44c1e9564 100644
--- a/erroranalysis/erroranalysis/_internal/cohort_filter.py
+++ b/erroranalysis/erroranalysis/_internal/cohort_filter.py
@@ -34,7 +34,7 @@ def filter_from_cohort(analyzer, filters, composite_filters):
     if not isinstance(df, pd.DataFrame):
         df = pd.DataFrame(df, columns=feature_names)
     else:
-        # Note: we make a non-deep copy of the input dataframe since
+        # Note: we make a non-deep copy of the input DataFrame since
         # we will add columns below
         df = df.copy()
     add_filter_cols(analyzer, df, filters, true_y)
diff --git a/erroranalysis/erroranalysis/_internal/surrogate_error_tree.py b/erroranalysis/erroranalysis/_internal/surrogate_error_tree.py
index 9393402f7b..16e6155f83 100644
--- a/erroranalysis/erroranalysis/_internal/surrogate_error_tree.py
+++ b/erroranalysis/erroranalysis/_internal/surrogate_error_tree.py
@@ -295,7 +295,7 @@ def traverse(df,
              classes=None):
     """Traverses the current node in the tree to create a list of nodes.
 
-    :param df: The dataframe containing the features and labels.
+    :param df: The DataFrame containing the features and labels.
     :type df: pandas.DataFrame
     :param tree: The current node in the tree to traverse.
     :type tree: dict
@@ -522,9 +522,9 @@ def node_to_dict(df, tree, nodeid, categories, json,
 
 
 def get_regression_metric_data(df):
-    """Compute regression metric data from a dataframe.
+    """Compute regression metric data from a DataFrame.
 
-    :param df: dataframe
+    :param df: DataFrame
     :type df: pandas.DataFrame
     :return: pred_y, true_y, error
     :rtype: numpy.ndarray, numpy.ndarray, int
@@ -537,9 +537,9 @@ def get_regression_metric_data(df):
 
 
 def get_classification_metric_data(df):
-    """Compute classification metric data from a dataframe.
+    """Compute classification metric data from a DataFrame.
 
-    :param df: dataframe
+    :param df: DataFrame
     :type df: pandas.DataFrame
     :return: pred_y, true_y, error
    :rtype: numpy.ndarray, numpy.ndarray, int
diff --git a/notebooks/responsibleaidashboard/responsibleaidashboard-diabetes-decision-making.ipynb b/notebooks/responsibleaidashboard/responsibleaidashboard-diabetes-decision-making.ipynb
index 8f21a6e661..960585f6e9 100644
--- a/notebooks/responsibleaidashboard/responsibleaidashboard-diabetes-decision-making.ipynb
+++ b/notebooks/responsibleaidashboard/responsibleaidashboard-diabetes-decision-making.ipynb
@@ -66,7 +66,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "First, load the diabetes dataset and specify the different types of features. Then, clean it and put it into a dataframe with named columns."
+    "First, load the diabetes dataset and specify the different types of features. Then, clean it and put it into a DataFrame with named columns."
    ]
   },
   {
diff --git a/notebooks/responsibleaidashboard/responsibleaidashboard-diabetes-regression-model-debugging.ipynb b/notebooks/responsibleaidashboard/responsibleaidashboard-diabetes-regression-model-debugging.ipynb
index 48a95a2a4d..e232166d21 100644
--- a/notebooks/responsibleaidashboard/responsibleaidashboard-diabetes-regression-model-debugging.ipynb
+++ b/notebooks/responsibleaidashboard/responsibleaidashboard-diabetes-regression-model-debugging.ipynb
@@ -66,7 +66,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "First, load the diabetes dataset and specify the different types of features. Then, clean it and put it into a dataframe with named columns."
+    "First, load the diabetes dataset and specify the different types of features. Then, clean it and put it into a DataFrame with named columns."
    ]
   },
   {
diff --git a/notebooks/responsibleaidashboard/responsibleaidashboard-housing-classification-model-debugging.ipynb b/notebooks/responsibleaidashboard/responsibleaidashboard-housing-classification-model-debugging.ipynb
index 2a3ef1491f..e0a5ec72f9 100644
--- a/notebooks/responsibleaidashboard/responsibleaidashboard-housing-classification-model-debugging.ipynb
+++ b/notebooks/responsibleaidashboard/responsibleaidashboard-housing-classification-model-debugging.ipynb
@@ -62,7 +62,7 @@
    "id": "927997ce",
    "metadata": {},
    "source": [
-    "First, load the apartment dataset and specify the different types of features. Then, clean it and put it into a dataframe with named columns. After loading and cleaning the data, split the datapoints into training and test sets. Assemble separate datasets for the full sample and the test data."
+    "First, load the apartment dataset and specify the different types of features. Then, clean it and put it into a DataFrame with named columns. After loading and cleaning the data, split the datapoints into training and test sets. Assemble separate datasets for the full sample and the test data."
    ]
   },
   {
diff --git a/notebooks/responsibleaidashboard/responsibleaidashboard-housing-decision-making.ipynb b/notebooks/responsibleaidashboard/responsibleaidashboard-housing-decision-making.ipynb
index ac23e63492..57331d4245 100644
--- a/notebooks/responsibleaidashboard/responsibleaidashboard-housing-decision-making.ipynb
+++ b/notebooks/responsibleaidashboard/responsibleaidashboard-housing-decision-making.ipynb
@@ -50,7 +50,7 @@
    "id": "927997ce",
    "metadata": {},
    "source": [
-    "First, load the apartment dataset and specify the different types of features. Then, clean it and put it into a dataframe with named columns. After loading and cleaning the data, split the datapoints into training and test sets. Assemble separate datasets for the full sample and the test data."
+    "First, load the apartment dataset and specify the different types of features. Then, clean it and put it into a DataFrame with named columns. After loading and cleaning the data, split the datapoints into training and test sets. Assemble separate datasets for the full sample and the test data."
    ]
   },
   {
diff --git a/responsibleai/responsibleai/managers/causal_manager.py b/responsibleai/responsibleai/managers/causal_manager.py
index ef1d3d0110..b3581ec559 100644
--- a/responsibleai/responsibleai/managers/causal_manager.py
+++ b/responsibleai/responsibleai/managers/causal_manager.py
@@ -25,7 +25,7 @@
 
 
 class CausalManager(BaseManager):
-    """Manager for causal analysis."""
+    """Manager for generating causal analyses from a dataset."""
 
     def __init__(
         self,
@@ -36,12 +36,12 @@ def __init__(
         categorical_features: Optional[List[str]]
     ):
         """Construct a CausalManager for generating causal analyses
-        from a dataset.
+        from a dataset.
         :param train: Dataset on which to compute global causal effects
-            (#samples x #features).
+            (#samples x #features).
         :type train: pandas.DataFrame
         :param test: Dataset on which to compute local causal effects
-            (#samples x #features).
+            (#samples x #features).
         :type test: pandas.DataFrame
         :param target_column: The name of the label column.
         :type target_column: str
diff --git a/responsibleai/responsibleai/managers/counterfactual_manager.py b/responsibleai/responsibleai/managers/counterfactual_manager.py
index 843e13c053..c0a4d013b5 100644
--- a/responsibleai/responsibleai/managers/counterfactual_manager.py
+++ b/responsibleai/responsibleai/managers/counterfactual_manager.py
@@ -108,6 +108,7 @@ def _get_schema_version(counterfactuals_dict):
 
 
 class CounterfactualConfig(BaseConfig):
+    """Defines the configuration for generating counterfactuals."""
     METHOD = 'method'
     CONTINUOUS_FEATURES = 'continuous_features'
     TOTAL_CFS = 'total_CFs'
@@ -319,6 +320,9 @@ def load_result(self, data_directory_path):
 
 
 class CounterfactualManager(BaseManager):
+    """Defines the CounterfactualManager for generating counterfactuals
+    from a model.
+    """
     _TRAIN = '_train'
     _TEST = '_test'
     _MODEL = '_model'
@@ -329,8 +333,7 @@ class CounterfactualManager(BaseManager):
 
     def __init__(self, model, train, test, target_column, task_type,
                  categorical_features):
-        """Defines the CounterfactualManager for generating counterfactuals
-        from a model.
+        """Creates a CounterfactualManager object.
 
         :param model: The model to generate counterfactuals from.
             A model that implements sklearn.predict or sklearn.predict_proba
diff --git a/responsibleai/responsibleai/managers/error_analysis_manager.py b/responsibleai/responsibleai/managers/error_analysis_manager.py
index 0f7d777312..5cdadbb7fc 100644
--- a/responsibleai/responsibleai/managers/error_analysis_manager.py
+++ b/responsibleai/responsibleai/managers/error_analysis_manager.py
@@ -81,24 +81,11 @@ def as_error_config(json_dict):
 
 
 class ErrorAnalysisConfig(BaseConfig):
-
-    """Defines the ErrorAnalysisConfig, specifying the parameters to run.
-
-    :param max_depth: The maximum depth of the tree.
-    :type max_depth: int
-    :param num_leaves: The number of leaves in the tree.
-    :type num_leaves: int
-    :param min_child_samples: The minimal number of data required to
-        create one leaf.
-    :type min_child_samples: int
-    :param filter_features: One or two features to use for the
-        matrix filter.
-    :type filter_features: list
-    """
+    """Defines the ErrorAnalysisConfig, specifying the parameters to run."""
 
     def __init__(self, max_depth, num_leaves, min_child_samples,
                  filter_features):
-        """Defines the ErrorAnalysisConfig, specifying the parameters to run.
+        """Creates an ErrorAnalysisConfig, specifying the parameters to run.
 
         :param max_depth: The maximum depth of the tree.
         :type max_depth: int
@@ -165,28 +152,11 @@ def from_json(json_str):
 
 
 class ErrorAnalysisManager(BaseManager):
-
-    """Defines the ErrorAnalysisManager for discovering errors in a model.
-
-    :param model: The model to analyze errors on.
-        A model that implements sklearn.predict or sklearn.predict_proba
-        or function that accepts a 2d ndarray.
-    :type model: object
-    :param dataset: The dataset including the label column.
-    :type dataset: pandas.DataFrame
-    :param target_column: The name of the label column.
-    :type target_column: str
-    :param classes: Class names as a list of strings.
-        The order of the class names should match that of the model
-        output. Only required if analyzing a classifier.
-    :type classes: list
-    :param categorical_features: The categorical feature names.
-    :type categorical_features: list[str]
-    """
+    """Defines the ErrorAnalysisManager for discovering errors in a model."""
 
     def __init__(self, model, dataset, target_column,
                  classes=None, categorical_features=None):
-        """Defines the ErrorAnalysisManager for discovering errors in a model.
+        """Creates an ErrorAnalysisManager object.
 
         :param model: The model to analyze errors on.
             A model that implements sklearn.predict or sklearn.predict_proba
diff --git a/responsibleai/responsibleai/managers/explainer_manager.py b/responsibleai/responsibleai/managers/explainer_manager.py
index f3fa04ea45..a18d9c7fa5 100644
--- a/responsibleai/responsibleai/managers/explainer_manager.py
+++ b/responsibleai/responsibleai/managers/explainer_manager.py
@@ -45,33 +45,11 @@
 
 
 class ExplainerManager(BaseManager):
-    """Defines the ExplainerManager for explaining a model.
-
-    :param model: The model to explain.
-        A model that implements sklearn.predict or sklearn.predict_proba
-        or function that accepts a 2d ndarray.
-    :type model: object
-    :param initialization_examples: A matrix of feature vector
-        examples (# examples x # features) for initializing the explainer,
-        with an additional label column.
-    :type initialization_examples: pandas.DataFrame
-    :param evaluation_examples: A matrix of feature vector
-        examples (# examples x # features) on which to explain the
-        model's output, with an additional label column.
-    :type evaluation_examples: pandas.DataFrame
-    :param target_column: The name of the label column.
-    :type target_column: str
-    :param classes: Class names as a list of strings.
-        The order of the class names should match that of the model
-        output. Only required if explaining classifier.
-    :type classes: list
-    :param categorical_features: The categorical feature names.
-    :type categorical_features: list[str]
-    """
+    """Defines the ExplainerManager for explaining a model."""
 
     def __init__(self, model, initialization_examples, evaluation_examples,
                  target_column, classes=None, categorical_features=None):
-        """Defines the ExplainerManager for explaining a model.
+        """Creates an ExplainerManager object.
 
         :param model: The model to explain.
             A model that implements sklearn.predict or sklearn.predict_proba
diff --git a/responsibleai/responsibleai/rai_insights/rai_insights.py b/responsibleai/responsibleai/rai_insights/rai_insights.py
index 948c439fda..4ec68e91e0 100644
--- a/responsibleai/responsibleai/rai_insights/rai_insights.py
+++ b/responsibleai/responsibleai/rai_insights/rai_insights.py
@@ -43,44 +43,17 @@
 
 
 class RAIInsights(object):
-
     """Defines the top-level Model Analysis API.
     Use RAIInsights to analyze errors, explain the most important
     features, compute counterfactuals and run causal analysis in a
     single API.
-    :param model: The model to compute RAI insights for.
-        A model that implements sklearn.predict or sklearn.predict_proba
-        or function that accepts a 2d ndarray.
-    :type model: object
-    :param train: The training dataset including the label column.
-    :type train: pandas.DataFrame
-    :param test: The test dataset including the label column.
-    :type test: pandas.DataFrame
-    :param target_column: The name of the label column.
-    :type target_column: str
-    :param task_type: The task to run, can be `classification` or
-        `regression`.
-    :type task_type: str
-    :param categorical_features: The categorical feature names.
-    :type categorical_features: list[str]
-    :param classes: The class labels in the training dataset
-    :type classes: ndarray
-    :param serializer: Picklable custom serializer with save and load
-        methods for custom model serialization.
-        The save method writes the model to file given a parent directory.
-        The load method returns the deserialized model from the same
-        parent directory.
-    :type serializer: object
     """
 
     def __init__(self, model, train, test, target_column,
                  task_type, categorical_features=None, classes=None,
                  serializer=None,
                  maximum_rows_for_test: int = 5000):
-        """Defines the top-level Model Analysis API.
-        Use RAIInsights to analyze errors, explain the most important
-        features, compute counterfactuals and run causal analysis in a
-        single API.
+        """Creates an RAIInsights object.
         :param model: The model to compute RAI insights for.
             A model that implements sklearn.predict or sklearn.predict_proba
             or function that accepts a 2d ndarray.
@@ -99,9 +72,10 @@ def __init__(self, model, train, test, target_column,
         :param classes: The class labels in the training dataset
         :type classes: ndarray
         :param serializer: Picklable custom serializer with save and load
-            methods defined for model that is not serializable. The save
-            method returns a dictionary state and load method returns the
-            model.
+            methods for custom model serialization.
+            The save method writes the model to file given a parent directory.
+            The load method returns the deserialized model from the same
+            parent directory.
         :type serializer: object
         :param maximum_rows_for_test: Limit on size of test data
             (for performance reasons)
@@ -172,7 +146,7 @@ def _validate_model_analysis_input_parameters(
             serializer,
             maximum_rows_for_test: int):
         """
-        Validate the inputs for RAIInsights class.
+        Validate the inputs for the RAIInsights constructor.
 
         :param model: The model to compute RAI insights for.
             A model that implements sklearn.predict or sklearn.predict_proba
@@ -350,18 +324,18 @@ def _validate_model_analysis_input_parameters(
         else:
             raise UserConfigValidationException(
                 "Unsupported data type for either train or test. "
-                "Expecting pandas Dataframe for train and test."
+                "Expecting pandas DataFrame for train and test."
             )
 
     def _validate_features_same(self, small_train_features_before,
                                 small_train_data, function):
         """
-        Validate the features are unmodified on the dataframe.
+        Validate the features are unmodified on the DataFrame.
 
         :param small_train_features_before: The features saved before
             an operation was performed.
         :type small_train_features_before: list[str]
-        :param small_train_data: The dataframe after the operation.
+        :param small_train_data: The DataFrame after the operation.
         :type small_train_data: pandas.DataFrame
         :param function: The name of the operation performed.
         :type function: str
diff --git a/responsibleai/tests/test_model_analysis_validations.py b/responsibleai/tests/test_model_analysis_validations.py
index a3474e09fd..91ad4b3d7d 100644
--- a/responsibleai/tests/test_model_analysis_validations.py
+++ b/responsibleai/tests/test_model_analysis_validations.py
@@ -279,7 +279,7 @@ def test_unsupported_train_test_types(self):
                 target_column='target',
                 task_type='classification')
         assert "Unsupported data type for either train or test. " + \
-            "Expecting pandas Dataframe for train and test." in str(ucve.value)
+            "Expecting pandas DataFrame for train and test." in str(ucve.value)
 
     def test_train_labels(self):
         X_train, X_test, y_train, y_test, _, _ = \
diff --git a/responsibleai/tests/test_rai_insights_validations.py b/responsibleai/tests/test_rai_insights_validations.py
index 8cc9d93f06..5e5ab7529a 100644
--- a/responsibleai/tests/test_rai_insights_validations.py
+++ b/responsibleai/tests/test_rai_insights_validations.py
@@ -321,7 +321,7 @@ def test_unsupported_train_test_types(self):
                 task_type='classification')
 
         assert "Unsupported data type for either train or test. " + \
-            "Expecting pandas Dataframe for train and test." in str(ucve.value)
+            "Expecting pandas DataFrame for train and test." in str(ucve.value)
 
     def test_classes_exceptions(self):
        X_train, X_test, y_train, y_test, _, _ = \