microsoft · romanlutz · Feb 9, 2022 · Feb 9, 2022 · riedgar-ms · Feb 9, 2022
@@ -34,7 +34,7 @@ def filter_from_cohort(analyzer, filters, composite_filters):
     if not isinstance(df, pd.DataFrame):
         df = pd.DataFrame(df, columns=feature_names)
     else:
-        # Note: we make a non-deep copy of the input dataframe since
+        # Note: we make a copy of the input DataFrame since
         # we will add columns below
         df = df.copy()
     add_filter_cols(analyzer, df, filters, true_y)

@@ -295,7 +295,7 @@ def traverse(df,
              classes=None):
     """Traverses the current node in the tree to create a list of nodes.
 
-    :param df: The dataframe containing the features and labels.
+    :param df: The DataFrame containing the features and labels.
     :type df: pandas.DataFrame
     :param tree: The current node in the tree to traverse.
     :type tree: dict
@@ -522,9 +522,9 @@ def node_to_dict(df, tree, nodeid, categories, json,
 
 
 def get_regression_metric_data(df):
-    """Compute regression metric data from a dataframe.
+    """Compute regression metric data from a DataFrame.
 
-    :param df: dataframe
+    :param df: DataFrame
     :type df: pandas.DataFrame
     :return: pred_y, true_y, error
     :rtype: numpy.ndarray, numpy.ndarray, int
@@ -537,9 +537,9 @@ def get_regression_metric_data(df):
 
 
 def get_classification_metric_data(df):
-    """Compute classification metric data from a dataframe.
+    """Compute classification metric data from a DataFrame.
 
-    :param df: dataframe
+    :param df: DataFrame
     :type df: pandas.DataFrame
     :return: pred_y, true_y, error
     :rtype: numpy.ndarray, numpy.ndarray, int

diff --git a/notebooks/responsibleaidashboard/responsibleaidashboard-diabetes-decision-making.ipynb b/notebooks/responsibleaidashboard/responsibleaidashboard-diabetes-decision-making.ipynb
@@ -66,7 +66,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "First, load the diabetes dataset and specify the different types of features. Then, clean it and put it into a dataframe with named columns."
+    "First, load the diabetes dataset and specify the different types of features. Then, clean it and put it into a DataFrame with named columns."
    ]
   },
   {

diff --git a/...s/responsibleaidashboard/responsibleaidashboard-diabetes-regression-model-debugging.ipynb b/...s/responsibleaidashboard/responsibleaidashboard-diabetes-regression-model-debugging.ipynb
@@ -66,7 +66,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "First, load the diabetes dataset and specify the different types of features. Then, clean it and put it into a dataframe with named columns."
+    "First, load the diabetes dataset and specify the different types of features. Then, clean it and put it into a DataFrame with named columns."
    ]
   },
   {

diff --git a/...esponsibleaidashboard/responsibleaidashboard-housing-classification-model-debugging.ipynb b/...esponsibleaidashboard/responsibleaidashboard-housing-classification-model-debugging.ipynb
@@ -62,7 +62,7 @@
    "id": "927997ce",
    "metadata": {},
    "source": [
-    "First, load the apartment dataset and specify the different types of features. Then, clean it and put it into a dataframe with named columns. After loading and cleaning the data, split the datapoints into training and test sets. Assemble separate datasets for the full sample and the test data."
+    "First, load the apartment dataset and specify the different types of features. Then, clean it and put it into a DataFrame with named columns. After loading and cleaning the data, split the datapoints into training and test sets. Assemble separate datasets for the full sample and the test data."
    ]
   },
   {

diff --git a/notebooks/responsibleaidashboard/responsibleaidashboard-housing-decision-making.ipynb b/notebooks/responsibleaidashboard/responsibleaidashboard-housing-decision-making.ipynb
@@ -50,7 +50,7 @@
    "id": "927997ce",
    "metadata": {},
    "source": [
-    "First, load the apartment dataset and specify the different types of features. Then, clean it and put it into a dataframe with named columns. After loading and cleaning the data, split the datapoints into training and test sets. Assemble separate datasets for the full sample and the test data."
+    "First, load the apartment dataset and specify the different types of features. Then, clean it and put it into a DataFrame with named columns. After loading and cleaning the data, split the datapoints into training and test sets. Assemble separate datasets for the full sample and the test data."
    ]
   },
   {

@@ -25,7 +25,7 @@
 
 
 class CausalManager(BaseManager):
-    """Manager for causal analysis."""
+    """Manager for generating causal analyses from a dataset."""
 
     def __init__(
         self,
@@ -36,12 +36,12 @@ def __init__(
         categorical_features: Optional[List[str]]
     ):
         """Construct a CausalManager for generating causal analyses
-           from a dataset.
+            from a dataset.
         :param train: Dataset on which to compute global causal effects
-                     (#samples x #features).
+            (#samples x #features).
         :type train: pandas.DataFrame
         :param test: Dataset on which to compute local causal effects
-                     (#samples x #features).
+            (#samples x #features).
         :type test: pandas.DataFrame
         :param target_column: The name of the label column.
         :type target_column: str

@@ -108,6 +108,7 @@ def _get_schema_version(counterfactuals_dict):
 
 
 class CounterfactualConfig(BaseConfig):
+    """Defines the configuration for generating counterfactuals."""
     METHOD = 'method'
     CONTINUOUS_FEATURES = 'continuous_features'
     TOTAL_CFS = 'total_CFs'
@@ -319,6 +320,9 @@ def load_result(self, data_directory_path):
 
 
 class CounterfactualManager(BaseManager):
+    """Defines the CounterfactualManager for generating counterfactuals
+        from a model.
+    """
     _TRAIN = '_train'
     _TEST = '_test'
     _MODEL = '_model'
@@ -329,8 +333,7 @@ class CounterfactualManager(BaseManager):
 
     def __init__(self, model, train, test, target_column, task_type,
                  categorical_features):
-        """Defines the CounterfactualManager for generating counterfactuals
-           from a model.
+        """Creates a CounterfactualManager object.
 
         :param model: The model to generate counterfactuals from.
             A model that implements sklearn.predict or sklearn.predict_proba

@@ -81,24 +81,11 @@ def as_error_config(json_dict):
 
 
 class ErrorAnalysisConfig(BaseConfig):
-
-    """Defines the ErrorAnalysisConfig, specifying the parameters to run.
-
-    :param max_depth: The maximum depth of the tree.
-    :type max_depth: int
-    :param num_leaves: The number of leaves in the tree.
-    :type num_leaves: int
-    :param min_child_samples: The minimal number of data required to
-        create one leaf.
-    :type min_child_samples: int
-    :param filter_features: One or two features to use for the
-        matrix filter.
-    :type filter_features: list
-    """
+    """Defines the ErrorAnalysisConfig, specifying the parameters to run."""
 
     def __init__(self, max_depth, num_leaves,
                  min_child_samples, filter_features):
-        """Defines the ErrorAnalysisConfig, specifying the parameters to run.
+        """Creates an ErrorAnalysisConfig, specifying the parameters to run.
 
         :param max_depth: The maximum depth of the tree.
         :type max_depth: int
@@ -165,28 +152,11 @@ def from_json(json_str):
 
 
 class ErrorAnalysisManager(BaseManager):
-
-    """Defines the ErrorAnalysisManager for discovering errors in a model.
-
-    :param model: The model to analyze errors on.
-        A model that implements sklearn.predict or sklearn.predict_proba
-        or function that accepts a 2d ndarray.
-    :type model: object
-    :param dataset: The dataset including the label column.
-    :type dataset: pandas.DataFrame
-    :param target_column: The name of the label column.
-    :type target_column: str
-    :param classes: Class names as a list of strings.
-        The order of the class names should match that of the model
-        output.  Only required if analyzing a classifier.
-    :type classes: list
-    :param categorical_features: The categorical feature names.
-    :type categorical_features: list[str]
-    """
+    """Defines the ErrorAnalysisManager for discovering errors in a model."""
 
     def __init__(self, model, dataset, target_column, classes=None,
                  categorical_features=None):
-        """Defines the ErrorAnalysisManager for discovering errors in a model.
+        """Creates an ErrorAnalysisManager object.
 
         :param model: The model to analyze errors on.
             A model that implements sklearn.predict or sklearn.predict_proba

@@ -45,33 +45,11 @@
 
 class ExplainerManager(BaseManager):
 
-    """Defines the ExplainerManager for explaining a model.
-
-    :param model: The model to explain.
-        A model that implements sklearn.predict or sklearn.predict_proba
-        or function that accepts a 2d ndarray.
-    :type model: object
-    :param initialization_examples: A matrix of feature vector
-        examples (# examples x # features) for initializing the explainer,
-        with an additional label column.
-    :type initialization_examples: pandas.DataFrame
-    :param evaluation_examples: A matrix of feature vector
-        examples (# examples x # features) on which to explain the
-        model's output, with an additional label column.
-    :type evaluation_examples: pandas.DataFrame
-    :param target_column: The name of the label column.
-    :type target_column: str
-    :param classes: Class names as a list of strings.
-        The order of the class names should match that of the model
-        output.  Only required if explaining classifier.
-    :type classes: list
-    :param categorical_features: The categorical feature names.
-    :type categorical_features: list[str]
-    """
+    """Defines the ExplainerManager for explaining a model."""
 
     def __init__(self, model, initialization_examples, evaluation_examples,
                  target_column, classes=None, categorical_features=None):
-        """Defines the ExplainerManager for explaining a model.
+        """Creates an ExplainerManager object.
 
         :param model: The model to explain.
             A model that implements sklearn.predict or sklearn.predict_proba

@@ -43,44 +43,17 @@
 
 
 class RAIInsights(object):
-
     """Defines the top-level Model Analysis API.
     Use RAIInsights to analyze errors, explain the most important
     features, compute counterfactuals and run causal analysis in a
     single API.
-    :param model: The model to compute RAI insights for.
-        A model that implements sklearn.predict or sklearn.predict_proba
-        or function that accepts a 2d ndarray.
-    :type model: object
-    :param train: The training dataset including the label column.
-    :type train: pandas.DataFrame
-    :param test: The test dataset including the label column.
-    :type test: pandas.DataFrame
-    :param target_column: The name of the label column.
-    :type target_column: str
-    :param task_type: The task to run, can be `classification` or
-        `regression`.
-    :type task_type: str
-    :param categorical_features: The categorical feature names.
-    :type categorical_features: list[str]
-    :param classes: The class labels in the training dataset
-    :type classes: ndarray
-    :param serializer: Picklable custom serializer with save and load
-        methods for custom model serialization.
-        The save method writes the model to file given a parent directory.
-        The load method returns the deserialized model from the same
-        parent directory.
-    :type serializer: object
     """
 
     def __init__(self, model, train, test, target_column,
                  task_type, categorical_features=None, classes=None,
                  serializer=None,
                  maximum_rows_for_test: int = 5000):
-        """Defines the top-level Model Analysis API.
-        Use RAIInsights to analyze errors, explain the most important
-        features, compute counterfactuals and run causal analysis in a
-        single API.
+        """Creates an RAIInsights object.
         :param model: The model to compute RAI insights for.
             A model that implements sklearn.predict or sklearn.predict_proba
             or function that accepts a 2d ndarray.
@@ -99,9 +72,10 @@ def __init__(self, model, train, test, target_column,
         :param classes: The class labels in the training dataset
         :type classes: ndarray
         :param serializer: Picklable custom serializer with save and load
-            methods defined for model that is not serializable. The save
-            method returns a dictionary state and load method returns the
-            model.
+            methods for custom model serialization.
+            The save method writes the model to file given a parent directory.
+            The load method returns the deserialized model from the same
+            parent directory.
         :type serializer: object
         :param maximum_rows_for_test: Limit on size of test data
             (for performance reasons)
@@ -172,7 +146,7 @@ def _validate_model_analysis_input_parameters(
             serializer,
             maximum_rows_for_test: int):
         """
-        Validate the inputs for RAIInsights class.
+        Validate the inputs for the RAIInsights constructor.
 
         :param model: The model to compute RAI insights for.
             A model that implements sklearn.predict or sklearn.predict_proba
@@ -350,18 +324,18 @@ def _validate_model_analysis_input_parameters(
         else:
             raise UserConfigValidationException(
                 "Unsupported data type for either train or test. "
-                "Expecting pandas Dataframe for train and test."
+                "Expecting pandas DataFrame for train and test."
             )
 
     def _validate_features_same(self, small_train_features_before,
                                 small_train_data, function):
         """
-        Validate the features are unmodified on the dataframe.
+        Validate the features are unmodified on the DataFrame.
 
         :param small_train_features_before: The features saved before
             an operation was performed.
         :type small_train_features_before: list[str]
-        :param small_train_data: The dataframe after the operation.
+        :param small_train_data: The DataFrame after the operation.
         :type small_train_data: pandas.DataFrame
         :param function: The name of the operation performed.
         :type function: str

@@ -279,7 +279,7 @@ def test_unsupported_train_test_types(self):
                 target_column='target',
                 task_type='classification')
         assert "Unsupported data type for either train or test. " + \
-            "Expecting pandas Dataframe for train and test." in str(ucve.value)
+            "Expecting pandas DataFrame for train and test." in str(ucve.value)
 
     def test_train_labels(self):
         X_train, X_test, y_train, y_test, _, _ = \

@@ -321,7 +321,7 @@ def test_unsupported_train_test_types(self):
                 task_type='classification')
 
         assert "Unsupported data type for either train or test. " + \
-            "Expecting pandas Dataframe for train and test." in str(ucve.value)
+            "Expecting pandas DataFrame for train and test." in str(ucve.value)
 
     def test_classes_exceptions(self):
         X_train, X_test, y_train, y_test, _, _ = \