From 26a00815f34825b9c76a9feba23d8a0b593a747e Mon Sep 17 00:00:00 2001 From: Zhaoyang Xie Date: Fri, 29 Sep 2023 10:25:11 -0400 Subject: [PATCH 01/13] feat: add a config setting to disable preprocessing value clampings --- rsmtool/preprocessor.py | 48 ++++++++++++++----- rsmtool/utils/constants.py | 12 ++++- .../autogenerated_rsmexplain_config.json | 3 +- .../output/interactive_rsmexplain_config.json | 3 +- 4 files changed, 51 insertions(+), 15 deletions(-) diff --git a/rsmtool/preprocessor.py b/rsmtool/preprocessor.py index d351f7f7d..7786fc116 100644 --- a/rsmtool/preprocessor.py +++ b/rsmtool/preprocessor.py @@ -1020,6 +1020,7 @@ def preprocess_feature( exclude_zero_sd=False, raise_error=True, truncations=None, + clamp_features=True, ): """ Remove outliers and transform the values in given numpy array. @@ -1050,6 +1051,9 @@ def preprocess_feature( truncations : pandas DataFrame, optional A set of pre-defined truncation values. Defaults to ``None``. + clamp_features : bool, optional + Clamp outlier values if set in the config file + Defaults to ``True``. Returns ------- @@ -1063,16 +1067,21 @@ def preprocess_feature( If the preprocessed feature values have zero standard deviation and ``exclude_zero_sd`` is set to ``True``. """ - if truncations is not None: - # clamp outlier values using the truncations set - features_no_outliers = self.remove_outliers_using_truncations( - values, feature_name, truncations - ) + if clamp_features: + if truncations is not None: + # clamp outlier values using the truncations set + features_no_outliers = self.remove_outliers_using_truncations( + values, feature_name, truncations + ) + else: + # clamp any outlier values that are 4 standard deviations + # away from the mean + features_no_outliers = self.remove_outliers( + values, mean=feature_mean, sd=feature_sd + ) else: - # clamp any outlier values that are 4 standard deviations - # away from the mean - features_no_outliers = self.remove_outliers(values, mean=feature_mean, sd=feature_sd) + features_no_outliers = values # apply the requested transformation to the feature transformed_feature = FeatureTransformer().transform_feature( @@ -1105,6 +1114,7 @@ def preprocess_features( df_feature_specs, standardize_features=True, use_truncations=False, + clamp_features=True, ): """ Preprocess features in given data using corresponding specifications. @@ -1132,6 +1142,10 @@ def preprocess_features( Whether we should use the truncation set for removing outliers. Defaults to ``False``. + clamp_features : bool, optional + Clamp outlier values if set in the config file + Defaults to ``True``. + Returns ------- @@ -1178,6 +1192,7 @@ def preprocess_features( train_feature_sd, exclude_zero_sd=True, truncations=truncations, + clamp_features=clamp_features, ) testing_feature_values = df_test[feature_name].values @@ -1188,6 +1203,7 @@ def preprocess_features( train_feature_mean, train_feature_sd, truncations=truncations, + clamp_features=clamp_features, ) # Standardize the features using the mean and sd computed on the @@ -2646,6 +2662,9 @@ def process_data_rsmexplain(self, config_obj, data_container_obj): # should features be standardized? standardize_features = config_obj.get("standardize_features", True) + # should features be clamped? + clamp_features = config_obj.get("clamp_features", True) + # rename the ID columns in both frames df_background_preprocessed = self.rename_default_columns( df_background_features, @@ -2689,10 +2708,10 @@ def process_data_rsmexplain(self, config_obj, data_container_obj): # now pre-process all the features that go into the model (df_background_preprocessed, _) = self.preprocess_new_data( - df_background_preprocessed, df_feature_info, standardize_features + df_background_preprocessed, df_feature_info, standardize_features, clamp_features ) (df_explain_preprocessed, _) = self.preprocess_new_data( - df_explain_preprocessed, df_feature_info, standardize_features + df_explain_preprocessed, df_feature_info, standardize_features, clamp_features ) # set ID column as index for the background and explain feature frames @@ -2748,7 +2767,9 @@ def process_data(self, config_obj, data_container_obj, context="rsmtool"): f"'rsmeval', 'rsmpredict', 'rsmexplain']. You specified `{context}`." ) - def preprocess_new_data(self, df_input, df_feature_info, standardize_features=True): + def preprocess_new_data( + self, df_input, df_feature_info, standardize_features=True, clamp_features=True + ): """ Preprocess feature values using the parameters in ``df_feature_info``. @@ -2780,6 +2801,10 @@ def preprocess_new_data(self, df_input, df_feature_info, standardize_features=Tr Whether the features should be standardized prior to prediction. Defaults to ``True``. + clamp_features : bool, optional + Whether the features should be clamped prior to prediction. + Defaults to ``True``. + Returns ------- df_features_preprocessed : pandas DataFrame @@ -2881,6 +2906,7 @@ def preprocess_new_data(self, df_input, df_feature_info, standardize_features=Tr train_feature_sd, exclude_zero_sd=False, raise_error=False, + clamp_features=clamp_features, ) # filter the feature values once again to remove possible NaN and inf values that diff --git a/rsmtool/utils/constants.py b/rsmtool/utils/constants.py index cdcab9be5..ec9819c24 100644 --- a/rsmtool/utils/constants.py +++ b/rsmtool/utils/constants.py @@ -26,6 +26,7 @@ "use_scaled_predictions_new": False, "select_transformations": False, "standardize_features": True, + "clamp_features": True, "use_thumbnails": False, "use_truncation_thresholds": False, "scale_with": None, @@ -289,6 +290,7 @@ "use_wandb", "wandb_project", "wandb_entity", + "clamp_features", ], }, } @@ -421,12 +423,18 @@ }, "explain_data": {"label": "Path to file to be explained ", "type": "file"}, "sample_range": {"label": "Range of specific row IDs to explain "}, - "sample_size": {"label": "Size of random sample to be explained ", "type": "integer"}, + "sample_size": { + "label": "Size of random sample to be explained ", + "type": "integer", + }, "num_features_to_display": { "label": "Number of features to be displayed in plots (15)", "type": "integer", }, - "show_auto_cohorts": {"label": "Show auto cohorts plot (true/false)", "type": "boolean"}, + "show_auto_cohorts": { + "label": "Show auto cohorts plot (true/false)", + "type": "boolean", + }, } # regular expression used to parse rsmexplain range values diff --git a/tests/data/output/autogenerated_rsmexplain_config.json b/tests/data/output/autogenerated_rsmexplain_config.json index 2dd60fd64..a765a8ac8 100644 --- a/tests/data/output/autogenerated_rsmexplain_config.json +++ b/tests/data/output/autogenerated_rsmexplain_config.json @@ -7,6 +7,7 @@ "experiment_dir": "ENTER_VALUE_HERE", // OPTIONAL: replace default values below based on your data. "background_kmeans_size": 500, + "clamp_features": true, "custom_sections": null, "description": "", "general_sections": [ @@ -26,4 +27,4 @@ "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file diff --git a/tests/data/output/interactive_rsmexplain_config.json b/tests/data/output/interactive_rsmexplain_config.json index 878ddcb5f..e5a2e3d62 100644 --- a/tests/data/output/interactive_rsmexplain_config.json +++ b/tests/data/output/interactive_rsmexplain_config.json @@ -6,6 +6,7 @@ "experiment_dir": "/a/b", // OPTIONAL: replace default values below based on your data. "background_kmeans_size": 500, + "clamp_features": true, "custom_sections": null, "description": "explain test", "general_sections": [ @@ -25,4 +26,4 @@ "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file From 938ae022f88f2969c3e517670f3017ca33ac74fd Mon Sep 17 00:00:00 2001 From: Zhaoyang Xie Date: Fri, 29 Sep 2023 17:36:52 -0400 Subject: [PATCH 02/13] feat: add clamp setting for rsmpredict and rsmtool --- rsmtool/preprocessor.py | 9 ++++++++- rsmtool/utils/constants.py | 2 ++ tests/data/output/autogenerated_rsmpredict_config.json | 3 ++- tests/data/output/autogenerated_rsmtool_config.json | 3 ++- .../data/output/autogenerated_rsmtool_config_groups.json | 3 ++- tests/data/output/interactive_rsmpredict_config.json | 3 ++- tests/data/output/interactive_rsmtool_config.json | 3 ++- tests/data/output/interactive_rsmtool_config_groups.json | 3 ++- 8 files changed, 22 insertions(+), 7 deletions(-) diff --git a/rsmtool/preprocessor.py b/rsmtool/preprocessor.py index 7786fc116..f811a1d8f 100644 --- a/rsmtool/preprocessor.py +++ b/rsmtool/preprocessor.py @@ -1724,6 +1724,9 @@ def process_data_rsmtool(self, config_obj, data_container_obj): # should we standardize the features standardize_features = config_obj["standardize_features"] + # should features be clamped? + clamp_features = config_obj.get("clamp_features", True) + # if we are excluding zero scores but trim_min # is set to 0, then we need to warn the user if exclude_zero_scores and spec_trim_min == 0: @@ -1989,6 +1992,7 @@ def process_data_rsmtool(self, config_obj, data_container_obj): feature_specs, standardize_features, use_truncations, + clamp_features, ) # configuration options that either override previous values or are @@ -2487,6 +2491,9 @@ def process_data_rsmpredict(self, config_obj, data_container_obj): # should features be standardized? standardize_features = config_obj.get("standardize_features", True) + # should features be clamped? + clamp_features = config_obj.get("clamp_features", True) + # should we predict expected scores predict_expected_scores = config_obj["predict_expected_scores"] @@ -2547,7 +2554,7 @@ def process_data_rsmpredict(self, config_obj, data_container_obj): ) (df_features_preprocessed, df_excluded) = self.preprocess_new_data( - df_input, df_feature_info, standardize_features + df_input, df_feature_info, standardize_features, clamp_features ) trim_min = df_postproc_params["trim_min"].values[0] diff --git a/rsmtool/utils/constants.py b/rsmtool/utils/constants.py index ec9819c24..6bfd71864 100644 --- a/rsmtool/utils/constants.py +++ b/rsmtool/utils/constants.py @@ -134,6 +134,7 @@ "use_wandb", "wandb_project", "wandb_entity", + "clamp_features", ], }, "rsmxval": { @@ -224,6 +225,7 @@ "use_wandb", "wandb_project", "wandb_entity", + "clamp_features", ], }, "rsmcompare": { diff --git a/tests/data/output/autogenerated_rsmpredict_config.json b/tests/data/output/autogenerated_rsmpredict_config.json index f1f9beb1e..eb30ff0b0 100644 --- a/tests/data/output/autogenerated_rsmpredict_config.json +++ b/tests/data/output/autogenerated_rsmpredict_config.json @@ -6,6 +6,7 @@ "input_features_file": "ENTER_VALUE_HERE", // OPTIONAL: replace default values below based on your data. "candidate_column": null, + "clamp_features": true, "file_format": "csv", "flag_column": null, "human_score_column": "sc1", @@ -17,4 +18,4 @@ "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file diff --git a/tests/data/output/autogenerated_rsmtool_config.json b/tests/data/output/autogenerated_rsmtool_config.json index c603d5cdc..73e2c83f3 100644 --- a/tests/data/output/autogenerated_rsmtool_config.json +++ b/tests/data/output/autogenerated_rsmtool_config.json @@ -7,6 +7,7 @@ "test_file": "ENTER_VALUE_HERE", // OPTIONAL: replace default values below based on your data. "candidate_column": null, + "clamp_features": true, "custom_sections": null, "description": "", "exclude_zero_scores": true, @@ -55,4 +56,4 @@ "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file diff --git a/tests/data/output/autogenerated_rsmtool_config_groups.json b/tests/data/output/autogenerated_rsmtool_config_groups.json index 6320b76dc..8dce5376f 100644 --- a/tests/data/output/autogenerated_rsmtool_config_groups.json +++ b/tests/data/output/autogenerated_rsmtool_config_groups.json @@ -7,6 +7,7 @@ "test_file": "ENTER_VALUE_HERE", // OPTIONAL: replace default values below based on your data. "candidate_column": null, + "clamp_features": true, "custom_sections": null, "description": "", "exclude_zero_scores": true, @@ -60,4 +61,4 @@ "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file diff --git a/tests/data/output/interactive_rsmpredict_config.json b/tests/data/output/interactive_rsmpredict_config.json index b6c858c61..094cbb2e1 100644 --- a/tests/data/output/interactive_rsmpredict_config.json +++ b/tests/data/output/interactive_rsmpredict_config.json @@ -5,6 +5,7 @@ "input_features_file": "features.csv", // OPTIONAL: replace default values below based on your data. "candidate_column": null, + "clamp_features": true, "file_format": "csv", "flag_column": null, "human_score_column": "score", @@ -16,4 +17,4 @@ "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file diff --git a/tests/data/output/interactive_rsmtool_config.json b/tests/data/output/interactive_rsmtool_config.json index dc0d77f9f..916c2f928 100644 --- a/tests/data/output/interactive_rsmtool_config.json +++ b/tests/data/output/interactive_rsmtool_config.json @@ -6,6 +6,7 @@ "test_file": "test.csv", // OPTIONAL: replace default values below based on your data. "candidate_column": null, + "clamp_features": true, "custom_sections": null, "description": "an rsmtool test", "exclude_zero_scores": false, @@ -54,4 +55,4 @@ "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file diff --git a/tests/data/output/interactive_rsmtool_config_groups.json b/tests/data/output/interactive_rsmtool_config_groups.json index 9c385acb8..fd3b7e1fc 100644 --- a/tests/data/output/interactive_rsmtool_config_groups.json +++ b/tests/data/output/interactive_rsmtool_config_groups.json @@ -6,6 +6,7 @@ "test_file": "test.csv", // OPTIONAL: replace default values below based on your data. "candidate_column": null, + "clamp_features": true, "custom_sections": null, "description": "an rsmtool test", "exclude_zero_scores": false, @@ -62,4 +63,4 @@ "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file From 27e370fd3a679b5e37eb23a4d94ed4bf11c86179 Mon Sep 17 00:00:00 2001 From: Zhaoyang Xie Date: Fri, 29 Sep 2023 20:00:52 -0400 Subject: [PATCH 03/13] feat: add clamp setting for rsmxval --- rsmtool/utils/constants.py | 1 + tests/data/output/autogenerated_rsmxval_config.json | 3 ++- tests/data/output/interactive_rsmxval_config.json | 3 ++- tests/data/output/interactive_rsmxval_config_folds_file.json | 3 ++- tests/data/output/interactive_rsmxval_config_groups.json | 3 ++- .../output/interactive_rsmxval_config_groups_folds_file.json | 3 ++- 6 files changed, 11 insertions(+), 5 deletions(-) diff --git a/rsmtool/utils/constants.py b/rsmtool/utils/constants.py index 6bfd71864..8ee1ff25e 100644 --- a/rsmtool/utils/constants.py +++ b/rsmtool/utils/constants.py @@ -175,6 +175,7 @@ "use_wandb", "wandb_project", "wandb_entity", + "clamp_features", ], }, "rsmeval": { diff --git a/tests/data/output/autogenerated_rsmxval_config.json b/tests/data/output/autogenerated_rsmxval_config.json index c1b2f5d04..7c386fbe3 100644 --- a/tests/data/output/autogenerated_rsmxval_config.json +++ b/tests/data/output/autogenerated_rsmxval_config.json @@ -6,6 +6,7 @@ "train_file": "ENTER_VALUE_HERE", // OPTIONAL: replace default values below based on your data. "candidate_column": null, + "clamp_features": true, "description": "", "exclude_zero_scores": true, "feature_subset": null, @@ -40,4 +41,4 @@ "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file diff --git a/tests/data/output/interactive_rsmxval_config.json b/tests/data/output/interactive_rsmxval_config.json index e31aa8064..8f9ec2108 100644 --- a/tests/data/output/interactive_rsmxval_config.json +++ b/tests/data/output/interactive_rsmxval_config.json @@ -5,6 +5,7 @@ "train_file": "train.csv", // OPTIONAL: replace default values below based on your data. "candidate_column": null, + "clamp_features": true, "description": "xval test", "exclude_zero_scores": true, "feature_subset": null, @@ -39,4 +40,4 @@ "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file diff --git a/tests/data/output/interactive_rsmxval_config_folds_file.json b/tests/data/output/interactive_rsmxval_config_folds_file.json index 5dd25acce..09853082b 100644 --- a/tests/data/output/interactive_rsmxval_config_folds_file.json +++ b/tests/data/output/interactive_rsmxval_config_folds_file.json @@ -5,6 +5,7 @@ "train_file": "train.csv", // OPTIONAL: replace default values below based on your data. "candidate_column": null, + "clamp_features": true, "description": "xval test", "exclude_zero_scores": true, "feature_subset": null, @@ -39,4 +40,4 @@ "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file diff --git a/tests/data/output/interactive_rsmxval_config_groups.json b/tests/data/output/interactive_rsmxval_config_groups.json index 066c47537..40ba7101b 100644 --- a/tests/data/output/interactive_rsmxval_config_groups.json +++ b/tests/data/output/interactive_rsmxval_config_groups.json @@ -5,6 +5,7 @@ "train_file": "train.csv", // OPTIONAL: replace default values below based on your data. "candidate_column": null, + "clamp_features": true, "description": "xval test", "exclude_zero_scores": true, "feature_subset": null, @@ -42,4 +43,4 @@ "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file diff --git a/tests/data/output/interactive_rsmxval_config_groups_folds_file.json b/tests/data/output/interactive_rsmxval_config_groups_folds_file.json index 9cd2e07f1..e0eb6f66a 100644 --- a/tests/data/output/interactive_rsmxval_config_groups_folds_file.json +++ b/tests/data/output/interactive_rsmxval_config_groups_folds_file.json @@ -5,6 +5,7 @@ "train_file": "train.csv", // OPTIONAL: replace default values below based on your data. "candidate_column": null, + "clamp_features": true, "description": "xval test", "exclude_zero_scores": true, "feature_subset": null, @@ -41,4 +42,4 @@ "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file From 7f19cfeef7765fc3f9bbf61d164328c16705e6a5 Mon Sep 17 00:00:00 2001 From: Zhaoyang Xie Date: Tue, 3 Oct 2023 14:50:28 -0400 Subject: [PATCH 04/13] fix: Rename the config name --- examples/rsmtool/config_rsmtool.json | 4 +- rsmtool/preprocessor.py | 46 +++++++++---------- rsmtool/utils/constants.py | 11 +++-- .../output/autogenerated_rsmeval_config.json | 3 +- .../autogenerated_rsmeval_config_groups.json | 3 +- .../autogenerated_rsmexplain_config.json | 2 +- .../autogenerated_rsmpredict_config.json | 2 +- .../output/autogenerated_rsmtool_config.json | 2 +- .../autogenerated_rsmtool_config_groups.json | 2 +- .../output/autogenerated_rsmxval_config.json | 2 +- .../output/interactive_rsmeval_config.json | 3 +- .../interactive_rsmeval_config_groups.json | 3 +- .../output/interactive_rsmexplain_config.json | 2 +- .../output/interactive_rsmpredict_config.json | 2 +- .../output/interactive_rsmtool_config.json | 2 +- .../interactive_rsmtool_config_groups.json | 2 +- .../output/interactive_rsmxval_config.json | 2 +- ...interactive_rsmxval_config_folds_file.json | 2 +- .../interactive_rsmxval_config_groups.json | 2 +- ...tive_rsmxval_config_groups_folds_file.json | 2 +- 20 files changed, 53 insertions(+), 46 deletions(-) diff --git a/examples/rsmtool/config_rsmtool.json b/examples/rsmtool/config_rsmtool.json index 4bfa44d52..04b5d760f 100644 --- a/examples/rsmtool/config_rsmtool.json +++ b/examples/rsmtool/config_rsmtool.json @@ -11,5 +11,7 @@ "trim_max": 6, "id_column": "ID", "second_human_score_column": "score2", - "length_column": "LENGTH" + "length_column": "LENGTH", + "clamp_features": false, + "standardize_features": false } diff --git a/rsmtool/preprocessor.py b/rsmtool/preprocessor.py index f811a1d8f..ccb9a3327 100644 --- a/rsmtool/preprocessor.py +++ b/rsmtool/preprocessor.py @@ -1020,7 +1020,7 @@ def preprocess_feature( exclude_zero_sd=False, raise_error=True, truncations=None, - clamp_features=True, + truncate_outliers=True, ): """ Remove outliers and transform the values in given numpy array. @@ -1051,8 +1051,8 @@ def preprocess_feature( truncations : pandas DataFrame, optional A set of pre-defined truncation values. Defaults to ``None``. - clamp_features : bool, optional - Clamp outlier values if set in the config file + truncate_outliers : bool, optional + Truncate outlier values if set in the config file Defaults to ``True``. Returns @@ -1067,7 +1067,7 @@ def preprocess_feature( If the preprocessed feature values have zero standard deviation and ``exclude_zero_sd`` is set to ``True``. """ - if clamp_features: + if truncate_outliers: if truncations is not None: # clamp outlier values using the truncations set features_no_outliers = self.remove_outliers_using_truncations( @@ -1114,7 +1114,7 @@ def preprocess_features( df_feature_specs, standardize_features=True, use_truncations=False, - clamp_features=True, + truncate_outliers=True, ): """ Preprocess features in given data using corresponding specifications. @@ -1142,8 +1142,8 @@ def preprocess_features( Whether we should use the truncation set for removing outliers. Defaults to ``False``. - clamp_features : bool, optional - Clamp outlier values if set in the config file + truncate_outliers : bool, optional + Truncate outlier values if set in the config file Defaults to ``True``. @@ -1192,7 +1192,7 @@ def preprocess_features( train_feature_sd, exclude_zero_sd=True, truncations=truncations, - clamp_features=clamp_features, + truncate_outliers=truncate_outliers, ) testing_feature_values = df_test[feature_name].values @@ -1203,7 +1203,7 @@ def preprocess_features( train_feature_mean, train_feature_sd, truncations=truncations, - clamp_features=clamp_features, + truncate_outliers=truncate_outliers, ) # Standardize the features using the mean and sd computed on the @@ -1724,8 +1724,8 @@ def process_data_rsmtool(self, config_obj, data_container_obj): # should we standardize the features standardize_features = config_obj["standardize_features"] - # should features be clamped? - clamp_features = config_obj.get("clamp_features", True) + # should outliers be truncated? + truncate_outliers = config_obj.get("truncate_outliers", True) # if we are excluding zero scores but trim_min # is set to 0, then we need to warn the user @@ -1992,7 +1992,7 @@ def process_data_rsmtool(self, config_obj, data_container_obj): feature_specs, standardize_features, use_truncations, - clamp_features, + truncate_outliers, ) # configuration options that either override previous values or are @@ -2491,8 +2491,8 @@ def process_data_rsmpredict(self, config_obj, data_container_obj): # should features be standardized? standardize_features = config_obj.get("standardize_features", True) - # should features be clamped? - clamp_features = config_obj.get("clamp_features", True) + # should outliers be truncated? + truncate_outliers = config_obj.get("truncate_outliers", True) # should we predict expected scores predict_expected_scores = config_obj["predict_expected_scores"] @@ -2554,7 +2554,7 @@ def process_data_rsmpredict(self, config_obj, data_container_obj): ) (df_features_preprocessed, df_excluded) = self.preprocess_new_data( - df_input, df_feature_info, standardize_features, clamp_features + df_input, df_feature_info, standardize_features, truncate_outliers ) trim_min = df_postproc_params["trim_min"].values[0] @@ -2669,8 +2669,8 @@ def process_data_rsmexplain(self, config_obj, data_container_obj): # should features be standardized? standardize_features = config_obj.get("standardize_features", True) - # should features be clamped? - clamp_features = config_obj.get("clamp_features", True) + # should outliers be truncated? + truncate_outliers = config_obj.get("truncate_outliers", True) # rename the ID columns in both frames df_background_preprocessed = self.rename_default_columns( @@ -2715,10 +2715,10 @@ def process_data_rsmexplain(self, config_obj, data_container_obj): # now pre-process all the features that go into the model (df_background_preprocessed, _) = self.preprocess_new_data( - df_background_preprocessed, df_feature_info, standardize_features, clamp_features + df_background_preprocessed, df_feature_info, standardize_features, truncate_outliers ) (df_explain_preprocessed, _) = self.preprocess_new_data( - df_explain_preprocessed, df_feature_info, standardize_features, clamp_features + df_explain_preprocessed, df_feature_info, standardize_features, truncate_outliers ) # set ID column as index for the background and explain feature frames @@ -2775,7 +2775,7 @@ def process_data(self, config_obj, data_container_obj, context="rsmtool"): ) def preprocess_new_data( - self, df_input, df_feature_info, standardize_features=True, clamp_features=True + self, df_input, df_feature_info, standardize_features=True, truncate_outliers=True ): """ Preprocess feature values using the parameters in ``df_feature_info``. @@ -2808,8 +2808,8 @@ def preprocess_new_data( Whether the features should be standardized prior to prediction. Defaults to ``True``. - clamp_features : bool, optional - Whether the features should be clamped prior to prediction. + truncate_outliers : bool, optional + Whether the outlier should be truncated prior to prediction. Defaults to ``True``. Returns @@ -2913,7 +2913,7 @@ def preprocess_new_data( train_feature_sd, exclude_zero_sd=False, raise_error=False, - clamp_features=clamp_features, + truncate_outliers=truncate_outliers, ) # filter the feature values once again to remove possible NaN and inf values that diff --git a/rsmtool/utils/constants.py b/rsmtool/utils/constants.py index 8ee1ff25e..23d4f2442 100644 --- a/rsmtool/utils/constants.py +++ b/rsmtool/utils/constants.py @@ -26,7 +26,7 @@ "use_scaled_predictions_new": False, "select_transformations": False, "standardize_features": True, - "clamp_features": True, + "truncate_outliers": True, "use_thumbnails": False, "use_truncation_thresholds": False, "scale_with": None, @@ -134,7 +134,7 @@ "use_wandb", "wandb_project", "wandb_entity", - "clamp_features", + "truncate_outliers", ], }, "rsmxval": { @@ -175,7 +175,7 @@ "use_wandb", "wandb_project", "wandb_entity", - "clamp_features", + "truncate_outliers", ], }, "rsmeval": { @@ -209,6 +209,7 @@ "use_wandb", "wandb_project", "wandb_entity", + "truncate_outliers", ], }, "rsmpredict": { @@ -226,7 +227,7 @@ "use_wandb", "wandb_project", "wandb_entity", - "clamp_features", + "truncate_outliers", ], }, "rsmcompare": { @@ -293,7 +294,7 @@ "use_wandb", "wandb_project", "wandb_entity", - "clamp_features", + "truncate_outliers", ], }, } diff --git a/tests/data/output/autogenerated_rsmeval_config.json b/tests/data/output/autogenerated_rsmeval_config.json index a055df8c7..4fd0c786f 100644 --- a/tests/data/output/autogenerated_rsmeval_config.json +++ b/tests/data/output/autogenerated_rsmeval_config.json @@ -32,8 +32,9 @@ "special_sections": null, "subgroups": [], "trim_tolerance": 0.4998, + "truncate_outliers": true, "use_thumbnails": false, "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file diff --git a/tests/data/output/autogenerated_rsmeval_config_groups.json b/tests/data/output/autogenerated_rsmeval_config_groups.json index da302d175..29d8a7a73 100644 --- a/tests/data/output/autogenerated_rsmeval_config_groups.json +++ b/tests/data/output/autogenerated_rsmeval_config_groups.json @@ -35,8 +35,9 @@ "special_sections": null, "subgroups": [], "trim_tolerance": 0.4998, + "truncate_outliers": true, "use_thumbnails": false, "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file diff --git a/tests/data/output/autogenerated_rsmexplain_config.json b/tests/data/output/autogenerated_rsmexplain_config.json index a765a8ac8..cbe65a919 100644 --- a/tests/data/output/autogenerated_rsmexplain_config.json +++ b/tests/data/output/autogenerated_rsmexplain_config.json @@ -7,7 +7,6 @@ "experiment_dir": "ENTER_VALUE_HERE", // OPTIONAL: replace default values below based on your data. "background_kmeans_size": 500, - "clamp_features": true, "custom_sections": null, "description": "", "general_sections": [ @@ -24,6 +23,7 @@ "show_auto_cohorts": false, "special_sections": null, "standardize_features": true, + "truncate_outliers": true, "use_wandb": false, "wandb_entity": null, "wandb_project": null diff --git a/tests/data/output/autogenerated_rsmpredict_config.json b/tests/data/output/autogenerated_rsmpredict_config.json index eb30ff0b0..6ca95e613 100644 --- a/tests/data/output/autogenerated_rsmpredict_config.json +++ b/tests/data/output/autogenerated_rsmpredict_config.json @@ -6,7 +6,6 @@ "input_features_file": "ENTER_VALUE_HERE", // OPTIONAL: replace default values below based on your data. "candidate_column": null, - "clamp_features": true, "file_format": "csv", "flag_column": null, "human_score_column": "sc1", @@ -15,6 +14,7 @@ "second_human_score_column": null, "standardize_features": true, "subgroups": [], + "truncate_outliers": true, "use_wandb": false, "wandb_entity": null, "wandb_project": null diff --git a/tests/data/output/autogenerated_rsmtool_config.json b/tests/data/output/autogenerated_rsmtool_config.json index 73e2c83f3..cf2350768 100644 --- a/tests/data/output/autogenerated_rsmtool_config.json +++ b/tests/data/output/autogenerated_rsmtool_config.json @@ -7,7 +7,6 @@ "test_file": "ENTER_VALUE_HERE", // OPTIONAL: replace default values below based on your data. "candidate_column": null, - "clamp_features": true, "custom_sections": null, "description": "", "exclude_zero_scores": true, @@ -50,6 +49,7 @@ "trim_max": null, "trim_min": null, "trim_tolerance": 0.4998, + "truncate_outliers": true, "use_scaled_predictions": false, "use_thumbnails": false, "use_truncation_thresholds": false, diff --git a/tests/data/output/autogenerated_rsmtool_config_groups.json b/tests/data/output/autogenerated_rsmtool_config_groups.json index 8dce5376f..96687f86a 100644 --- a/tests/data/output/autogenerated_rsmtool_config_groups.json +++ b/tests/data/output/autogenerated_rsmtool_config_groups.json @@ -7,7 +7,6 @@ "test_file": "ENTER_VALUE_HERE", // OPTIONAL: replace default values below based on your data. "candidate_column": null, - "clamp_features": true, "custom_sections": null, "description": "", "exclude_zero_scores": true, @@ -55,6 +54,7 @@ "trim_max": null, "trim_min": null, "trim_tolerance": 0.4998, + "truncate_outliers": true, "use_scaled_predictions": false, "use_thumbnails": false, "use_truncation_thresholds": false, diff --git a/tests/data/output/autogenerated_rsmxval_config.json b/tests/data/output/autogenerated_rsmxval_config.json index 7c386fbe3..609487903 100644 --- a/tests/data/output/autogenerated_rsmxval_config.json +++ b/tests/data/output/autogenerated_rsmxval_config.json @@ -6,7 +6,6 @@ "train_file": "ENTER_VALUE_HERE", // OPTIONAL: replace default values below based on your data. "candidate_column": null, - "clamp_features": true, "description": "", "exclude_zero_scores": true, "feature_subset": null, @@ -35,6 +34,7 @@ "trim_max": null, "trim_min": null, "trim_tolerance": 0.4998, + "truncate_outliers": true, "use_scaled_predictions": false, "use_thumbnails": false, "use_truncation_thresholds": false, diff --git a/tests/data/output/interactive_rsmeval_config.json b/tests/data/output/interactive_rsmeval_config.json index b2f946bae..a5c5b71d1 100644 --- a/tests/data/output/interactive_rsmeval_config.json +++ b/tests/data/output/interactive_rsmeval_config.json @@ -31,8 +31,9 @@ "special_sections": null, "subgroups": [], "trim_tolerance": 0.4998, + "truncate_outliers": true, "use_thumbnails": true, "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file diff --git a/tests/data/output/interactive_rsmeval_config_groups.json b/tests/data/output/interactive_rsmeval_config_groups.json index 02266fef8..6eb7b1c85 100644 --- a/tests/data/output/interactive_rsmeval_config_groups.json +++ b/tests/data/output/interactive_rsmeval_config_groups.json @@ -36,8 +36,9 @@ "L1" ], "trim_tolerance": 0.4998, + "truncate_outliers": true, "use_thumbnails": true, "use_wandb": false, "wandb_entity": null, "wandb_project": null -} +} \ No newline at end of file diff --git a/tests/data/output/interactive_rsmexplain_config.json b/tests/data/output/interactive_rsmexplain_config.json index e5a2e3d62..b88ac4337 100644 --- a/tests/data/output/interactive_rsmexplain_config.json +++ b/tests/data/output/interactive_rsmexplain_config.json @@ -6,7 +6,6 @@ "experiment_dir": "/a/b", // OPTIONAL: replace default values below based on your data. "background_kmeans_size": 500, - "clamp_features": true, "custom_sections": null, "description": "explain test", "general_sections": [ @@ -23,6 +22,7 @@ "show_auto_cohorts": false, "special_sections": null, "standardize_features": true, + "truncate_outliers": true, "use_wandb": false, "wandb_entity": null, "wandb_project": null diff --git a/tests/data/output/interactive_rsmpredict_config.json b/tests/data/output/interactive_rsmpredict_config.json index 094cbb2e1..5745ea063 100644 --- a/tests/data/output/interactive_rsmpredict_config.json +++ b/tests/data/output/interactive_rsmpredict_config.json @@ -5,7 +5,6 @@ "input_features_file": "features.csv", // OPTIONAL: replace default values below based on your data. "candidate_column": null, - "clamp_features": true, "file_format": "csv", "flag_column": null, "human_score_column": "score", @@ -14,6 +13,7 @@ "second_human_score_column": null, "standardize_features": true, "subgroups": [], + "truncate_outliers": true, "use_wandb": false, "wandb_entity": null, "wandb_project": null diff --git a/tests/data/output/interactive_rsmtool_config.json b/tests/data/output/interactive_rsmtool_config.json index 916c2f928..63ebf6968 100644 --- a/tests/data/output/interactive_rsmtool_config.json +++ b/tests/data/output/interactive_rsmtool_config.json @@ -6,7 +6,6 @@ "test_file": "test.csv", // OPTIONAL: replace default values below based on your data. "candidate_column": null, - "clamp_features": true, "custom_sections": null, "description": "an rsmtool test", "exclude_zero_scores": false, @@ -49,6 +48,7 @@ "trim_max": 1, "trim_min": 5, "trim_tolerance": 0.4998, + "truncate_outliers": true, "use_scaled_predictions": true, "use_thumbnails": false, "use_truncation_thresholds": false, diff --git a/tests/data/output/interactive_rsmtool_config_groups.json b/tests/data/output/interactive_rsmtool_config_groups.json index fd3b7e1fc..b5649514e 100644 --- a/tests/data/output/interactive_rsmtool_config_groups.json +++ b/tests/data/output/interactive_rsmtool_config_groups.json @@ -6,7 +6,6 @@ "test_file": "test.csv", // OPTIONAL: replace default values below based on your data. "candidate_column": null, - "clamp_features": true, "custom_sections": null, "description": "an rsmtool test", "exclude_zero_scores": false, @@ -57,6 +56,7 @@ "trim_max": 1, "trim_min": 5, "trim_tolerance": 0.4998, + "truncate_outliers": true, "use_scaled_predictions": true, "use_thumbnails": false, "use_truncation_thresholds": false, diff --git a/tests/data/output/interactive_rsmxval_config.json b/tests/data/output/interactive_rsmxval_config.json index 8f9ec2108..2ce6a52da 100644 --- a/tests/data/output/interactive_rsmxval_config.json +++ b/tests/data/output/interactive_rsmxval_config.json @@ -5,7 +5,6 @@ "train_file": "train.csv", // OPTIONAL: replace default values below based on your data. "candidate_column": null, - "clamp_features": true, "description": "xval test", "exclude_zero_scores": true, "feature_subset": null, @@ -34,6 +33,7 @@ "trim_max": 1, "trim_min": 5, "trim_tolerance": 0.4998, + "truncate_outliers": true, "use_scaled_predictions": true, "use_thumbnails": false, "use_truncation_thresholds": false, diff --git a/tests/data/output/interactive_rsmxval_config_folds_file.json b/tests/data/output/interactive_rsmxval_config_folds_file.json index 09853082b..239cdc253 100644 --- a/tests/data/output/interactive_rsmxval_config_folds_file.json +++ b/tests/data/output/interactive_rsmxval_config_folds_file.json @@ -5,7 +5,6 @@ "train_file": "train.csv", // OPTIONAL: replace default values below based on your data. "candidate_column": null, - "clamp_features": true, "description": "xval test", "exclude_zero_scores": true, "feature_subset": null, @@ -34,6 +33,7 @@ "trim_max": 1, "trim_min": 5, "trim_tolerance": 0.4998, + "truncate_outliers": true, "use_scaled_predictions": true, "use_thumbnails": false, "use_truncation_thresholds": false, diff --git a/tests/data/output/interactive_rsmxval_config_groups.json b/tests/data/output/interactive_rsmxval_config_groups.json index 40ba7101b..75bddca44 100644 --- a/tests/data/output/interactive_rsmxval_config_groups.json +++ b/tests/data/output/interactive_rsmxval_config_groups.json @@ -5,7 +5,6 @@ "train_file": "train.csv", // OPTIONAL: replace default values below based on your data. "candidate_column": null, - "clamp_features": true, "description": "xval test", "exclude_zero_scores": true, "feature_subset": null, @@ -37,6 +36,7 @@ "trim_max": 1, "trim_min": 5, "trim_tolerance": 0.4998, + "truncate_outliers": true, "use_scaled_predictions": true, "use_thumbnails": false, "use_truncation_thresholds": false, diff --git a/tests/data/output/interactive_rsmxval_config_groups_folds_file.json b/tests/data/output/interactive_rsmxval_config_groups_folds_file.json index e0eb6f66a..2fcb4bc67 100644 --- a/tests/data/output/interactive_rsmxval_config_groups_folds_file.json +++ b/tests/data/output/interactive_rsmxval_config_groups_folds_file.json @@ -5,7 +5,6 @@ "train_file": "train.csv", // OPTIONAL: replace default values below based on your data. "candidate_column": null, - "clamp_features": true, "description": "xval test", "exclude_zero_scores": true, "feature_subset": null, @@ -36,6 +35,7 @@ "trim_max": 1, "trim_min": 5, "trim_tolerance": 0.4998, + "truncate_outliers": true, "use_scaled_predictions": true, "use_thumbnails": false, "use_truncation_thresholds": false, From b01a073a1bd1cba64b25eb25d6c8b3fb64b13b67 Mon Sep 17 00:00:00 2001 From: Zhaoyang Xie Date: Tue, 3 Oct 2023 16:44:25 -0400 Subject: [PATCH 05/13] docs: add details on truncate outliers in each context --- doc/config_rsmeval.rst.inc | 6 ++++++ doc/config_rsmexplain.rst.inc | 6 ++++++ doc/config_rsmpredict.rst.inc | 6 ++++++ doc/config_rsmtool.rst.inc | 6 ++++++ doc/config_rsmxval.rst.inc | 1 + 5 files changed, 25 insertions(+) diff --git a/doc/config_rsmeval.rst.inc b/doc/config_rsmeval.rst.inc index 8062d81b0..5ee2d2637 100644 --- a/doc/config_rsmeval.rst.inc +++ b/doc/config_rsmeval.rst.inc @@ -200,6 +200,12 @@ Defaults to 0.4998. For more fine-grained control over the trimming range, you can set ``trim_tolerance`` to `0` and use ``trim_min`` and ``trim_max`` to specify the exact floor and ceiling values. +.. _truncated_outliers_rsmeval: + +truncated_outliers *(Optional)* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If this option is set to ``false`` outliers will not be truncated by truncating outliers that 4 standard deviations away from the mean. Defaults to ``true``. + .. _use_thumbnails_rsmeval: use_thumbnails *(Optional)* diff --git a/doc/config_rsmexplain.rst.inc b/doc/config_rsmexplain.rst.inc index d961cf26a..995bff184 100644 --- a/doc/config_rsmexplain.rst.inc +++ b/doc/config_rsmexplain.rst.inc @@ -95,6 +95,12 @@ If this option is set to ``false``, the feature values for the responses in ``ba If ``experiment_dir`` contains the rsmtool configuration file, that file's value for ``standardize_features`` will override the value specified by the user. The reason is that if ``rsmtool`` trained the model with (or without) standardized features, then ``rsmexplain`` must do the same for the explanations to be meaningful. +.. _truncated_outliers_rsmexplain: + +truncated_outliers *(Optional)* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If this option is set to ``false`` outliers will not be truncated by truncating outliers that 4 standard deviations away from the mean. Defaults to ``true``. + .. _use_wandb_rsmexplain: use_wandb *(Optional)* diff --git a/doc/config_rsmpredict.rst.inc b/doc/config_rsmpredict.rst.inc index 642d0b6b6..9d07d4244 100644 --- a/doc/config_rsmpredict.rst.inc +++ b/doc/config_rsmpredict.rst.inc @@ -74,6 +74,12 @@ subgroups *(Optional)* ~~~~~~~~~~~~~~~~~~~~~~ A list of column names indicating grouping variables used for generating analyses specific to each of those defined subgroups. For example, ``["prompt, gender, native_language, test_country"]``. All these columns will be included into the predictions file with the original names. +.. _truncated_outliers_rsmpredict: + +truncated_outliers *(Optional)* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If this option is set to ``false`` outliers will not be truncated by truncating outliers that 4 standard deviations away from the mean. Defaults to ``true``. + .. _use_wandb_rsmpredict: use_wandb *(Optional)* diff --git a/doc/config_rsmtool.rst.inc b/doc/config_rsmtool.rst.inc index 62ddbc9a7..4cc4c23ed 100644 --- a/doc/config_rsmtool.rst.inc +++ b/doc/config_rsmtool.rst.inc @@ -355,6 +355,12 @@ Defaults to 0.4998. For more fine-grained control over the trimming range, you can set ``trim_tolerance`` to `0` and use ``trim_min`` and ``trim_max`` to specify the exact floor and ceiling values. +.. _truncated_outliers: + +truncated_outliers *(Optional)* +""""""""""""""""""""""""""""""" +If this option is set to ``false`` outliers will not be truncated by truncating outliers that 4 standard deviations away from the mean. Defaults to ``true``. + .. _use_scaled_predictions_rsmtool: use_scaled_predictions *(Optional)* diff --git a/doc/config_rsmxval.rst.inc b/doc/config_rsmxval.rst.inc index b3a8a02d7..d86d6e37a 100644 --- a/doc/config_rsmxval.rst.inc +++ b/doc/config_rsmxval.rst.inc @@ -83,6 +83,7 @@ In addition to the fields described so far, an ``rsmxval`` configuration file al - ``trim_max`` - ``trim_min`` - ``trim_tolerance`` +- ``truncate_outliers`` - ``use_scaled_predictions`` - ``use_thumbnails`` - ``use_truncation_thresholds`` From 546219c63d2448a20c54179a41f9e52a5972e485 Mon Sep 17 00:00:00 2001 From: Zhaoyang Xie Date: Tue, 3 Oct 2023 17:04:30 -0400 Subject: [PATCH 06/13] test: update rsmexplain experiment test to cover truncate outlier --- ...vr_explain_object_absolute_shap_values.csv | 16 +++++++-------- .../output/svr_explain_object_rsmexplain.json | 3 ++- .../output/svr_explain_object_shap_values.csv | 20 +++++++++---------- tests/test_experiment_rsmexplain.py | 1 + 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/tests/data/experiments/svr-explain-object/output/svr_explain_object_absolute_shap_values.csv b/tests/data/experiments/svr-explain-object/output/svr_explain_object_absolute_shap_values.csv index 209cebc45..98e1f3b05 100644 --- a/tests/data/experiments/svr-explain-object/output/svr_explain_object_absolute_shap_values.csv +++ b/tests/data/experiments/svr-explain-object/output/svr_explain_object_absolute_shap_values.csv @@ -1,9 +1,9 @@ ,abs. mean shap,abs. max shap,abs. min shap -FEATURE1,0.299631665831369,0.8345550127655463,0.013347643287204843 -FEATURE2,0.29033466624478443,0.7805457376678253,0.021574702955962994 -FEATURE3,0.08139637367725053,0.16395152624875498,0.002459832551694005 -FEATURE4,0.08358795348495907,0.23321996410481277,0.01513618529397907 -FEATURE5,0.15615884909224578,0.33346003308701017,0.01846187888312981 -FEATURE6,0.28026743976413465,0.6084412019685381,0.005868164061155749 -FEATURE7,0.06476554860819125,0.2513884684670798,0.000729204183812298 -FEATURE8,0.04287987518069,0.10240608813170675,0.010758458877758394 +FEATURE1,0.31253235647541666,0.8460557441423605,0.04047563422278973 +FEATURE2,0.2993396430589204,0.8605365027874241,0.02720989420625315 +FEATURE3,0.08007981161631131,0.2142185572077771,0.004119595637482491 +FEATURE4,0.08835839210242624,0.21645739385388524,0.004687200904119115 +FEATURE5,0.15569846556796665,0.337808740156355,0.021888732308370247 +FEATURE6,0.2970646384894685,0.6811831021237525,0.011592558322623448 +FEATURE7,0.06267215329781435,0.2339182289012514,0.001973291873391717 +FEATURE8,0.04165878891535699,0.10562076819489835,0.000829911084933965 diff --git a/tests/data/experiments/svr-explain-object/output/svr_explain_object_rsmexplain.json b/tests/data/experiments/svr-explain-object/output/svr_explain_object_rsmexplain.json index 84bb51da3..b40769959 100644 --- a/tests/data/experiments/svr-explain-object/output/svr_explain_object_rsmexplain.json +++ b/tests/data/experiments/svr-explain-object/output/svr_explain_object_rsmexplain.json @@ -19,5 +19,6 @@ "sample_ids": null, "use_wandb": false, "wandb_project": null, - "wandb_entity": null + "wandb_entity": null, + "truncate_outliers": false } \ No newline at end of file diff --git a/tests/data/experiments/svr-explain-object/output/svr_explain_object_shap_values.csv b/tests/data/experiments/svr-explain-object/output/svr_explain_object_shap_values.csv index 26742b417..8060f70c4 100644 --- a/tests/data/experiments/svr-explain-object/output/svr_explain_object_shap_values.csv +++ b/tests/data/experiments/svr-explain-object/output/svr_explain_object_shap_values.csv @@ -1,11 +1,11 @@ ,FEATURE1,FEATURE2,FEATURE3,FEATURE4,FEATURE5,FEATURE6,FEATURE7,FEATURE8 -RESPONSE_19,-0.07067473868926738,-0.5991628472083761,-0.1002242056743323,-0.09715078239663325,-0.10506730369961462,-0.44512875466342117,0.013089212941002012,0.010758458877758394 -RESPONSE_171,0.12364322328559758,-0.021574702955962994,0.01694258932339487,0.059387040913601134,0.03646317431432938,-0.3107302197417141,0.0936356808625167,0.027254555300104098 -RESPONSE_108,0.17560830612963552,-0.08857408960706739,-0.07391947708486325,0.019542610102776494,0.13309580251916422,-0.3952637737418945,0.12054721773963617,0.04806469628447305 -RESPONSE_99,-0.07953807922976455,-0.5517681322877015,-0.1291354433322442,-0.08385297910067088,-0.13799503055259732,-0.3298834134892442,0.05575380809257591,-0.05831580630167734 -RESPONSE_178,0.013347643287204843,0.15496753459317994,-0.025291037276699006,0.01513618529397907,0.01846187888312981,0.005868164061155749,-0.0277828854645632,-0.027934383069911507 -RESPONSE_183,-0.09923687824673885,0.31664847693046844,0.002459832551694005,0.07029459094433628,0.22007552143651274,0.09236919976123015,-0.02360282780611298,0.03177188308068227 -RESPONSE_6,-0.5706733051736929,-0.7805457376678253,0.12795049590853413,-0.23321996410481277,0.1599339360727206,-0.6084412019685381,0.03530650959963419,-0.019968852233332293 -RESPONSE_147,0.3339291078611289,-0.159675430080863,0.07056304408972502,-0.07066904307724102,0.2612926942482792,-0.5257296712238121,0.000729204183812298,-0.06874711029140518 -RESPONSE_13,0.8345550127655463,0.057046885558133524,0.10352608528226344,0.041458309262435156,0.33346003308701017,-0.03642384001051878,-0.2513884684670798,0.03357691823584916 -RESPONSE_153,-0.6951103636451137,0.1733828255582663,0.16395152624875498,-0.14516802965310457,-0.15574311610909994,-0.052836158979817735,0.025819670924979415,0.10240608813170675 +RESPONSE_19,-0.06923340530615052,-0.6297504517072231,0.004119595637482491,-0.07149478992130219,-0.1000461014784508,-0.5328961722160315,0.01731171138024482,0.000829911084933965 +RESPONSE_171,0.10010023357014904,-0.04795329736820944,0.03342105951255346,0.09601909259363599,0.021888732308370247,-0.255399908947333,0.06453439177400228,0.02481231059987846 +RESPONSE_108,0.2791808917844876,-0.15560341116082033,-0.0900612692753466,0.03215227278584294,0.07761401555817601,-0.39790409471957544,0.14770556160785822,0.05841865039175142 +RESPONSE_99,-0.04372548600969406,-0.5862544985577178,-0.06539355625416061,-0.058910930362788015,-0.1376078174179458,-0.37962377020956417,0.03073864309239185,-0.06155634332812944 +RESPONSE_178,0.04047563422278973,0.12595380043149962,-0.007557249674750365,0.02105558902718211,0.04852132289640179,-0.023825597417692742,-0.024729200394772305,-0.040719903499179574 +RESPONSE_183,-0.0700268387880018,0.24032426371773577,-0.009084319482197872,0.08009597853723793,0.2746358119011508,0.10131452876535232,-0.020407251063397386,0.026328923884766937 +RESPONSE_6,-0.6372729332683316,-0.8605365027874241,0.2142185572077771,-0.18885691405267194,0.22319834234302838,-0.6811831021237525,0.0595267657172235,-0.0063510059204844194 +RESPONSE_147,0.35910197934257193,-0.13890799852855146,0.08717457789560558,-0.11385375898559692,0.268489444274117,-0.5267599301864854,-0.02587648717361002,-0.055273707984764685 +RESPONSE_13,0.8460557441423605,0.02720989420625315,0.1212851123884624,0.004687200904119115,0.337808740156355,-0.011592558322623448,-0.2339182289012514,0.036676364264782595 +RESPONSE_153,-0.6801504183196303,0.1809023121237687,0.16848281883477664,-0.21645739385388524,-0.06717432734567058,-0.0601467219862742,-0.001973291873391717,0.10562076819489835 diff --git a/tests/test_experiment_rsmexplain.py b/tests/test_experiment_rsmexplain.py index e6ef8d730..2b70812dd 100644 --- a/tests/test_experiment_rsmexplain.py +++ b/tests/test_experiment_rsmexplain.py @@ -48,6 +48,7 @@ def test_run_experiment_svr_explain_with_object(self): "sample_size": 10, "num_features_to_display": 15, "show_auto_cohorts": True, + "truncate_outliers": False, } config_obj = Configuration(config_dict, context="rsmexplain", configdir=configdir) From 1de5b74bd359ee0357b6508be0c9356e0232b410 Mon Sep 17 00:00:00 2001 From: Zhaoyang Xie Date: Wed, 4 Oct 2023 12:35:57 -0400 Subject: [PATCH 07/13] fix: address PR comments --- doc/config_rsmeval.rst.inc | 6 +++--- doc/config_rsmexplain.rst.inc | 6 +++--- doc/config_rsmpredict.rst.inc | 6 +++--- doc/config_rsmtool.rst.inc | 6 +++--- examples/rsmtool/config_rsmtool.json | 1 - rsmtool/preprocessor.py | 6 +++--- 6 files changed, 15 insertions(+), 16 deletions(-) diff --git a/doc/config_rsmeval.rst.inc b/doc/config_rsmeval.rst.inc index 5ee2d2637..4ef08a9f1 100644 --- a/doc/config_rsmeval.rst.inc +++ b/doc/config_rsmeval.rst.inc @@ -200,11 +200,11 @@ Defaults to 0.4998. For more fine-grained control over the trimming range, you can set ``trim_tolerance`` to `0` and use ``trim_min`` and ``trim_max`` to specify the exact floor and ceiling values. -.. _truncated_outliers_rsmeval: +.. _truncate_outliers_rsmeval: -truncated_outliers *(Optional)* +truncate_outliers *(Optional)* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If this option is set to ``false`` outliers will not be truncated by truncating outliers that 4 standard deviations away from the mean. Defaults to ``true``. +If this option is set to ``false``, outliers will not be truncated by truncating outliers that are 4 standard deviations away from the mean. Defaults to ``true``. .. _use_thumbnails_rsmeval: diff --git a/doc/config_rsmexplain.rst.inc b/doc/config_rsmexplain.rst.inc index 995bff184..4c10e1f85 100644 --- a/doc/config_rsmexplain.rst.inc +++ b/doc/config_rsmexplain.rst.inc @@ -95,11 +95,11 @@ If this option is set to ``false``, the feature values for the responses in ``ba If ``experiment_dir`` contains the rsmtool configuration file, that file's value for ``standardize_features`` will override the value specified by the user. The reason is that if ``rsmtool`` trained the model with (or without) standardized features, then ``rsmexplain`` must do the same for the explanations to be meaningful. -.. _truncated_outliers_rsmexplain: +.. _truncate_outliers_rsmexplain: -truncated_outliers *(Optional)* +truncate_outliers *(Optional)* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If this option is set to ``false`` outliers will not be truncated by truncating outliers that 4 standard deviations away from the mean. Defaults to ``true``. +If this option is set to ``false``, outliers will not be truncated by truncating outliers that are 4 standard deviations away from the mean. Defaults to ``true``. .. _use_wandb_rsmexplain: diff --git a/doc/config_rsmpredict.rst.inc b/doc/config_rsmpredict.rst.inc index 9d07d4244..26b66e640 100644 --- a/doc/config_rsmpredict.rst.inc +++ b/doc/config_rsmpredict.rst.inc @@ -74,11 +74,11 @@ subgroups *(Optional)* ~~~~~~~~~~~~~~~~~~~~~~ A list of column names indicating grouping variables used for generating analyses specific to each of those defined subgroups. For example, ``["prompt, gender, native_language, test_country"]``. All these columns will be included into the predictions file with the original names. -.. _truncated_outliers_rsmpredict: +.. _truncate_outliers_rsmpredict: -truncated_outliers *(Optional)* +truncate_outliers *(Optional)* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If this option is set to ``false`` outliers will not be truncated by truncating outliers that 4 standard deviations away from the mean. Defaults to ``true``. +If this option is set to ``false``, outliers will not be truncated by truncating outliers that are 4 standard deviations away from the mean. Defaults to ``true``. .. _use_wandb_rsmpredict: diff --git a/doc/config_rsmtool.rst.inc b/doc/config_rsmtool.rst.inc index 4cc4c23ed..aad9aebe5 100644 --- a/doc/config_rsmtool.rst.inc +++ b/doc/config_rsmtool.rst.inc @@ -355,11 +355,11 @@ Defaults to 0.4998. For more fine-grained control over the trimming range, you can set ``trim_tolerance`` to `0` and use ``trim_min`` and ``trim_max`` to specify the exact floor and ceiling values. -.. _truncated_outliers: +.. _truncate_outliers: -truncated_outliers *(Optional)* +truncate_outliers *(Optional)* """"""""""""""""""""""""""""""" -If this option is set to ``false`` outliers will not be truncated by truncating outliers that 4 standard deviations away from the mean. Defaults to ``true``. +If this option is set to ``false``, outliers will not be truncated by truncating outliers that are 4 standard deviations away from the mean. Defaults to ``true``. .. _use_scaled_predictions_rsmtool: diff --git a/examples/rsmtool/config_rsmtool.json b/examples/rsmtool/config_rsmtool.json index 04b5d760f..4f12cffc4 100644 --- a/examples/rsmtool/config_rsmtool.json +++ b/examples/rsmtool/config_rsmtool.json @@ -12,6 +12,5 @@ "id_column": "ID", "second_human_score_column": "score2", "length_column": "LENGTH", - "clamp_features": false, "standardize_features": false } diff --git a/rsmtool/preprocessor.py b/rsmtool/preprocessor.py index ccb9a3327..7d433af18 100644 --- a/rsmtool/preprocessor.py +++ b/rsmtool/preprocessor.py @@ -1138,13 +1138,13 @@ def preprocess_features( standardize_features : bool, optional Whether to standardize the features. Defaults to ``True``. + truncate_outliers : bool, optional + Truncate outlier values if set in the config file + Defaults to ``True``. use_truncations : bool, optional Whether we should use the truncation set for removing outliers. Defaults to ``False``. - truncate_outliers : bool, optional - Truncate outlier values if set in the config file - Defaults to ``True``. Returns From 4d31f66df23b1178c0cc44628a9af32cb811d00d Mon Sep 17 00:00:00 2001 From: Zhaoyang Xie Date: Thu, 5 Oct 2023 14:19:50 -0400 Subject: [PATCH 08/13] fix: address PR comments --- doc/config_rsmeval.rst.inc | 2 +- doc/config_rsmexplain.rst.inc | 2 +- doc/config_rsmpredict.rst.inc | 2 +- doc/config_rsmtool.rst.inc | 2 +- rsmtool/preprocessor.py | 19 ++-- rsmtool/rsmexplain.py | 89 ++++++++++++++----- .../output/knn_diff_std_rsmtool.json | 1 + .../output/knn_same_std_rsmtool.json | 1 + .../output/knn_rsmtool.json | 1 + tests/test_explanation_utils.py | 26 +++++- 10 files changed, 113 insertions(+), 32 deletions(-) diff --git a/doc/config_rsmeval.rst.inc b/doc/config_rsmeval.rst.inc index 4ef08a9f1..0ba434155 100644 --- a/doc/config_rsmeval.rst.inc +++ b/doc/config_rsmeval.rst.inc @@ -204,7 +204,7 @@ Defaults to 0.4998. truncate_outliers *(Optional)* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If this option is set to ``false``, outliers will not be truncated by truncating outliers that are 4 standard deviations away from the mean. Defaults to ``true``. +If this option is set to ``false``, outliers (values more than 4 standard deviations away from the mean) in feature columns will _not_ be truncated. Defaults to ``true``. .. _use_thumbnails_rsmeval: diff --git a/doc/config_rsmexplain.rst.inc b/doc/config_rsmexplain.rst.inc index 4c10e1f85..1353b4825 100644 --- a/doc/config_rsmexplain.rst.inc +++ b/doc/config_rsmexplain.rst.inc @@ -99,7 +99,7 @@ If this option is set to ``false``, the feature values for the responses in ``ba truncate_outliers *(Optional)* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If this option is set to ``false``, outliers will not be truncated by truncating outliers that are 4 standard deviations away from the mean. Defaults to ``true``. +If this option is set to ``false``, outliers (values more than 4 standard deviations away from the mean) in feature columns will _not_ be truncated. Defaults to ``true``. .. _use_wandb_rsmexplain: diff --git a/doc/config_rsmpredict.rst.inc b/doc/config_rsmpredict.rst.inc index 26b66e640..29cd3a006 100644 --- a/doc/config_rsmpredict.rst.inc +++ b/doc/config_rsmpredict.rst.inc @@ -78,7 +78,7 @@ A list of column names indicating grouping variables used for generating analyse truncate_outliers *(Optional)* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If this option is set to ``false``, outliers will not be truncated by truncating outliers that are 4 standard deviations away from the mean. Defaults to ``true``. +If this option is set to ``false``, outliers (values more than 4 standard deviations away from the mean) in feature columns will _not_ be truncated. Defaults to ``true``. .. _use_wandb_rsmpredict: diff --git a/doc/config_rsmtool.rst.inc b/doc/config_rsmtool.rst.inc index aad9aebe5..516380f51 100644 --- a/doc/config_rsmtool.rst.inc +++ b/doc/config_rsmtool.rst.inc @@ -359,7 +359,7 @@ Defaults to 0.4998. truncate_outliers *(Optional)* """"""""""""""""""""""""""""""" -If this option is set to ``false``, outliers will not be truncated by truncating outliers that are 4 standard deviations away from the mean. Defaults to ``true``. +If this option is set to ``false``, outliers (values more than 4 standard deviations away from the mean) in feature columns will _not_ be truncated. Defaults to ``true``. .. _use_scaled_predictions_rsmtool: diff --git a/rsmtool/preprocessor.py b/rsmtool/preprocessor.py index 7d433af18..94c810b96 100644 --- a/rsmtool/preprocessor.py +++ b/rsmtool/preprocessor.py @@ -1052,7 +1052,7 @@ def preprocess_feature( A set of pre-defined truncation values. Defaults to ``None``. truncate_outliers : bool, optional - Truncate outlier values if set in the config file + Whether to truncate outlier values. Defaults to ``True``. Returns @@ -2554,7 +2554,10 @@ def process_data_rsmpredict(self, config_obj, data_container_obj): ) (df_features_preprocessed, df_excluded) = self.preprocess_new_data( - df_input, df_feature_info, standardize_features, truncate_outliers + df_input, + df_feature_info, + standardize_features=standardize_features, + truncate_outliers=truncate_outliers, ) trim_min = df_postproc_params["trim_min"].values[0] @@ -2715,10 +2718,16 @@ def process_data_rsmexplain(self, config_obj, data_container_obj): # now pre-process all the features that go into the model (df_background_preprocessed, _) = self.preprocess_new_data( - df_background_preprocessed, df_feature_info, standardize_features, truncate_outliers + df_background_preprocessed, + df_feature_info, + standardize_features=standardize_features, + truncate_outliers=truncate_outliers, ) (df_explain_preprocessed, _) = self.preprocess_new_data( - df_explain_preprocessed, df_feature_info, standardize_features, truncate_outliers + df_explain_preprocessed, + df_feature_info, + standardize_features=standardize_features, + truncate_outliers=truncate_outliers, ) # set ID column as index for the background and explain feature frames @@ -2809,7 +2818,7 @@ def preprocess_new_data( Defaults to ``True``. truncate_outliers : bool, optional - Whether the outlier should be truncated prior to prediction. + Whether outlier should be truncated prior to prediction. Defaults to ``True``. Returns diff --git a/rsmtool/rsmexplain.py b/rsmtool/rsmexplain.py index dfe8aad5d..e3a880ae0 100644 --- a/rsmtool/rsmexplain.py +++ b/rsmtool/rsmexplain.py @@ -36,6 +36,47 @@ from .utils.wandb import init_wandb_run, log_configuration_to_wandb +def verify_config_features(explain_config, rsmtool_config, feature, logger=None): + """ + Verify and update a specific feature in the explanation configuration. + + Parameters + ---------- + explain_config : rsmtool.configuration_parser.Configuration + The Configuration object for rsmexplain module. + rsmtool_config : rsmtool.configuration_parser.Configuration + The Configuration object for rsmtool module. + feature : str + The name of the feature to verify and update. + logger : logging object optional + A logging object. If ``None`` is passed, get logger from ``__name__``. + Defaults to ``None``. + + Returns + ------- + rsmtool.configuration_parser.Configuration + The updated explanation Configuration object. + + """ + logger = logger if logger else logging.getLogger(__name__) + + rsmexplain_feature = explain_config[feature] + rsmtool_feature = rsmtool_config[feature] + + # use the original rsmtool experiment's value for either `standardize_features` + # or `truncate_outliers` for rsmexplain as well; raise a warning if the values + # were different to begin with + if rsmexplain_feature != rsmtool_feature: + logger.warning( + f"overwriting current {feature} value " + f"({rsmexplain_feature}) to match " + f"value specified in original rsmtool experiment " + f"({rsmtool_feature})." + ) + explain_config[feature] = rsmtool_feature + return explain_config + + def select_examples(featureset, range_size=None): """ Sample examples from the given featureset and return indices. @@ -133,7 +174,11 @@ def mask(learner, featureset, feature_range=None): def generate_explanation( - config_file_or_obj_or_dict, output_dir, overwrite_output=False, logger=None, wandb_run=None + config_file_or_obj_or_dict, + output_dir, + overwrite_output=False, + logger=None, + wandb_run=None, ): """ Generate a shap.Explanation object. @@ -268,32 +313,22 @@ def generate_explanation( # read the original rsmtool configuration file, if it exists, and figure # out the value of `standardize_features` that was specified when running # the original rsmtool experiment - rsmexplain_standardize_features = configuration["standardize_features"] expected_config_file_path = join(experiment_output_dir, f"{experiment_id}_rsmtool.json") if exists(expected_config_file_path): with open(expected_config_file_path, "r") as rsmtool_configfh: rsmtool_config = json.load(rsmtool_configfh) - rsmtool_standardize_features = rsmtool_config["standardize_features"] - - # use the original rsmtool experiment's value for `standardize_features` - # for rsmexplain as well; raise a warning if the values were different - # to begin with - if rsmexplain_standardize_features != rsmtool_standardize_features: - logger.warning( - f"overwriting current `standardize_features` value " - f"({rsmexplain_standardize_features}) to match " - f"value specified in original rsmtool experiment " - f"({rsmtool_standardize_features})." - ) - configuration["standardize_features"] = rsmtool_standardize_features + for feature in ["standardize_features", "truncate_outliers"]: + configuration = verify_config_features( + configuration, rsmtool_config, feature, logger + ) # if the original experiment rsmtool does not exist, let the user know else: logger.warning( - f"cannot locate original rsmtool configuration; " - f"ensure that current value of " - f"`standardize_features` ({rsmexplain_standardize_features}) " - f"was the same when running rsmtool." + "cannot locate original rsmtool configuration; " + "ensure that current value of " + "`standardize_features` and `truncate_outliers`" + "were the same when running rsmtool." ) # load the background and explain data sets @@ -547,7 +582,12 @@ def main(): # or one of the valid optional arguments, then assume that they # are arguments for the "run" sub-command. This allows the # old style command-line invocations to work without modification. - if sys.argv[1] not in VALID_PARSER_SUBCOMMANDS + ["-h", "--help", "-V", "--version"]: + if sys.argv[1] not in VALID_PARSER_SUBCOMMANDS + [ + "-h", + "--help", + "-V", + "--version", + ]: args_to_pass = ["run"] + sys.argv[1:] else: args_to_pass = sys.argv[1:] @@ -561,7 +601,9 @@ def main(): logger.info(f"Output directory: {args.output_dir}") generate_explanation( - abspath(args.config_file), abspath(args.output_dir), overwrite_output=args.force_write + abspath(args.config_file), + abspath(args.output_dir), + overwrite_output=args.force_write, ) else: @@ -570,7 +612,10 @@ def main(): # auto-generate an example configuration and print it to STDOUT generator = ConfigurationGenerator( - "rsmexplain", as_string=True, suppress_warnings=args.quiet, use_subgroups=False + "rsmexplain", + as_string=True, + suppress_warnings=args.quiet, + use_subgroups=False, ) configuration = ( generator.interact(output_file_name=args.output_file.name if args.output_file else None) diff --git a/tests/data/experiments/knn-explain-diff-std/existing_experiment/output/knn_diff_std_rsmtool.json b/tests/data/experiments/knn-explain-diff-std/existing_experiment/output/knn_diff_std_rsmtool.json index ef712152c..157760093 100644 --- a/tests/data/experiments/knn-explain-diff-std/existing_experiment/output/knn_diff_std_rsmtool.json +++ b/tests/data/experiments/knn-explain-diff-std/existing_experiment/output/knn_diff_std_rsmtool.json @@ -16,6 +16,7 @@ "exclude_zero_scores": true, "select_transformations": false, "standardize_features": true, + "truncate_outliers": true, "use_thumbnails": false, "use_truncation_thresholds": false, "predict_expected_scores": false, diff --git a/tests/data/experiments/knn-explain-same-std/existing_experiment/output/knn_same_std_rsmtool.json b/tests/data/experiments/knn-explain-same-std/existing_experiment/output/knn_same_std_rsmtool.json index 54d1ba509..6402c0f1e 100644 --- a/tests/data/experiments/knn-explain-same-std/existing_experiment/output/knn_same_std_rsmtool.json +++ b/tests/data/experiments/knn-explain-same-std/existing_experiment/output/knn_same_std_rsmtool.json @@ -16,6 +16,7 @@ "exclude_zero_scores": true, "select_transformations": false, "standardize_features": false, + "truncate_outliers": true, "use_thumbnails": false, "use_truncation_thresholds": false, "predict_expected_scores": false, diff --git a/tests/data/experiments/knn-explain/existing_experiment/output/knn_rsmtool.json b/tests/data/experiments/knn-explain/existing_experiment/output/knn_rsmtool.json index ef712152c..157760093 100644 --- a/tests/data/experiments/knn-explain/existing_experiment/output/knn_rsmtool.json +++ b/tests/data/experiments/knn-explain/existing_experiment/output/knn_rsmtool.json @@ -16,6 +16,7 @@ "exclude_zero_scores": true, "select_transformations": false, "standardize_features": true, + "truncate_outliers": true, "use_thumbnails": false, "use_truncation_thresholds": false, "predict_expected_scores": false, diff --git a/tests/test_explanation_utils.py b/tests/test_explanation_utils.py index 73c83fca2..3a2a79ffb 100644 --- a/tests/test_explanation_utils.py +++ b/tests/test_explanation_utils.py @@ -1,3 +1,4 @@ +import json import unittest from os import environ from os.path import join @@ -9,7 +10,7 @@ from skll.learner import Learner from rsmtool.modeler import Modeler -from rsmtool.rsmexplain import mask, select_examples +from rsmtool.rsmexplain import mask, select_examples, verify_config_features # allow test directory to be set via an environment variable # which is needed for package testing @@ -205,3 +206,26 @@ def test_mask_from_learner_on_disk(self): computed_ids, computed_features = mask(model, background, feature_range=[5, 10]) self.assertEqual(computed_ids, expected_ids) assert_array_equal(computed_features, expected_features) + + def test_verify_config_features(self): + """Test verify_config_features when features are different.""" + experiment_path = join(rsmtool_test_dir, "data", "experiments", "knn-explain-diff-std") + rsmtool_config_path = join( + experiment_path, + "existing_experiment", + "output", + "knn_diff_std_rsmtool.json", + ) + rsmexplain_config_path = join(experiment_path, "rsmexplain.json") + expected_output = True + + with open(rsmtool_config_path) as rsmtool_configfh, open( + rsmexplain_config_path + ) as rsmexplain_configfh: + rsmexplain_config = json.load(rsmexplain_configfh) + rsmtool_config = json.load(rsmtool_configfh) + computed_config = verify_config_features( + rsmexplain_config, rsmtool_config, "standardize_features" + ) + + self.assertEqual(computed_config["standardize_features"], expected_output) From f37761804116b187b207418193c7b2bfc78c8883 Mon Sep 17 00:00:00 2001 From: Nitin Madnani Date: Thu, 5 Oct 2023 17:43:25 -0400 Subject: [PATCH 09/13] refactor: remove unnnecessary function Use a simple `for` loop to verify and overwrite the values of `standardize_features` and `truncate_outliers` for rsmexplain. --- rsmtool/rsmexplain.py | 69 +++++++++------------------------ tests/test_explanation_utils.py | 26 +------------ 2 files changed, 19 insertions(+), 76 deletions(-) diff --git a/rsmtool/rsmexplain.py b/rsmtool/rsmexplain.py index e3a880ae0..3e5c632b2 100644 --- a/rsmtool/rsmexplain.py +++ b/rsmtool/rsmexplain.py @@ -36,47 +36,6 @@ from .utils.wandb import init_wandb_run, log_configuration_to_wandb -def verify_config_features(explain_config, rsmtool_config, feature, logger=None): - """ - Verify and update a specific feature in the explanation configuration. - - Parameters - ---------- - explain_config : rsmtool.configuration_parser.Configuration - The Configuration object for rsmexplain module. - rsmtool_config : rsmtool.configuration_parser.Configuration - The Configuration object for rsmtool module. - feature : str - The name of the feature to verify and update. - logger : logging object optional - A logging object. If ``None`` is passed, get logger from ``__name__``. - Defaults to ``None``. - - Returns - ------- - rsmtool.configuration_parser.Configuration - The updated explanation Configuration object. - - """ - logger = logger if logger else logging.getLogger(__name__) - - rsmexplain_feature = explain_config[feature] - rsmtool_feature = rsmtool_config[feature] - - # use the original rsmtool experiment's value for either `standardize_features` - # or `truncate_outliers` for rsmexplain as well; raise a warning if the values - # were different to begin with - if rsmexplain_feature != rsmtool_feature: - logger.warning( - f"overwriting current {feature} value " - f"({rsmexplain_feature}) to match " - f"value specified in original rsmtool experiment " - f"({rsmtool_feature})." - ) - explain_config[feature] = rsmtool_feature - return explain_config - - def select_examples(featureset, range_size=None): """ Sample examples from the given featureset and return indices. @@ -310,25 +269,33 @@ def generate_explanation( f"generated during model training." ) - # read the original rsmtool configuration file, if it exists, and figure - # out the value of `standardize_features` that was specified when running - # the original rsmtool experiment + # read the original rsmtool configuration file, if it exists, and ensure + # that we use its value of `standardize_features` and `truncate_outliers` + # even if that means we have to override the values specified in the + # rsmexplain configuration file expected_config_file_path = join(experiment_output_dir, f"{experiment_id}_rsmtool.json") if exists(expected_config_file_path): with open(expected_config_file_path, "r") as rsmtool_configfh: - rsmtool_config = json.load(rsmtool_configfh) - for feature in ["standardize_features", "truncate_outliers"]: - configuration = verify_config_features( - configuration, rsmtool_config, feature, logger + rsmtool_configuration = json.load(rsmtool_configfh) + + for option in ["standardize_features", "truncate_outliers"]: + rsmtool_value = rsmtool_configuration[option] + rsmexplain_value = configuration[option] + if rsmexplain_value != rsmtool_value: + logger.warning( + f"overwriting current `{option}` value " + f"({rsmexplain_value}) to match " + f"value specified in original rsmtool experiment " + f"({rsmtool_value})." ) + configuration[option] = rsmtool_value # if the original experiment rsmtool does not exist, let the user know else: logger.warning( "cannot locate original rsmtool configuration; " - "ensure that current value of " - "`standardize_features` and `truncate_outliers`" - "were the same when running rsmtool." + "ensure that the values of `standardize_features` " + "and `truncate_outliers` were the same as when running rsmtool." ) # load the background and explain data sets diff --git a/tests/test_explanation_utils.py b/tests/test_explanation_utils.py index 3a2a79ffb..73c83fca2 100644 --- a/tests/test_explanation_utils.py +++ b/tests/test_explanation_utils.py @@ -1,4 +1,3 @@ -import json import unittest from os import environ from os.path import join @@ -10,7 +9,7 @@ from skll.learner import Learner from rsmtool.modeler import Modeler -from rsmtool.rsmexplain import mask, select_examples, verify_config_features +from rsmtool.rsmexplain import mask, select_examples # allow test directory to be set via an environment variable # which is needed for package testing @@ -206,26 +205,3 @@ def test_mask_from_learner_on_disk(self): computed_ids, computed_features = mask(model, background, feature_range=[5, 10]) self.assertEqual(computed_ids, expected_ids) assert_array_equal(computed_features, expected_features) - - def test_verify_config_features(self): - """Test verify_config_features when features are different.""" - experiment_path = join(rsmtool_test_dir, "data", "experiments", "knn-explain-diff-std") - rsmtool_config_path = join( - experiment_path, - "existing_experiment", - "output", - "knn_diff_std_rsmtool.json", - ) - rsmexplain_config_path = join(experiment_path, "rsmexplain.json") - expected_output = True - - with open(rsmtool_config_path) as rsmtool_configfh, open( - rsmexplain_config_path - ) as rsmexplain_configfh: - rsmexplain_config = json.load(rsmexplain_configfh) - rsmtool_config = json.load(rsmtool_configfh) - computed_config = verify_config_features( - rsmexplain_config, rsmtool_config, "standardize_features" - ) - - self.assertEqual(computed_config["standardize_features"], expected_output) From f977cb9d223d8bee48a6bed03ba8ea56c25d3e84 Mon Sep 17 00:00:00 2001 From: Nitin Madnani Date: Thu, 5 Oct 2023 17:43:49 -0400 Subject: [PATCH 10/13] fix: rsmeval does not need outlier truncation --- doc/config_rsmeval.rst.inc | 6 ------ rsmtool/utils/constants.py | 9 ++++----- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/doc/config_rsmeval.rst.inc b/doc/config_rsmeval.rst.inc index 0ba434155..8062d81b0 100644 --- a/doc/config_rsmeval.rst.inc +++ b/doc/config_rsmeval.rst.inc @@ -200,12 +200,6 @@ Defaults to 0.4998. For more fine-grained control over the trimming range, you can set ``trim_tolerance`` to `0` and use ``trim_min`` and ``trim_max`` to specify the exact floor and ceiling values. -.. _truncate_outliers_rsmeval: - -truncate_outliers *(Optional)* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If this option is set to ``false``, outliers (values more than 4 standard deviations away from the mean) in feature columns will _not_ be truncated. Defaults to ``true``. - .. _use_thumbnails_rsmeval: use_thumbnails *(Optional)* diff --git a/rsmtool/utils/constants.py b/rsmtool/utils/constants.py index 23d4f2442..33932e235 100644 --- a/rsmtool/utils/constants.py +++ b/rsmtool/utils/constants.py @@ -129,12 +129,12 @@ "section_order", "candidate_column", "standardize_features", + "truncate_outliers", "min_items_per_candidate", "skll_grid_search_jobs", "use_wandb", "wandb_project", "wandb_entity", - "truncate_outliers", ], }, "rsmxval": { @@ -170,12 +170,12 @@ "skll_objective", "candidate_column", "standardize_features", + "truncate_outliers", "min_items_per_candidate", "skll_grid_search_jobs", "use_wandb", "wandb_project", "wandb_entity", - "truncate_outliers", ], }, "rsmeval": { @@ -209,7 +209,6 @@ "use_wandb", "wandb_project", "wandb_entity", - "truncate_outliers", ], }, "rsmpredict": { @@ -222,12 +221,12 @@ "human_score_column", "second_human_score_column", "standardize_features", + "truncate_outliers", "subgroups", "flag_column", "use_wandb", "wandb_project", "wandb_entity", - "truncate_outliers", ], }, "rsmcompare": { @@ -288,13 +287,13 @@ "sample_ids", "show_auto_cohorts", "standardize_features", + "truncate_outliers", "general_sections", "custom_sections", "special_sections", "use_wandb", "wandb_project", "wandb_entity", - "truncate_outliers", ], }, } From ed86ad688ccc7e8a9fecf7ffb0b308b0e936d9fe Mon Sep 17 00:00:00 2001 From: Nitin Madnani Date: Thu, 5 Oct 2023 17:44:43 -0400 Subject: [PATCH 11/13] test: add new rsmexplain tests Add new tests to check that `truncate_outliers` values are correctly overwritten when necessary. --- .../output/knn_diff_trunc.model | Bin 0 -> 58005 bytes .../output/knn_diff_trunc_feature.csv | 9 +++ .../output/knn_diff_trunc_rsmtool.json | 43 +++++++++++ .../knn-explain-diff-trunc/rsmexplain.json | 13 ++++ .../output/knn_same_trunc.model | Bin 0 -> 58005 bytes .../output/knn_same_trunc_feature.csv | 9 +++ .../output/knn_same_trunc_rsmtool.json | 43 +++++++++++ .../knn-explain-same-trunc/rsmexplain.json | 13 ++++ tests/test_experiment_rsmexplain.py | 70 ++++++++++++++++++ 9 files changed, 200 insertions(+) create mode 100644 tests/data/experiments/knn-explain-diff-trunc/existing_experiment/output/knn_diff_trunc.model create mode 100644 tests/data/experiments/knn-explain-diff-trunc/existing_experiment/output/knn_diff_trunc_feature.csv create mode 100644 tests/data/experiments/knn-explain-diff-trunc/existing_experiment/output/knn_diff_trunc_rsmtool.json create mode 100644 tests/data/experiments/knn-explain-diff-trunc/rsmexplain.json create mode 100644 tests/data/experiments/knn-explain-same-trunc/existing_experiment/output/knn_same_trunc.model create mode 100644 tests/data/experiments/knn-explain-same-trunc/existing_experiment/output/knn_same_trunc_feature.csv create mode 100644 tests/data/experiments/knn-explain-same-trunc/existing_experiment/output/knn_same_trunc_rsmtool.json create mode 100644 tests/data/experiments/knn-explain-same-trunc/rsmexplain.json diff --git a/tests/data/experiments/knn-explain-diff-trunc/existing_experiment/output/knn_diff_trunc.model b/tests/data/experiments/knn-explain-diff-trunc/existing_experiment/output/knn_diff_trunc.model new file mode 100644 index 0000000000000000000000000000000000000000..65769794df00075acd7b08c2120fb7000dd59c9d GIT binary patch literal 58005 zcmeFZc{o+y`}mDyDv}hHLJCQy6iO|XN|cn6l8_-XhDw9dEU6SqvosKrDYNcl2$|=3 zIF5PdIK~Xm>iz!g-ShMPc71-=^XGHvx?bI9t#z-p_gZ@m_qzAm+*o}zS(*O%=efYq z$=2E4-bTjO-pt&_+|kFAegA)cdHbxr?&Ins>B(+mZsKV7J8iy`wT+Do8D!u0p9E5M zfrYt=v(b5TQ)hcet4qJLEd3{onTfNBjG2|`|Cz?4j$`!xD3is^HFLh`VD95N*Y2FH z!$o{g7B==K&YKi{ynQ@5oy;9f98K^Ad_0-A`gpQA;h>YbkGm(Ulf9!emOQ_2;+&(o zk)4UHxs#EPA+7_*?p@mtA33y3-iQ3D@aN}-KR*@!{M`8G=cYeDmHzxx_A&RIci!IA zEfewS$UPnY%aTtPIV zow?O%%TxA_PBKQ0=BFKhH|Fos7inn|35WiT#$AVO%XxN2WIi9Qxt?r}CT3RWobUxW zO>9o%UhHgX>*L94a?aTv-vEAE7Di6EXZvWa^klO&cXqTg#nHA_cGmV6oUE;UJh}gk zH2U2~ZGRWT?BJut;>l)bbjJSF?=bs?-*@Begu9U$u9u0Oskx7(41Pi^M!4}8VJ#c0 zQ!@WN8zToRQ`|v(JQr&J9tYchKcAzHCJqjGV38Hza5{I&)W*cg2|q4R_TT;G_fy1= zZLX802_8iI8O*nFz~0g9_i$w1>0`Oja)T#}#sBM>S}Og|C+f*%Vq;@}!N}Iu!~u_~ z<^SH$KZP5a*jSymvo*JKHad0D+1$xTi}&0quHPNv-+%vh1YWiOUGo1Yo?}Kj5&oMb z$|KVM7M>YDQ~z)2gdQPh>h+`lvp$0Vxqqg=)PsosTYZW02|4q+GvkSL;y#Elp+{VA zW;~HTQxbZFoLMdrKQrIV@Jzk`+c2RwQxfIRjGx(##QhTbge3A2=|p&D`rqRJrbp-# zlF*xZ9pXNT=RxSrltej%B+`j6k)H??>HkgsH~pFEGp|FW6aAYA6Oy<-5hm(S#Ltw3 z-pu%!VM33PL^~3CL_86mDgRavBA(DE${{2XKU2<3C&EN|M3~4mmc&eS8?Yi2y5 zPsqQe6Y(?Khsa07&%6$i?{AXGPt=3ZBjO23luv{SJwncm|8M$4K0?mSKQm0|&6I@R z|Lye%{h8&=4F9b?X6g~;5%O>8MEw7)9-&Xj|CVoNIuR!1%yNi$qTObu6ZI$bW~LM2 znUc`|o1CdnglFm#@qd$q{>=2h)n}$2Q9nWw=|s7IlSKJMI+2f%L_8sh_?ePOC*;ig zAg)8?BlHM4GyZSknejw^qI@Du=n?Y=5l=`Wod^?>h$rOCcp^;5zv<14C-i5w3!yht z68BBii%2KRA>`lUiS(J0D36FI^8YtEQ~z(_nej8rotaLA3HjgjiFBeK|E)cVatTS~ zpDAai6XBV1W(WlDJPo68eOksXsIPH@%tV{VgAnf2Jh#{}%tZbVC2Xr4!GI zkc2)ViE;=@#1oQ;C+bPW6OtGwM3~T@DG9xq@iW83b%=PPT*8i=Sq_m-$eH4h3<8)j3@4!h$rO#*K&w_ge1x%BoY6&cABY2g#Xs=L_AS0A$iq&YyaKJ zGI`nUZ%lMDm;xYvuGIhd!1}^-+k*x_X)8&t=g3X9@NoaB0!442x6Bh?ZD?4W&Ed@_@D?9A= z!QKQ*gMa;wmM1NZElu|OFSzYvX=-WquiL^(@sBI&e{)+{TDe=E`Ct4Izr9l|m^ql= zzR$TBHVixF_rKZn+4h<3AG70Qc7B+hA7|Hx+4XVOKA5$SX6?gS`*?PLFuOmR-5<{G zk7xY@v;Kiu|G=z&VAek{>mQi)56t=pX8i-R{()Klz^s2@);}=oADHzI%=!mr{R6Z9 zfm#2+tbbtEKQQYbnDr0L`UhtH1GD~tS^vPSe_+->FzX+f^$*PY2WI^Pv;Kiu|G=z& zVAek{>mQi)56t=pX8i-R{()Klz^s2@)<5w7fB%5*`hV-lrhn(=!0=)OF(MdAj2uQ8qlVGI=wS3Q#u!VC1I7j8hH=MuVf--u zm>^6zCJsZvWMK+1l^80f9n+5)#r*y_o&&>+5yXgKBr$RrWsDj|1EYh{#~5QQF%B3P zj2p%s!G{g_eA?>`#Af#JmnVni^K z7&(kGMh&BZ(ZT3rj4_rN2aF5G4dagS!uVnQF+rGcOdN)S$-)$3Dlt?{JEk8qiebVA zBL{{TBZv{fNMhtL${00_21W;?k1@trVjM6o7&nYN#tY+z@y7&V!ZC3e3MLCvh^fR- zG3}Us%qWHl8!H?bUW_0{1S5%&!zg3aFd7&gj6TK~V~KIVxM18c?ieqOAI2XOgbByQ zVJMg^Od+NcL&daX`Z1#zCcIE`V0bZt7!iylMh>HlQNw6pbTIlDV~i!n0po&k!?d#j_JpYVwmtG&4J;?2x3Gqk{CIRGDZ!ffziR}V~jDD z7zd0C#tq|+@xu6F{4qh8a7-MAg2}=ZVk$9IOgp9@Gm7~=_&G4V7(t8(MiL{3QO2lY zG%z|CeT*^2661hz!MI`EF(-#K>WkF=`kMj1ER0V~nxHIAB~bZWwor7se0cj|swr zW8yFrOctgPQ;DHs+A;l@Q4G_3e0>ZrMi3){k;KShlrd@;4U7&(A7hNM#5iDFFm4!k zj2Ffa)AXj2cD*ql3}M7-K9k4j31V8^#^uh4I7qV|>@)a)1BxWc}xlJ^%fW-kc^a z0`~t{1PcbOGY;(NM(6cduHT)Tfb2GgY3Zg6p*IJDDKT3aXwJJ;UMzJzD0JDyk`aMM zWN>=%K-xJ5QjL*bG!#QYVx|&T)m~ANvk>B2XEcP2%6nAI4a*_XZ0VWJiVUys!j4;hq51XlQP;V~k+98B(fGR&baSgxiiuAn55zpWv|iDm4}MeKkYlv*Ttg8oLj3=|6p8|mmdwmW#6qw0<+Og#%_Up zdJeK%o)?v!-HY~R&v~=#eI>jvo)aeeG6oLrq^)&w_<<5-IA!M)R6z2i@}`R)zaZ{O zW@Y_KDjctM;r&|Pf^1lfH}6O<1evj34F*>~TF+rnKi{AWtv{@O+2(j1>fKZ}#Y9O( z`<3^MEeq&_HQ(|-y`3C@*8-EHdtGy2>m7?tE7=(EmM*?~Xx-2mFqg+NhMDJN) zbULjAE{pIFa>lZ#c@%2i7w_r>*KuXpPwdsmuR}}zwhj$(tL+<^t6T*J;bkmxr8IE< zToKN`tP)P}Z?8IWrwuL$A3YG{KMV!$&TB2903zydkP-chXYwcPH=XY>l0!qQ3N5{P zdoq!+Q^I=h!C|yc?(9~Xwj9Wu&;Pc|+#6I@&fmV^#3XtBLmO?J?_JM@+t(kcU#u)a z(L*;L2X07(b9~nWmAE?5kWu*d1tpEJSoE#Pcuo@%jTZ^JTWAEGnmp&;x-`SFLC@3T zxx;WcQ&jK$?R1b`{J=%^aTbztJXWt<{sjy(Rg2@3o8hSYa*iT;Gem`n<$V~cLf0xH z3~gT0;km)^lkUJ^vi-g3fr(j>1E6)_*4EU!?GUkvW!|%jcz7e~dN^Q+4#rNSK`BeW z!r7oE^R&!5IK1g-qvf^%&<{C(QdI3L^i&qAOnzwt!-1#c{LE8e>{6*4_n`yE3psAv zj}Ak(wUCO~ykGETQ|3AOl4Q{1RnfhDtQ@>0DmP4fQc=T@;4|4e8cbDNtt^xqL&jEW zKg3osQ0gdkJl8G-F>m+cZ+%`y_J94ss?iNVMJ+*EOcN)Dk%B_no_k@nh_^nBz09&7 zp4q$3Q5bE9=>?ptlBRw_(y^0E!s$uq-o09;d*kD9C)qCMsYe>be$e5uPcDGldY=|Z zhEvg<5D%@V>8&7THzF!4YL0$xV><9DxeZ!reQ&>V_mllk&+6A@gH!cz=vd`oQ*jAY zTwxuvPVYreDwcXtKKGzY)n&SKKSZPNG}n-fiC(m`JxM&2wU+EZ?T-Svqdb0r%~`tK z^+P4VeP_kP+pJ&ULB{^@iK<~Z)WRldBf~%*hbp@-ANEJIRE`(kHI3x>yMBtXICHcc zp4_S~(9G;ZBNfsGM+;RL*N`aiG1Hcrq*P%Q)2kKVs__|cO8VY27 zK7W2~5{UU{vU=-dCfyVWdh@MAB0UA21SQ|)8%v;nk@~gt!CZ8A)#!1C-!E8DdYR*t z^e|XN3>~^JKTP)jfdw8h39{knR{`%K`_M?lBcdl7Z$pO_yB;;WZjFTxJdavz`?KI- zf>u~X$SAmMm-)mK)&}%yQwo#uv$OqZ-t#N_7D#r$u|i(? zM7+!)IKAln{4rw6GypR#p&4rp3Rf7SiAUUL2l=yCFCksF4A1C=IQzcj%8xFO}U zd_|BzUj$O94T%1`V^#Sr_QWob&uM0CrT&@!(?9VAslM(*_g#6$?8{gvx%+F5b`JAW z-1vLB*Ce#UMYFUwdmb}WPK=(n=|9dwk-Z}+Uk3{)#Q1w87rNioxf|JC?H(4b96?rW zy=lEFwTM>1DOwje0H>@cCN9;rgImhy>TKs9pz7rdvUrAT|W(}s8MT% z*L0K5Z>yJq<|XX_)G!j?)u1zp+-V~V+Z4JGlgJMhHVy_@WTTq*@(fh%aZsz2cN_wR zUP?L9St&&S|6;y)e>*<|qMC%9T|bXP8~-Z*R!wF~byx5FhoqZ^<^j0^_Sz}tFy=DU54gXxz%sB~r z(fgIHh1HOvZU`FTZHPx>WwJ_bLIr*^Mna zr25JEvswS#=O4$ZXx_xBjHvV`^8Ta5yt*Vj`rt{ETv3B{EqbxTHOxyp19B%%Z+snC z3N=hqCyQ+xAx>61Z@Ez+gs&7^zDK(O0?w4Z)L!3?LK6E2RuA^W1K)LzPu3iU)ko}3 z7HTFSqqWD_R|iwjy~k1SyLa^qlBx4#;O|k1Y^)EEC|LFU)FI0X$iI$kY^UZ!W z2>M&E>gjD9K@XKaIi|h@$lyBt>SRqjT-m?s*{(%%)HZO;+sNJ6O|Cy$j%dwMvmW$I zHCw{&-5`=ls^nj{tqV;qyL)tp`WSL+K|86td(eueDPNiX6jZW)Z}BykPO|+I9KXlx z<6x$!7FdQ?U!+2EY{JM{QGk)H_n%gCjlgH7xQ!#r8EAoVm|6k%6XfyPE$L%xH4yz@ z!H)N_@A`7E|9I56*t`{tVw7+Ded~tkpJGvk73na)D6G3@ln%WfEMof)0lKtUpkk`) z&-|@!!nej>dIJ4SXx-7`c>^&?Iu2*=Y6pq5k~KCfror~C{oBVjo#0$$wu`-P7}A%r zuHbw;0Yv|8)X>|)T+#%(k<~$)=MKSP(!93ApLUGhspKl(WMZ{wPRG^Vmj@>BuIf= zj$;p|_eDdS>$&~>?EPr*m0Rv>rw5UpZpW3u*Hk2Z^PrqwV>{V?fojYB4233;VN3ch zfwfs+dHKqU%Y9Ag$}>g9ijCF4#&^WOEWHu#eO&c=>#}MvPDv|#|7946^%JgZN^^bg zfwI$isdU*KM2Q(~v12rWUG6hgrg_z9tGJ5Sog-CfY3kmEr8&*0t84AXTdG5({qSV8 zza=|~feyyUw!F$`CfC1>F~b7Kj9S5r{YbF=9cIdQ{sFDfWh@kq-Susz?7S4+#sS0m zsXb`btxeSxZNq4lar(~sot?mR?b7BHH73gYH(QmL#E-){8L`**cRWLklE}XIt2!aM zd9kV1-F))-*O=7!&cEG<^eA7Sjfdue>Ois8`O{jnZNYW0|T3`5jf)GfAX44Jqp?RM9Jvk5Rz#$jXx7z3_o%UHk}7$_^WGDeYZ4`{pSp?llwch3h2KkST0)E3F#}NVmR!X zD8)uclA@Hy;bEIhP`B$_^znd->+$$bm|`tgFskq;*RRKVF`TMxb96kwAZ9l2 zNbu=4ut{+6&e`7x3Zi$dDVLhzs=Qk5POnBd9G|X38L0xI{<${P@->hRPUaj6?gb18 zPqp@(t{FkacWtvioXmuiOLW+lId;JHdC%7HMX;!?rw?;3<7y`D=cT(7e@@rLAf~;0 zrSgvtK!~S!aA9gU?3-pjzrAi8Y3>u*CMVtne77aT#gibDX4s<;;^FLDhsqz@SUJbXqkxD>;Y|)y$g$rua@;Zxa_Mb+44*V| z{PFsy7=JwT3#=#7@7(%X220hM8jATS$n6o=wXMAAs5Zd+)`J{6Tot->=>*=F4Ht-& z-z}t(^UwLzT^9gz#PHSu zC}3XxF3F;q%ztOOq^C(|FG|!J(%z-<3-Kz_#q2rao^_kC&*afm&e zl(K7qI(e?J;PFm!{aUhfxWy(fAMcBl@-HuIMFZD9Zz#>rL{G#yEa=t4h;M%Rx3kak zfEHHOCVJNsYCeAz4Z!;%!hbPGy_wnQWeF6l>!!tpHX~LJ#~QdyLl?Nev9Y}wKv&ge zr7vG?1eHB(Kc23kqL^AE+jHjaSUiD+O##B3Kq${kAxz`Ud5;6k)Z_3db zfh5*w>n_B%@(WwOa3R{K|7h>-<7S9Ui{-O?^b~o0-S$N$=GtFj%D+DAz!xeiYn^wc z(p zat636Co0afPKEbdp9M-Ch=D@IiDyYC@Za4|e5lE$!$2h#Jc%^_2DDS4T*cJ1mFzzU zFCG(;M0lThUuW4;>Nk*ycz;GBr5^^F_IuoP?tppOoJL!OV_{>5RS;Y9AdF@Bz5KW! zhaCR`?N)K`>A5iX)3E|e`vFkA_hc$mrxJOj8Wt^F)`cF8*RRzI$Vb`5tJBN$-Qh$T zPov-4X+-!>iX>7GeONmN5773E`^h8VskttV@@NRE1p?00Z)t?Hx~AuT8q%S%tKsD2 zQwFe|N@sq{(o6bZc%+5xA{UmTGm|$JZ%8!4J4QLzrZvUzv^&DSjnM)g4hj>tD!Bdv zS)PY#yP$4Rw`D*3ANzxjdg7$Z2VuA0#j~2NoP5-1 zU32|l_AdxJxunZ?-7r+tC45cnVUY8$ISRZk#y^7Iet48G>p2Mb9)9}8v@r}_jaxou zvosQ%1I;$JEM=hJcZyE)gIUzpCe7Kobsd%Tzj$P9`KWzk06c0(N1s(N(BZ0J`JG19 zh(C}&>PK5Y?3UUeohI83YBzh|4v72&K5H$zBdu=`v3@tda7)vE{S$@U36_sDtwjqy zUg2L{la1~dM$yt)N6db0L?Nzkm=|;L zx_cL@cdU+275a%jpDm8uHRX@8w!IS5=W2z`LONLJ~AE)A)+eD2zX{av+PSFIoTcY?39_&SLL)gZ;_IxW@BqBe5g zGI7N>2HAhY7_uLNPW*y`FRap&)(jY5`hcU*k_KiKqwc?G1z>dSK*~O@Z?NB+m3DwH z8gi^2eDBZdC)YpWvO3Ag&dtM>ngeq%u=@YZq$z-};~S z+Z-QCeO$sXwEl7Pk>c~^XzvUAki7|!=$f9dTi@0e&=$M#L+u6?mQ_Wph!LZsaZSUx z2QK|&`)%U7dEytX8+9rl_EbB;Kvw#K*S#G_(8{A#L&%0-^NgASd-GuwOE7os?G-WPiG> zU6x!2i+6f_fBLi!2>X55Wx~nVl7Rx3Y{^`9ydFAog_kglQ^0!zPlH0&5M)0(CEPPP zh(>(94d%VPjZ$PT*iA4skn7)IRQ2@79YY|X$90-IGJ^IvI5qI^?nJu0-wwy>jv?uT zKYd)SdeFqaZLEnEspyCHGASYKUnJJQeS(2Xd2~Pnje`1;0>#MuMq=)+sCHB@VrG!z z8w-c>N?o37GSK%&HJYEt2ap3r*y%a?(|_Lk*sqCF7(mBH!oB3z)xiAcO4-LbdeMu_ z9gZf_6-YgfDso4*3hD=YRZLQ^B6d57OLMKnLthXL+@2~ zG$9FN_gdb&4XEcpIbB4Oflg?y=TTkV1e(U>A#yLe$ol7HQpA~3E}}!9isuGD&&KoY ztvL^LQ&HQ?R~!Bm3X96;DXfg(0}48`-#% zUI1D&`)3!jGmvbBkFEvmr6re>!0~}Pdf9{s|6#_aT^!%845E{?p=Uv3MNs~5<4&8ow#(4*;8i%Z!57qatVph`tLV)LRYt?er#>(4Rlz}J^gMQyg0sKqn` zKD71+_E3L7;IeDCw^;Xs-Q}xi9SR2Erd-o8y+|r*-=5+9zMF~kU-o>dm_ODqhL~N$ zmD-eApvhrEx#h|*?*GLiT0dxLPOI-?2W+b$+58zJ%Lb3JZ_TF_y?RUxK%m~|E{CFIBNpn1+ysm`i$_H|(X6;btn>pS%(u!K%{rL2}G8zc~ z^J+#*pwQ7eCYDPaKR6p+X9)KiDxBGXdK4A8{^K5JPIO^)#5xHeU zH~2lf?|Mb03MnnG&7w8n{pf%E_b>B=^Fp3hqjkMEJ-NPgqkRt=Se?x|DBOG3{=!4DV{Jr|C;=^^4RcL;H=(abh40vpk<52Kq1a^+P zsP^2Ufw{N%yp57{$lIzQ^EjOWE8+Q~x7Z&_ygzk{4Cr|It_i)D2vaE5qQfa?|NPxM zdSGP!rihw}arE$9T8?mHCw!Y?HM}TOiFOqnrbr5vlKtmk@rubqY%QQtXlQ(%rxB$` zn7ytVYzB4rv>Q+L#*kOsp5{aa2AY4h3;NgM_c!Lv&X%GjdCzJaJ<-F=$ zLIFc)?aLh1BEvxlT77Xx*`r2y-_9Mq^J+d6M&H#tx;zTzDgI3M3`vA)&Da-|7sH6~ zzn>B1ym$F|3xtGf4n&AjQE>PL6|YxAXxZ0?G<)|}1Hs_+4OK z7OjZf|EWjbJtT?kdjXkiRK+wpGHheFkhkqc>u;~jHa*mevLbtmkGy;jQ+C^?Yt4Ru zpz{(jZ^MVA|6%`k=e%z^`2AzL(&N?B*>K^7pC2ozZ zDmyS*6Kd9i7F}5u#6VK1Lb-FdH=zmv18w#XKR|v*x{m(x zVRS@H=vrnR4YBgY->}5%t4UM+u2+tK-d`M_Y|>R!7(y$vH})^j&4$>xr<=b8jzQB$ zUmo>#Po#94h0zt>4%@~WYjq@w;bMv6-f@{0a{Q@WxN7#sxf0gvT6wXfF*Gm7=cl!1 z6Wa4^N`S|-0WDo)T2U^-Kt;7r2bCO}fGc5#S#EnDdH)WbA6+k9Xhm^Ghm4BTs!_!X zN8ao&LumimhwJy4wIYoNRyMomFwoD-nrpsyw4ro`c#aFr@#OQj-MTEOu{{N{#aEg= z9k54H8b3s~JnBK6RsB1?BL|V!#H7$Vg$zX9G<>XQ;{ZAii4v^7f8L+ixShMXxoZ$) z?yI)@(y2(IF0ttJR~m}v-TkO_;1>GFwg%8@+g_AeI|U_kMuneAWBcV$_Lj8iPGl#YonwgKOAz}n zQ*D-#36Bg2EPtrd-`WH7Hk(}QPnZC~bFV)@&Ls3CdI>KSssj7l7p~M@Z$$!fOU^fO zw2=1ugS_e}$B-Oo)&FcO=~M;-5`(KY=4PT`M)tf<3c0XyZtGqhl^&R1c!mFwS_dpR z;yn4ImqF%V{L7i!1bI?j<$o?(u@bstw!P2Q(?}Qm!ohOJERLJOq55g7uY^? zGEoF&R6a)T9wO(T>$g?Hd19zgUcGMpj&p735qp4kWJM8Ne;TtoIAaWPEs=ZXE5Sf_ zhlBUpVEau`ee;k5_W)`CxNw{8bCoWJ)$0a6Eo^5%{tbl+kthc8wA0(%R{jo2k4Xko zT$w15WwIWg>g_NlU^W`OxQBfIV<@B(_&{|MC4J5DmAROMn7oswSPTYWJA1hOnWk@` zXJ9YA<$exq4&gasl{^Zxq?_`S$C}9Xhcn^)Md677$kv%ZRw`DBHcN3Cy?M)k1MH{A zbwUy#VL^3u*ct|E+xbN@XjK3*{JdW`d~QBD{&gFS<2w|~;j8|#ZWpd-B=LgPbexTj zY=_y_QSmw;wz1%z|4Ig0l{9C{H8LAjJ+4V^I#P-V|Bb8W*3t(34yZ4UX=18$N2MBD zKA+eZ3kyVVJ2d$BBNgrVWo1eX6rOm`XhR4Uq3oj>WuIHf_51t&aMfw+LUcav<#gF< zIwTyco#T$@pQ(M}<XR zmrp)VGd#4|cwauQ7Ez;i_!q{0N3S(RJ#xEF# zFM`4c%Yui=@fT55w>oS~8(K2z>0kH04c5-PDZDMM7O~x2#`|R-1zkz&Stwgd1zL1w z^PTQI;LJA}T$Pqe?q9Q&CVV$9t3o$BC-w@)jlk^-cUFF6XQrfz$GyJ4jg3;YGdbRP zydRE?INst8tw+0kmOWOxJwUF1i+{ejweNKoBF>LR{j!$0*Yyn@-kh!1r`e43&K>Am zaHAWl?;1W{o&6edu1<@&E?-0LUsM>9$2m{8gXHAOrV^fJR6e;kR=bCR=u7gx_h`2w z8#bx92Hbz2=g*b!y3mGV+6)|*Swxfedq~=X()BAQAy33$AjK;iwLE$I(OkR|tPi@1 zottO|1)90i>ws`b+PLp{2!1c{%}8T&7)~el@8))XX_;~Zpc}8pcCn-jIZ8pbI-(=p zxvTRWDFcWhcibxdRyCL}Hn?T&mWnzZZ&|;xrU7w&pjG`$f-xR%TBqyfE94hYRyzgP zCEu$oabZJ|He|7=r)Q!Z5GqUN#e%&iML$D7I`(3 z`-k^@|9@LG+Vg%+s&;|w%0e>{3jstJ|9i70J7*a_VopG{xh?}B#P#*L#w)i9i4 zXnE!*i`uq9(@P1HedPE%`-Gk~eW4#2*~zi|!t?I8v2!u$XPeQ*Nr&iB-eI&Ws<}=W zx9`A*Q}MC}eQ4cS{oY{FYI6K6)%4#hF7^)H)F0o;p%w*k2AVk+Z{+U=qMiZ&q8Qyj?cdLKD*BpuErfpVR=OcGj((hp4&8r+3RB(I z$_wkOk(?3B))wPtt4^|O})eIT3kKM4)Ek1gddC;eYLi-O#F zcz>X4Kb=-SmMa40rX-$Y`h93fQSyo3Ko61%Dk*#15QZl7_LQ#*>_yBbf~$2NH35N}#LKn>`uOBYot-wY+?IyCHn_O+3C#PpFkmiG-oC0_K`2 zp3U9yJkAy_scz5#@!j{CA>09d=$fD?e z?5dLy=)5s!XM=S;Isfq5=wAQ0eFR;(_r-@2*M-JzxVekSWg+87{`Kz4cwewSE#C>- zuPOKh?k3cO7WcgmWt{mH-JNCR`bSr1n%BrO2^QKfgR`8*k@(8u;#b(7{Ax3ve}%gq z&C?xxQY+Yumd|VAUz7G68A^sK_RQ}k{eM?j6O1bbi{XjjKGjF56bLvP|E_dh0fZ;= z$4ZLPp{>m8l~&0B$Ozx6u5zWK1CJCIsK1&d?cZ`oXFUb~KIGu}dAouK4Qa~jBR!`k z@G;%A^=*F~T*`LlJaY0Pa@x7@WSGY=2=Q}7zvXHquV2|*HWu}{6Yc&+YtfEkp`6{F zdSk4z1KgLFb9A;01M{N?+G~$6(Az_Y+~&W%hO|VD6?PnFQX~8asmDj`-$jn0Ko2f2 zFPU^ShuZ7>H8dSk=HC##|E>eQ&}CRiIniK8@IA^R_GWY>X_0&lwr2_d5#Of=77Bvhit^mHUD}5Ik(Bew&sOu~perS-?@Q*f zsPPLnzkfGeNZP-3lQp-(eoev8LsmOj9=D^;V`^;N)7dCvk>gcOd@g~~#L3d|Fc0)T z=_cpNc)`QnC%dI4M#=TBVuzvrW#N9*x!hm*eOxbGt7WTosvbwrt*(liNz_1<^1>~d zq3zIMwIlOLNGn?LYPkJ<&IdC8#}u|LM^p4{nh?5%G?oG)a{%YqBwp&ezN zCX6BZd1DtoY+|6zXGA_-+uemYIKIW0MYoadFJ3Fyo(cWXyeVxRUA+@B;@Na7CYn%6 zz=|K{2{dG_{En^MG#7cDyu11|_B)#1yUDS+vXp%ObmvRgBWd`2jE0(O)teOXWkCY+ zdw!tOLm68`&wNKF4~A9Em3q+dtyPjgg_}?Wo2YTfO@is)gk)43TvB@1( zr2lhz|1rIvIi={-w&13wSV$QZBnyKYbmGs%m0nGXt{0Te!TLJE9Y%DRF2ny3+^`jXrUB*fyaO zwWNtfeg)+H@8FKuK7qf-$*mnM6vO9;FKOISggOR#6_9>CN$nO&$dsie;PYrJistS8 z$b-LUnL0GdwWpPwKVux2_gA)+gJXv1wJM)-80lVEzg~rkPIbj3i$83IpFX?xB~*#eb+cO9kEEL)Ds-$Zhs*E2 zR@*owz^0=rH&bgf(f-n;_jZq{|NaTxt84j3hbL2E_(S3RkXWocDL<;g@WN}B86p$ zX}?6k)%GrAUlbV{>syEh&MbQH_PQ%NPj~HiellGKJEg1mEgbn3G?tj+rc2BADZ}!dT`CJ|PTMg_V z`FLEICdo{B$H&vT{>@y9!fE~(#R~&4(t2Dih^-#Qp1r&~Ve23N?HoZN)1J(3P||BS zy!K%tC=H#Th*?~KPS7{+{9bJb+s!t#eh;9*^kLmoLQh*@d->9Uq}xqo|4r$*{P5z9 zP6#PeKCO__fcTl*4R+;afWq#cPYV^tQ9TDkGH!ej8E?EQRKwMce$Fj^(EPlE9RJ0~ zkWj>fSFq=pOn?ttKS-PJ;nSV5Wpgyu(D2QJ5UNal0dd*>43 z$o*Ta&v32wUd6uDXWw7gd&g)&N0%BZ4wC*)bm2`i(-8iS@0a)mYy4g&iKAL}$B`l^nz%c>P^=9W3cOZf z=VPF)X9l}kExO^6y81hn1ApHC9HxyNZt86TR^h52`LBkd(2iy!^SS^z90|O|eYX*A zZ(E`1KT!idC%?786$XfR8~iF2nNFe;ZJ9IAHoM6l~tGHG)_pycQkYSL%e% z4d=fySX0SB{i!9gssWu)FRI>h`$#tmbCAC-y0(>EzqE&!uL^U`LFG-K4mRgrK)bom z)pa{kP*7~_oTY`|VZqMqA4=H%6^u0O*fyCAk5?V}xt%MPTtBamS*QHeXHlco>%P40 zQ3Jwq(XSTDrJyb4fo3Kq>1f9y{RZnLgRtt{H~;jyHfYXRzq5WNjm&So-aGXjKL2~= z?ZAlql`6Ex-$eAof-=~|Bt7Qo)Cx9UQK?#z5m4tLQ(TD8V+HE2W*ymGLC#<0vQ?sd z)7^-j^~pP<+bqbSDyX9vhSD#*BVb;5ksLCWT^D)RlQy;E4q`8GPa{|gfj z%S%|&1_|>tSoS1kqFn(6Nm+~OpcOKGH;^v~e7~<#JYk#z3X~Fet;rD>I(R$q!p}e7 zUuG({sf|2q0mA+{Eavq@?no|lSuTA|t1CnT&O+ZBN`JxD5{@64`-fpraI5L?_z<~% z%pLf6kyAAfNlGS-eNgB{wj*>di&JzoNrBthp9j#nMgxymQPn_vKR2(^;@8&I_2l~- z{CU}7X*&uWD@ymc{Hq&UcF?<37vOVh(k~ba>k>dcxnl3V-GdM>%<@?5Y6_x1Ha!_w z(M-<2aR-;>My(zJy`?u^*RSwJd5Jnz>JCM)YozAQ{^@3ry>h^#ZhtsX*^iVwil@S% zPwuCi@74fu{$SN*esdyDUbML>cS8AEE9$s*=TwAI zIM_~Y3rvhphw!&w&Gu7%kmHX&9oaFNSq&9SmX-*kCS+YhpSx1J9rWI>I9_?P0}8{8 zLBqWoJ{3}e^Bh>zZu}g&mG3-Aj=y8}51g;qwjqbISFMg8s?f^dmAtk-h0yebqC$P% zhiU@TIxKX$P({-2=OIB2NP=(C;=Hs!=f~ygqY3q28DRN9xHusp1?@QcF-ajraaP2hmZAEVwUYic(51=1R zB^9Qsy(re;h3nodd|p69%j9(JX9)63_I7@s2Ah}e(DnC9An!lD_^XfI;~J=tjL@ri z--l8vS+v4Sno+h*%R^@QVZ_V%_O~jnxy*B~I zmy-?ly~f`k-(BZZA(4TEUu8?ZpbR5Z3%-FDxAQ%^gE8EE{aD2r*EtO?Uz|E)5+_o{EyGtR&Z; zq~SaB)`<^;Q)g3vOahY{UugY@7-sCh%2XAG@+ru4`-P8~;P=mrc)EsuHQu*P-g8a07pLcqZ!7)aqPR*0NzUkM0MU7`Paa+S3GMYERzo z9BYA(UdKvlbBaj&$C9#-s>${i-79h6IM_V|X%7S#n-Ux0*GomyT}6fPY17(9`Get* zS}O65Cn6PsYxMVry&WRQ&)z8pvy1X1Fzdmt{Z4`CsKcQ0eUlz&6!P;Zxt!Ana7jh~gI*A@@&T4|l)bEcpQ~mRji@aibF59D3lnC$fq(tg$4J6rpWz68@tA;MfbXaSKq00Kxr7sTx*%GiEc(; zku+^=8c$vV$zM6} zSpCDLN7USE-!SIC#IT1o@4K@ArlP;kg|P2{SneqL`=B_|Zd;UoAQSH9AKwIYh*dO=0B4Kvcn`^P*r-|h>XQ~8nz2~))0?zI0yI40 zPL0B#p*#0f-!H?^N%67{mjXu4?Hrg~+QH(_^!U}2IVa}efv*BWhb~vZlzi6g5aiDv zP1=7#-F*x_?~z{Dj2!rWxT+@7b^(fS6PF9X0L=bLJy+_~9cTyLQr?ak*+oD`oU>K# zVjqn8lG;vEI}dJMi*U3?eP?PyxoIAR`LJ?BGx;K2C-(fC`Eoz(9L|6(ZUj4Lb*7;5 zf+I`i*LCo`-S!f-cng$J$dF2;ABRyUW8-+k5%9w1=F@=XbS(dL06*+ziuy|i<1fC^ zA6|xc!8eNQJxgGHaDR-x?kA8#q{|vzLVycdZF%_<;fXtaRt4NpUVj&V`in`3P9$&z zHoi@Fx&OHUI8+*G)NAxY?SOY#*-71itTS6uG`S5#dGrqs*$e{nSj*UJZ&on>i79OR z)E>PBK)!5h#2p>thOTaq`AC}!}a0ZPq_4rWkxkC+*P~p^*HL-EP$rxQHePRKE z6w9a%=1s7RzZ++woO`x00`UT?@9#Vd1@&5{Uy=rX!Km}8pX#Zaz;n;?FQf^Z(Brv6 zfYRm?{F;y>pIEd4cli(c#mQ}N;^v^lH9rjkZRB@!^frg44FX=CvK??{8N$3cig zq~i(7e}A|0FiMgP2d_x&1*k4WWA=}k*W=Ddx@9QIH@Kw}ittm8J?cs?aDY9o>}o+! z0<7Q?lU7IltJX_=#GmVlM4VTBAFAEy!tDRUubq?x@)cPApIdinilt@}svNvA_0YW- z(p=PP2LcO_bY7c}_f$SO9lXBsXs!=?e#FnJ2|u8Gr`K(Tu{cy`6>#QMEvQ+6Jyx$& zI?(F_hpi{XQv^5>7NT2%PTO$mIBgbp=wJUCDsY|GQfz{6w`8VHD-ejtU39O>Q0#(H zN-htC2nle{aVtA>n+qVg?1Qdk?>KlE?nCk@cnH&9`e)s?acQ%VmGAP`=MR@)1UZ4` zBEo;OBoYhLE*8O)AC7yE2v0!SL>bRhPDCO^kw0Cdzo9(kAO4Fxp`Buy{B?Mu_54Vg zKFaT8mCn#T8UXmTM;BC`<^cKG{9ArU5gvZFZyF=H7r#?;*U01aTs;&o z)XU46=z}_!i@p2w2Ebj;nyn+s)0qD@OCvGU5;Y04sJO3O1AW-|6(!EjJCT?J^D1w2 zez9!?UJ)UMI#y%A^_;KTmkUjx&Gg%-Q&}Gr_#tncWLg1p$Lc)py+-}ifB0kGCCmCL z?O#Ln;?5#kk6e@o7#?Lb=?A_Z->$S>&w!_xJU;}9W&#1-NpVXnf{%BG@MRK(3H72%mblK4)*~g*gPy zB@Tk~powsxS@GoG{u?*-xUxO!BXHA5nEZWP1Js#mR;h^@1bMAlTzjcIpu30nR{9zq z=I5U@uHM%TmroUH%8_hh`WxfsmNq%H4iB!4_K&+a0vgp`=NP#K*zu-TzDp|$W;?S+ zwWIjAD=H+lIPyD?+9C>)6^O+0AF93Qv=_g204L+>`?uGY!4v)SDb!vFx86X-6!2jU zywA_O!-?vrLh{a*t$Pqope4i_Kr$a%Z0UmJh%L+e@O-k0-GCM~tQ*nbrX1>*IY(4pz=W3T4j)>)9bl z^TZ zeo!{U5)iSv!#=Y*1R|x%wqg$;JW^@HE7P~5pu;D}udDVeroVQ3n2$d^>j?Ko&C8vC zj`DWXYf_=F$3VIE@zkcmMo|5un_I%FADW4^3tsiFg~Jnro@zFK@lSMu%4DWA%RqU0 z-8?O51J0}uYYTtM1%;u+nI8k1;P8#xf#vC_{zrD;Mi(I=?!?@>nAFKpto}62A!X9h zGY-n5{8?@tE`p0kpZ_r2nFXasuX>Dt50+jG5?p| z^WdEkROfDbk5`^!SO)t@$i9){RsjFT`JMW>Sx{BE?-RrF0;nfjSFi5t1B9)VEGzQU zSpNRuWs%Bx^+KSlaI5aZ!T=OczL+U>aTy5Fo_Tv#4-c~QUrW_q8iC=ZKV9C<5Q#`9 zdrH0C`ioy5GVpyRlQau$;|Hfw5=y{a&sELz&SJo~Zl}H1A{U-fT%(ASMg3V;k@#2XN^m(-k*cmaGhH9DhO+5>7& zZn$4S^$nr|C#I}`RxE!gSAD&Eb^i#|isN&ypl^qrdF6UhI@3VfWJ=a1xfM9C)CjsP zw!p{N@a{ykE#OEbeaUb)4)Y(=co~aB!-wHpu|sE`n@)q03$&Vz$|(Qru3fn=c@F$6 z+PQg@q8+Xsk=5f)f zjsv{7u#wF>3vlD8;fEKf4=r9o;=+1ZKc;^VFMf+_A?^j29$bHvM!gDdGv4SmpJ@TB zWPF5TN+p2m-g!e3vRcSp(Vb3OnGfyy_J8c3?8fE~qe_9U2+Czhf-fkpc~u0uB}Ck( z=7yp4`gN|Y+8wwv`Pkv%>;$yt6`=lHyA0157;I)nZeZ~TxlVVpp4BXT_iULV%eWA5 z#CACs$xp&}a~I1A^^hO;=-E!e*#-D?To2wJLA(ujU1f(b%2_2(|zfvC(%-Y9k=fnZ}p>btA~!dcu-)upe&Tv>tXjQVZo6PjC!Q{>^{X5LvVa zQ7(bi1jf^%ylX%&DtDbyya)1frN5QZ>;-|PO4rY!d4J=nq))xwXCNry6X?b`kL3?E z+*Q66G3UU+zHEjL@*co0#Wxc4bP2=?yh%%v9LDCaBJ<6$lQN^=>HU-)D~3eOe=)ZI zWXo1x4O2tE{ot-|hIxv`FRR+7!MAtsik)wcKwjR$$GD0tU{J&7cbs|_X!cPrR(;LH z;t%2L7R|oY{cyN_D3V!W8TMV`yxG5R83r?^7V9zK;fur{Vh&OBaN+axewvU)$ntBY zm&|4X^M5%!4Qa|(M*yE4Nij`e4pgP+WuK-QhJMk!>CK9Luv9aSBQ0bQnz*Yq)Asel z8&-}{)wB(87k_UQG~+ILcLKF!`YVyIuRsxoPuivSb*z-;2?6LAW8=C-on@G*I4@*V*~UmoXsxIF&=} zN*4@DAoSMK!9#p`r%K#o0<3?+lF#YYQE%mQlD1z%0Qx%4{et^2eT0b8rA^d7E<7UH! zRTyyCLM!Uf1ho6^!e(=K9dg^bF5*%nr zdXae~z61yhdOS!IU4RBQlFK)$M=tofBE;6IjsC8kIVwH8nrjZ^+S*+fh*5M zVi`;*3o+lf!GqdY*_&4Di2rLfeuv;5k;q3AI$Xc&IM~HcM$W(Hif^5a{vy0&J)yHa)bkjh|cPzLa--pHDk_O*sPR5}6&@l6J z-#eqAuO`;HY>@(|)JS%{cwrpc)$bD-tY3j2Vkq952%-3m!d`c~S>6J}yr0LHOgBdj1Qu z$${YM*Y)h11CyBlJYfg*%ra2iA=T+=(>Do>sx|1ifA&M2Q(X2(wr2tD1IvdVVZ(s& zf&7`3D?dSDD{bvoUMuE5GQy4HtD&9HfKjlDh@g8&ZDOrZAf!chRo~*)W?)F+_ zq=Y!!e8b0H)g728rG76fZUMXht8UA;E%_(Fn|y-a3#UuL`a925alaxkFq>hZcYO=LjullvQ9u9dz-v^R#XRc z(J@U3ZiF{{N^kplje}U%Uu~{6^U&`|&spP*bxeO#eGX4`u+|~|%k?7zVuLV{-Zk7W zc^RA>)KBrr#RJD*IZMUZi)3?S&7v z(ZN=@U-YSqzfu!$^3!vrWXgxi`ya;n{iugbmK!=zKk$$zDY;}>vJRg24yu2q*MQmo zJwIn0)=_*_U8OddbgBv7e9KJ4-?I!l)&Bo3y&~Wg>#vWkb66RuYF!5yl~8$Zc1zb7{+GuTr|#rENWeu4kUSCdNoJ) zuGSdz8V(gy>6^mzzv#yUOPARWcyLfbKj>vG+$&NLB7Qm*5G4YD(VI}NSp^{e5# z%Rin*A^TV4G$krGFahsvYH(a0T7#k|$VKjw5aM?84{&uJFUGeG!e35U&R&@ZrwvUX zwkfWFmF2gZA9QEI=b+K)Qz(C|;B1s%I@AYp_j$eM?ij$vpKCa*dM&d8qug$|x2|=< zknuY&^bpUCL!i27@{cL_E2r>z55hnBR`&BYRZT%9|K^8&_5+yyeO^%}{lz^9Xhi)k zEuGx}0)j^#yd{_bw>L}t^hp-sQIfVR6o|j2*ldtoDs&B6bv$R&6ePv?4^$$F&2v2bW-j>5pKc5rf4)B? zmu%~oAoDu61x;)txZoM5bl7PYK3F#S_TIe^W^~DP>}jfollNKTGCWtIlV-rv#jAhw z@3m&kVkO-3!1U4TvPfAGBz03Z-$p#|7=WS-*xRpSn-ICnX!HlihPxh)9_w^@`e{ijOLMj z5MWcUQXInU52d5?RdJ_9aEtNB^fXmA=)jX}G%d^n)^VBEk-akzZ;J4k|}us&Yj`%=Hgs}-_kB@`hQyiZz2|`yFVAh_TKB4NJ7T2 z`m>!CbaZE*!15>j;U|*(9#q5J@N0*UdUk`;-z8g~P?y72QCbvH0qVh6Jyq{m8`!Z(NEBJS#a>mULV+sR8MsSjEJ$HVd+>EaH6 zk4%a7pY4YO%XPV?Hb@R!BfQxHz1aN|(lkZyL41u8cdl4g`PKvXbM9>{muldZSN?O6 z)pbx(gDsT<$&Y{R@jJq!M)iO^P9Hg31koGJ9bBzA7ml4j>h1FuhR2mN@ ziDdKgJLjRK)W)k%$%uEP_DLnr^G}%nw0H6#5$OrUZyb^2za7vHztP-EvT_@M3~lK_ zRY~ozCb0M&BprbfW-OtqJxg$7q-m{Kq#3gxpB~QTX3@^T&`V3X%oWQZI_%{#GxEQM z0~U@RiT%re+pe3srz3P6++AV$V9kZ{Du4JF1z*2vdl3eqoRq8`bK*D1r}CmeM|~1z z#w1doG(!AYN)ZPNxfkH#-Kp3ViA5;E_Ur|{VI$_h>#c|NJVJT3Tz%%x#9LjE(xD~y zsxE|cZM1Pm5#C^zzx0ehuAwzO0K~bsGBYXkV9(EtK1M6twE`x%7(5Rk7=QsQ-;z8@ z+W;P3!j;`j1#XT?Exw99kV(6Az*@K#c9xrT3~Lu)^?zzJ8G53Sb#Ug#%vfa35a{`~ zBXEUb9QrohXBRd>d8)ak=RCS?klO!U?P?B@i1_JY6Xnz;OnxWXs7WjceuA~U3^@kc zGI*aM``8EHX>d@zSh5hsE1}w>C&k%*0y%3X*>_WjmzOS8iB+oi0$z#u?Kdd@xtl-T7fu&p z_q87yTTb*f9O{GWcU|GtO2l8uJ>Nc&gXV23Q{^13ahSBe{mCSJ0??$9s1YH4-rfFl z)g$vsM4mX%<9>64@X-RiaD6u2N;LyaPOP`*KTOBuzuUhr+U27oq<0?rf@u0$F?2@QhB_tzzC4B!Js%MH3yuVS82j&kFcDh$b!KkZFJK;(*mj9rs zicmi8Fbnj29k<8nJHX*)d6hjAWAH(B$C&l?eCWs+Q+p+T5Po-DkR#CRf!jx}6DFqq ztzXdQRubF9ECIji&*m40eBtCm`LJ!>Ff{9W_2ml14$M{iVln(`96t5(biA#H__J?a zx!F0n33u_^eLDeQST@!60r)dZejbPVTVhx{8FIs{xCf5fX6#IZNe&xfp2lHTKR`fZ`+z?<*Pz>Eb z^%oB$v!Jw2Ab5w{s(P>olmFu{t~XSo=YV%AYoMug8c^eqwBfby2W=XGwo0Tp7*TyQ z_ylz}h>Gp3n}nm#s&Db)PRC#T;&-v=pVk#~FsJJ4+2Zjj#5ZlaH|oI->?G8hbu3C#{YD5T+^n57U+A0OJAJdI_4$kq*sP6o0uO;fY&~>Zj0!L;5lalR2 zE3IX4^msJi>j#VA;PIRLUiX#*Tb1|T2}OvH;1B=vgAVbavYZ`oAW(LX#m_Ei*Vet` zcy1cJxb9KK@EY*|ryORDRYv{EB9BD|WShaQ(Z<-E@B(c7@pIkm3Xz|OFB2~ruF>_t zi&-=hjfHsN5H~t>=k_$b7yH@w1+qULs(6jds z-p-Q^cc_Pe`0d=7%7{9s&g(k4Ks*cTr`}3up!mqo<3-rwR4?c)dMq5Vk&F3%CNbjL zPFH8aonyUK>+i<^M;Yy5Cy!-d(p~sHNV*Z;x$&L!|{&d}bk^xzPrt9t#)@J!=8^-UfF90?M%bvsDoDExgnK zl$bGP?TkUVEa@fM6HUv2f-l(<*x^BHUV_~fi4iDrf&E6-5|Id**rKMH;NSd5)`7*S z3)`FE#aaaWdde^0gsV?xl|*@}ndDj({w`qDblm9W!8VYVLDcDUcMv@PESWt=yo}v{ z-*7ooaKkKkGZ}VP9^v8+H7MZN&QF8ykDgV$Kyhzp?Onfq+XQ$?kcWZW20|rC4gBr9 zZJ7SP<+RU;e6$8EO9T!T9+-jcPoE}}Al~DRUuSXj+NiJPqu3Kg@^*MG%JbQ2!(l)o z<4(MCs~fBTmCA2VP|y#6WA9B)-p?8an!mPm(!3G>L+XlixOX;utwVHA*6%$qqvyBw zr&xg{$5lFvPoeMehyTComL9H(8u1#cgvqMv5aDF5NNT?gMm&~gZ=dlWT!tH_Q68qp z7Qv%?9u@O(Z#{$h3|EGL1Rs+b0M`#s;6+IfQjV<+TGm z%>COCew^LQh43fOVoYyYJe~kAqtm^FvIjByE9JA@l!TiBH4_A`=RVCtzORnk!two3 zGPwuA#}S`tsZMjoVk+z$pgLE7IRVCbJi!;w&SCWrk|y^Sug~jHeKuT-DQgxy&(~ZT zBA*ANmTV&l@8&=aFxRLw?}Q%Go_P!-uzDwCg8ft5k~|IkNhr znFm>l5dQ7kq*2Ten!-xSt#?h@%3`wAhcx<^YuM94!p(K-d&{cfi35R~+Wg6!Al_{6K~K{!@KWx-eIY z5aF5+2$%OCC|JPe&!Gt710o8u@b#BTX(o2V_Bug~`hx3~c8K2FC*S0c+ zyp9e+N7JBRSyy}Eg0^c{UgTfnPp`S+W6K(O}jZADjZP^sG zllLz_i2C1VS}uec*G#~PbdARir|U5Nk&>k37-B;FO-buj8AAn-$H&vz@R@Z1oD*<*$Wt@} z!Zn#?Bl(s=FA-_%VKg zKOZF&KCiy7jF`lJ6P4O4|j;-=xd zuuGoX<5_^BFjZ->0guLM*0GIK3*f^dw`wqBFR)ZyyJ4P@2f5U$-OESYF#l11zt^3z z!~rWHr1qwRb$ty$`Q@wYb3grH0{wUY(-Q+QFqrigW$6%< z<{|1S`;mj$|K?+|`-R!2pe*yzVm;G&;GCz|;@me5w5ejRh-t2YPq#zvWm-;wv`@Yj zCk=}Mfq>ghhH=Ep^T+<2*pSO+(?M~abjneNvJnuy?myv7&AR&ba2me8|42UChN{;Jv{@zG#pXdlwJo(Dbc~b zkJ}*|n?vEAxDlwdYn~KT2)ReWaxX> zr7TV}V;G!Jjg!BK_y^w=zc@Q^rxTnkd*A!>KjQE0!zsxJKKOwzdSwkpCzpZFu~Roj z`&-dEBvVamw~IkE%W?8^RCVwMcV|X8MKM%v;tV0O9>L;&(YFjVL$`jwbDQL4S>MD!B%6`9z_-lN0d6z2w&%EB?rVhU8b8r8NEw{eAEI6aV?1`5ez`NCDbjPWuDKJ7|5B4Xt%MP8 zg`e@o_B50~6FmRenMJN2cGC&pToda7zsjsvAEN$qZyHN=O?(6hc|z>-{qY=jevns* zF3J2HkhMw+I&*9R2(OZ=`CLc-JJYSOzN5=vDmWc?ngQh-pQ&GRBdh}_`)S^@e(J{Z z4`Ml#_lFPE1F6yMLK1-%_`Da7e9`6AdDyMAfr(=d>K z<64xcy@>h$qp~KpOX{=0KcSJ9K< zb{+H=OdP*_ya#)JQZLlBzUJY`k}BD;Z{4- z0$iw<^2lUcg!Vt|xZh=`VfhcG=3*MW^cQGNFeob?8vsfs81c{>>VkUXL>jq1Dp zd)FkdWLHAc^Piqcp#J;L(3Ifzv&jBw-swpu%f{@N@aKgB-g$^;%?gIgryu$Xdq`}1S#jWH&RtLkcxR8mT$cK+u&vbIC2oDZ&}qd*(jz8wPsL4>+#&r2s8Lt*rhFwOIUf`)GOB zDZ4TFEvH7Vo9wUrf_fU~@o~eDKa0+7j*bWyshiSxhKd+R<=Ckux=oGSoqrQdj@@R} z8iazv?1zqm5y1PNiKj2C7TQos5$8<&&Ho9z7?icVL;Z|L6?KoH^?r8Gcih?GrJ>n` z?+K3by+3V2a(VQKn^bY2~Tp|=G z%ciRWch*Ea@3c07(MJpC?F8n*x54c7@3d2p_{W{C?mdg3*Tk^SFrX4V-=?E3Io|iz ze}9>%&Ge)b?8#2tAi98ulskv;eC!>d%Z#ERg(w%8uRk!DGDYhmS+_S7hgZPG(_~*p z6Y4SlqbXhQ2Df!PbT_{(HL#yRq{6s+B{ruQw$z@b?c>_P{EtG}#-m-o@pDI}w1?62 z+r>Y$r)wIW4xEMu%Tzj!=uCiu(z%YlQ}cj_r{CbszzQHUGD(sv!~qS-j_NXhB9X+e z9`$fRj-5grm|H;8(Gylbyh!$S(3_ied* zH|EH2<3g4L9~|dlYLo39|MNwd{NChQ7R(-B1>slJDP>+(fuZ}o59_A-pqknsxnVPk z<6_TO-9r7*(-LoHybdEgenQ>iF}-2T{tRW5dT)(2!GKpSEK4hBeq6m9ZBx4lib>`q zFM%i^3KbbQyuiWS&00rag)Iuax4is1vy|q3)+kjCRQGn~vMCym6cX z%!4<)uFCwyPdDH2XP(!t299ccL2h?ju<^GZt0lX2atO?(vbb;Ed;;RVBf-13b7&pK z^r)PcDVWP_@!n!_0W1bpeOwr?1(~i&YDR~;u>2p(b-_U4lzvz+aJD^wqaAjymhgjBkF5fp+;md5jodXJ$5Du9fb>&4k zZ@!6Arz>M9e$W4K_c+26ZY%P&7|npL&-Xlp=o>NrEx#lR$E&{rd>?DjTs^c3*%=p2 z6Jv@2#e@Z;Re2w5=u==1|JDOt<8Ce}BOF@P;K%idJs!42YN)}poGVxi|&(sMRE!exqqGAGEYza;5>zDz%`8)IT<;?oLvrsYo z6uSkk9vk>nR2F-1J1&SjacMJdpJUJ1Tc@B=;o%x4W>WDN*bvBZw3q9UW5v_Kb{ka&Vl;smp0iE{^{f1$oQdseYfXVDtymhpmDkaZqjDpsphdO8N%J=_Yt zNU~roSE2Z&$~ov=IC+M=zZ#R@VwP_8qC4`hxO|;5P@YxE%v@R(=^5@u-f2UHDNvx1 zUVErI4WJ}(?tI1!FqUL>qxApVfA)p7_VGttDO99v4Y^w!3EU{}T3e3cq06)L?`%g= z|H`a(Ew@__p#B)jWRO<~6F;uI^ndBX=AYVojRAF^B+gllROWTBA(>w;YQKg zEx_f8uaSnMpsS7S_R+dUsNX`}J%{@D%kOd4Zi=r$IAbpK}1y75LPGnBZ1A0Wz!Zg(ZSc&7~(p`H1AD!u(2d)O}=6~kmS*{t^ zw?az-y4N*f^U&eup#~3^X_zCDu`=1T4mg%{ImIW(A>piCBrRD7Sh;*IIxMFJ%YU9@ z@Uw007>5h-{-%yv-5|*8R1iDG031l#V0+8B25uVrT=`6e@CTMEwG<}5z^}{@xgDc+ z%>Ilrm=XB%EW^Y0=QUQ%ry<)5orK`TCP*+grJ0Xwf^vNQW30{NV5VPK@*2w18*5Xo ztO#vk`7^4g@+4Q(#(+OLdoHdYt?T;gY+PenDZ*I`YLWUb!zX5^_^+Y*7u5u(E)jVY zu>SV!$VN;vCjTi$y|4q)>yT=%$F)|I4v-aFk^b(|D7?tib8rpS@rT-W0T#cnQd2EQx5sS!Ck;o3twSb26 zHODGZKk~??12i`51V0@)_6mFU0tFmF0|~AhlYf==wK=1&F|hDi+bCDhI#^&eWZcT3jEm?LB)i_8QX$YnCP*}L2#Y~x4Zt%Tdj-={#D=b| zKo8lX2Dz0{u$r&Y87$uejqld?Tt+;NqJt}+qOFF3eOco5>($}Fg`g-BPhW!N53Xcs zkXs&`fwGHD-@9#VAl@>YCA)JLG$$*EI4@SgyDMu8r_U^cV_f&cAXgmV5ivZ`f3*?2 z{{h?1jzoh^VE<_=NR*cv$Gq&I&vd>UIP)uUn%+ZtzV0ob=R_)#=+6s*ZGoS^1Pg&fh3-!u`R`j3z!XAbJ8 zx9{*(%ysC(&ZptLuG2<31Ra92ON<64!PSvRaOgGS&n}JZOhkS+XW#>YaOYL{)Y9C^ z@fPAUZxv&v-2Tg7b}#tpt0didSbxM@l}xS#93pi1xgd-KXDjxqspO_Z?v1jlPx}|a zg-_8;wt?BO+_B~&wM-f2e|))JZ_95q1cE%Q-S{4NK}CzanY!=2pjz6*h}e4wn)U?e z1iAWyR4=O7;?ZFkc$YmwW?}-vzbw02xYTtnf-|1ttF}YSKv!Dmqci6qSbZZ`N#nQ% z_Pd)f1C+lR>?XA8&>IJyPDaUekz?5Pmq;~l$IvbV(!PpW$NXk+C?QX@=-~vcNS%@k zv&6x)50^jsM-D=_iSJTxaozC2$ms`LhJW#2&m&GSKIca38u7K0RK}oX-;39srj!xC zl0A_!F-<2J_5I2*@V*~9)hXZ5L2~#RVs`%YY6<557yN2gVIypS$9X9|aY~3M&xJOdk({x(b9SyDWq?I^w&w`RK$YKI6Q2G>;Cu! ztvgnFo^)Y-4bDXH>{~Nhg}G6RHpE2)ICtwNPm5;+xZK*VuDo+gSp0gY{%VKVXFMqF zl0PrZVFsJ_R8&zT|c-PVC$0d z6V=uKtY0qrlt_89XAuUh|GRWLfsK|Isk*hLuCBkF7p1zMp|!e+uC;-gj=vYFmZgog zuD=(xsk)V>iMg?^)jzxaWfutzfPn})kw)Fh+RRc{-Q3Vz*Vxch*Z;NGesw)vO=}yp z(MnqrZPWMnA~7{XySxsnn;4p6d$rAMOs)N;3BC6HK3?5Q*H~8@eHJ>*%E;K5-&j}E z(p1-y-^SX|*vj8)ztTVZMeSt`O~w8>)6bv#mH%^pF0VbmKNb5law~0gLsshk|8*^R zX$n_=18oBxg98S_*i!IRJOA;sNcgWj{{OxP+#~w;1CUtushgPT=o+gV*=kzqTcNxB zEvSDaX!#%8DE}N~ZEvpYZ*bF#O5NPjOiNSC5G@2}k0eQ}ZeyjZuA^&;{{E4v71~Vx z=ac;9(3L2xEe%b6%hg{B%>H}*-EEq7{!$_gf3{KobDXuAv96`2skSaUx_2|{Xk@6J z7rnKmCK8~zrLK;lwzZ*|sTF$unx_5+XAJbkDAarkm$=j@Mf~0T75%;F{?}uzbp9L} zOlIsvif~!~9!YL+`hWfyQnd4bIFHQOOkZCYX#x4~fBw;)J^H%VGXHM%LxQxlGO;!@ zGv@#E>hJpd#;A66&d^lP%-`#Pxu&U(rWOBhRp8gN)HFfbw?|ylT2u0$|1s4eQ8&>? z*JAz8!wpTXbuCRbjji}iG)*l%{aoRmHPjMDiE>UwgQt@4zKZ>_zy@UrtW$)^Dx+=Z`e7 z_SZ5nd~NVpdcTd9BaybLjfuJazhBG5#@O0W)6!DYey_Tvt~PpZ)|NKf)_W}o{yl0h zlE6QEEeNFvTrG&S(HPOu)wbHJYx?`2y%xl(zcpUVf<%=Jy&7v;8=9WBAa%1KQ?wv= zx1dn8*uy1Fh<+*DEcX8SNS)uRy4QkI^|z|aU$CG;w{Edd(SjP?{6C-e%!1~>e*ugA zTCX2k9N++FBem_%FY@))ur{`~fm(@9!Tk3N0H$^vz6>*h#I74Uw+;`AZZ0 zzLVclk|z57*P#5r36=CegsT6)AK(9eJX)XbpP&EluRLQrOmBPXMB4WU87Du;g1x*x z-}CO3!nsU5C#69d)I7u@8kCU(A0KfV0$%t2&nkPS~0GD(M!1<#jQZbHXt|Cp=EY{cI!Ph_u8j&CrX4Zcvt` zl|+f`UO!Te^IU_(|9t%uSLr{C{=Zybe~rF>;8f`Uczq*IL3u%*|MmI-HvdYFz@P~| z46^@x9B%#((LBuXUq_LSuAQzGzm}#I8hd-yO?7S6ZcyU+yud(yR_Q>DN* zSm{_CQl-b?&Vv3w-wInC z{?E7PEg1gu?F9=)B>N*s_9p*Go`Aq$!i)Tmb@Km5{<{B|QWAAEmm;gAgRIi7g+kBO zTJ!e?G@iUjHLb0Y?Loc+nYx~_=D%*lVNGLWGg~Wl9UF5rHELSxsvB!+=^_Wo!^%d> IMvMRd0mX0QOaK4? literal 0 HcmV?d00001 diff --git a/tests/data/experiments/knn-explain-diff-trunc/existing_experiment/output/knn_diff_trunc_feature.csv b/tests/data/experiments/knn-explain-diff-trunc/existing_experiment/output/knn_diff_trunc_feature.csv new file mode 100644 index 000000000..993862cb7 --- /dev/null +++ b/tests/data/experiments/knn-explain-diff-trunc/existing_experiment/output/knn_diff_trunc_feature.csv @@ -0,0 +1,9 @@ +feature,transform,sign,train_mean,train_sd,train_transformed_mean,train_transformed_sd +FEATURE1,raw,1.0,5.218315541739041,0.5232330811206761,5.218315541739041,0.5232330811206761 +FEATURE2,raw,1.0,-0.09905531550709003,0.0290821440987776,-0.09872163579199136,0.027507362950694694 +FEATURE3,raw,1.0,-0.18242617536302583,0.0750396646084921,-0.18242617536302583,0.0750396646084921 +FEATURE4,raw,1.0,-0.19067515567207452,0.15152973701964462,-0.19067515567207452,0.15152973701964462 +FEATURE5,raw,1.0,-0.09655149330236192,0.04142919523473225,-0.09655149330236192,0.04142919523473225 +FEATURE6,raw,1.0,4.448370880142472,0.30002006018773947,4.448496342999794,0.29951102956014236 +FEATURE7,raw,1.0,10.668,5.733718431708014,10.668,5.733718431708014 +FEATURE8,raw,1.0,-8484.539584319547,2530.851135611023,-8484.539584319547,2530.851135611023 diff --git a/tests/data/experiments/knn-explain-diff-trunc/existing_experiment/output/knn_diff_trunc_rsmtool.json b/tests/data/experiments/knn-explain-diff-trunc/existing_experiment/output/knn_diff_trunc_rsmtool.json new file mode 100644 index 000000000..912735a6f --- /dev/null +++ b/tests/data/experiments/knn-explain-diff-trunc/existing_experiment/output/knn_diff_trunc_rsmtool.json @@ -0,0 +1,43 @@ +{ + "experiment_id": "ASAP2", + "train_file": "../../../files/train.csv", + "test_file": "../../../files/test.csv", + "model": "KNeighborsRegressor", + "description": "A model which uses all features and a KNN.", + "test_label_column": "score", + "train_label_column": "score", + "features": "features.csv", + "use_scaled_predictions": true, + "trim_min": 1, + "trim_max": 6, + "id_column": "ID", + "second_human_score_column": "score2", + "length_column": "LENGTH", + "exclude_zero_scores": true, + "select_transformations": false, + "standardize_features": true, + "truncate_outliers": true, + "use_thumbnails": false, + "use_truncation_thresholds": false, + "predict_expected_scores": false, + "sign": null, + "file_format": "csv", + "candidate_column": null, + "general_sections": [ + "all" + ], + "special_sections": null, + "custom_sections": null, + "feature_subset_file": null, + "feature_subset": null, + "rater_error_variance": null, + "trim_tolerance": 0.4998, + "subgroups": [], + "min_n_per_group": null, + "skll_fixed_parameters": {}, + "skll_objective": null, + "section_order": null, + "flag_column": null, + "flag_column_test": null, + "min_items_per_candidate": null +} diff --git a/tests/data/experiments/knn-explain-diff-trunc/rsmexplain.json b/tests/data/experiments/knn-explain-diff-trunc/rsmexplain.json new file mode 100644 index 000000000..b1a606370 --- /dev/null +++ b/tests/data/experiments/knn-explain-diff-trunc/rsmexplain.json @@ -0,0 +1,13 @@ +{ + "description": "Explaning an KNeighborsRegressor model trained on all features.", + "experiment_dir": "existing_experiment", + "experiment_id": "knn_diff_trunc", + "background_data": "../../files/train.csv", + "background_kmeans_size": 50, + "explain_data": "../../files/test.csv", + "truncate_features": false, + "id_column": "ID", + "sample_size": 10, + "num_features_to_display": 15, + "show_auto_cohorts": true +} diff --git a/tests/data/experiments/knn-explain-same-trunc/existing_experiment/output/knn_same_trunc.model b/tests/data/experiments/knn-explain-same-trunc/existing_experiment/output/knn_same_trunc.model new file mode 100644 index 0000000000000000000000000000000000000000..65769794df00075acd7b08c2120fb7000dd59c9d GIT binary patch literal 58005 zcmeFZc{o+y`}mDyDv}hHLJCQy6iO|XN|cn6l8_-XhDw9dEU6SqvosKrDYNcl2$|=3 zIF5PdIK~Xm>iz!g-ShMPc71-=^XGHvx?bI9t#z-p_gZ@m_qzAm+*o}zS(*O%=efYq z$=2E4-bTjO-pt&_+|kFAegA)cdHbxr?&Ins>B(+mZsKV7J8iy`wT+Do8D!u0p9E5M zfrYt=v(b5TQ)hcet4qJLEd3{onTfNBjG2|`|Cz?4j$`!xD3is^HFLh`VD95N*Y2FH z!$o{g7B==K&YKi{ynQ@5oy;9f98K^Ad_0-A`gpQA;h>YbkGm(Ulf9!emOQ_2;+&(o zk)4UHxs#EPA+7_*?p@mtA33y3-iQ3D@aN}-KR*@!{M`8G=cYeDmHzxx_A&RIci!IA zEfewS$UPnY%aTtPIV zow?O%%TxA_PBKQ0=BFKhH|Fos7inn|35WiT#$AVO%XxN2WIi9Qxt?r}CT3RWobUxW zO>9o%UhHgX>*L94a?aTv-vEAE7Di6EXZvWa^klO&cXqTg#nHA_cGmV6oUE;UJh}gk zH2U2~ZGRWT?BJut;>l)bbjJSF?=bs?-*@Begu9U$u9u0Oskx7(41Pi^M!4}8VJ#c0 zQ!@WN8zToRQ`|v(JQr&J9tYchKcAzHCJqjGV38Hza5{I&)W*cg2|q4R_TT;G_fy1= zZLX802_8iI8O*nFz~0g9_i$w1>0`Oja)T#}#sBM>S}Og|C+f*%Vq;@}!N}Iu!~u_~ z<^SH$KZP5a*jSymvo*JKHad0D+1$xTi}&0quHPNv-+%vh1YWiOUGo1Yo?}Kj5&oMb z$|KVM7M>YDQ~z)2gdQPh>h+`lvp$0Vxqqg=)PsosTYZW02|4q+GvkSL;y#Elp+{VA zW;~HTQxbZFoLMdrKQrIV@Jzk`+c2RwQxfIRjGx(##QhTbge3A2=|p&D`rqRJrbp-# zlF*xZ9pXNT=RxSrltej%B+`j6k)H??>HkgsH~pFEGp|FW6aAYA6Oy<-5hm(S#Ltw3 z-pu%!VM33PL^~3CL_86mDgRavBA(DE${{2XKU2<3C&EN|M3~4mmc&eS8?Yi2y5 zPsqQe6Y(?Khsa07&%6$i?{AXGPt=3ZBjO23luv{SJwncm|8M$4K0?mSKQm0|&6I@R z|Lye%{h8&=4F9b?X6g~;5%O>8MEw7)9-&Xj|CVoNIuR!1%yNi$qTObu6ZI$bW~LM2 znUc`|o1CdnglFm#@qd$q{>=2h)n}$2Q9nWw=|s7IlSKJMI+2f%L_8sh_?ePOC*;ig zAg)8?BlHM4GyZSknejw^qI@Du=n?Y=5l=`Wod^?>h$rOCcp^;5zv<14C-i5w3!yht z68BBii%2KRA>`lUiS(J0D36FI^8YtEQ~z(_nej8rotaLA3HjgjiFBeK|E)cVatTS~ zpDAai6XBV1W(WlDJPo68eOksXsIPH@%tV{VgAnf2Jh#{}%tZbVC2Xr4!GI zkc2)ViE;=@#1oQ;C+bPW6OtGwM3~T@DG9xq@iW83b%=PPT*8i=Sq_m-$eH4h3<8)j3@4!h$rO#*K&w_ge1x%BoY6&cABY2g#Xs=L_AS0A$iq&YyaKJ zGI`nUZ%lMDm;xYvuGIhd!1}^-+k*x_X)8&t=g3X9@NoaB0!442x6Bh?ZD?4W&Ed@_@D?9A= z!QKQ*gMa;wmM1NZElu|OFSzYvX=-WquiL^(@sBI&e{)+{TDe=E`Ct4Izr9l|m^ql= zzR$TBHVixF_rKZn+4h<3AG70Qc7B+hA7|Hx+4XVOKA5$SX6?gS`*?PLFuOmR-5<{G zk7xY@v;Kiu|G=z&VAek{>mQi)56t=pX8i-R{()Klz^s2@);}=oADHzI%=!mr{R6Z9 zfm#2+tbbtEKQQYbnDr0L`UhtH1GD~tS^vPSe_+->FzX+f^$*PY2WI^Pv;Kiu|G=z& zVAek{>mQi)56t=pX8i-R{()Klz^s2@)<5w7fB%5*`hV-lrhn(=!0=)OF(MdAj2uQ8qlVGI=wS3Q#u!VC1I7j8hH=MuVf--u zm>^6zCJsZvWMK+1l^80f9n+5)#r*y_o&&>+5yXgKBr$RrWsDj|1EYh{#~5QQF%B3P zj2p%s!G{g_eA?>`#Af#JmnVni^K z7&(kGMh&BZ(ZT3rj4_rN2aF5G4dagS!uVnQF+rGcOdN)S$-)$3Dlt?{JEk8qiebVA zBL{{TBZv{fNMhtL${00_21W;?k1@trVjM6o7&nYN#tY+z@y7&V!ZC3e3MLCvh^fR- zG3}Us%qWHl8!H?bUW_0{1S5%&!zg3aFd7&gj6TK~V~KIVxM18c?ieqOAI2XOgbByQ zVJMg^Od+NcL&daX`Z1#zCcIE`V0bZt7!iylMh>HlQNw6pbTIlDV~i!n0po&k!?d#j_JpYVwmtG&4J;?2x3Gqk{CIRGDZ!ffziR}V~jDD z7zd0C#tq|+@xu6F{4qh8a7-MAg2}=ZVk$9IOgp9@Gm7~=_&G4V7(t8(MiL{3QO2lY zG%z|CeT*^2661hz!MI`EF(-#K>WkF=`kMj1ER0V~nxHIAB~bZWwor7se0cj|swr zW8yFrOctgPQ;DHs+A;l@Q4G_3e0>ZrMi3){k;KShlrd@;4U7&(A7hNM#5iDFFm4!k zj2Ffa)AXj2cD*ql3}M7-K9k4j31V8^#^uh4I7qV|>@)a)1BxWc}xlJ^%fW-kc^a z0`~t{1PcbOGY;(NM(6cduHT)Tfb2GgY3Zg6p*IJDDKT3aXwJJ;UMzJzD0JDyk`aMM zWN>=%K-xJ5QjL*bG!#QYVx|&T)m~ANvk>B2XEcP2%6nAI4a*_XZ0VWJiVUys!j4;hq51XlQP;V~k+98B(fGR&baSgxiiuAn55zpWv|iDm4}MeKkYlv*Ttg8oLj3=|6p8|mmdwmW#6qw0<+Og#%_Up zdJeK%o)?v!-HY~R&v~=#eI>jvo)aeeG6oLrq^)&w_<<5-IA!M)R6z2i@}`R)zaZ{O zW@Y_KDjctM;r&|Pf^1lfH}6O<1evj34F*>~TF+rnKi{AWtv{@O+2(j1>fKZ}#Y9O( z`<3^MEeq&_HQ(|-y`3C@*8-EHdtGy2>m7?tE7=(EmM*?~Xx-2mFqg+NhMDJN) zbULjAE{pIFa>lZ#c@%2i7w_r>*KuXpPwdsmuR}}zwhj$(tL+<^t6T*J;bkmxr8IE< zToKN`tP)P}Z?8IWrwuL$A3YG{KMV!$&TB2903zydkP-chXYwcPH=XY>l0!qQ3N5{P zdoq!+Q^I=h!C|yc?(9~Xwj9Wu&;Pc|+#6I@&fmV^#3XtBLmO?J?_JM@+t(kcU#u)a z(L*;L2X07(b9~nWmAE?5kWu*d1tpEJSoE#Pcuo@%jTZ^JTWAEGnmp&;x-`SFLC@3T zxx;WcQ&jK$?R1b`{J=%^aTbztJXWt<{sjy(Rg2@3o8hSYa*iT;Gem`n<$V~cLf0xH z3~gT0;km)^lkUJ^vi-g3fr(j>1E6)_*4EU!?GUkvW!|%jcz7e~dN^Q+4#rNSK`BeW z!r7oE^R&!5IK1g-qvf^%&<{C(QdI3L^i&qAOnzwt!-1#c{LE8e>{6*4_n`yE3psAv zj}Ak(wUCO~ykGETQ|3AOl4Q{1RnfhDtQ@>0DmP4fQc=T@;4|4e8cbDNtt^xqL&jEW zKg3osQ0gdkJl8G-F>m+cZ+%`y_J94ss?iNVMJ+*EOcN)Dk%B_no_k@nh_^nBz09&7 zp4q$3Q5bE9=>?ptlBRw_(y^0E!s$uq-o09;d*kD9C)qCMsYe>be$e5uPcDGldY=|Z zhEvg<5D%@V>8&7THzF!4YL0$xV><9DxeZ!reQ&>V_mllk&+6A@gH!cz=vd`oQ*jAY zTwxuvPVYreDwcXtKKGzY)n&SKKSZPNG}n-fiC(m`JxM&2wU+EZ?T-Svqdb0r%~`tK z^+P4VeP_kP+pJ&ULB{^@iK<~Z)WRldBf~%*hbp@-ANEJIRE`(kHI3x>yMBtXICHcc zp4_S~(9G;ZBNfsGM+;RL*N`aiG1Hcrq*P%Q)2kKVs__|cO8VY27 zK7W2~5{UU{vU=-dCfyVWdh@MAB0UA21SQ|)8%v;nk@~gt!CZ8A)#!1C-!E8DdYR*t z^e|XN3>~^JKTP)jfdw8h39{knR{`%K`_M?lBcdl7Z$pO_yB;;WZjFTxJdavz`?KI- zf>u~X$SAmMm-)mK)&}%yQwo#uv$OqZ-t#N_7D#r$u|i(? zM7+!)IKAln{4rw6GypR#p&4rp3Rf7SiAUUL2l=yCFCksF4A1C=IQzcj%8xFO}U zd_|BzUj$O94T%1`V^#Sr_QWob&uM0CrT&@!(?9VAslM(*_g#6$?8{gvx%+F5b`JAW z-1vLB*Ce#UMYFUwdmb}WPK=(n=|9dwk-Z}+Uk3{)#Q1w87rNioxf|JC?H(4b96?rW zy=lEFwTM>1DOwje0H>@cCN9;rgImhy>TKs9pz7rdvUrAT|W(}s8MT% z*L0K5Z>yJq<|XX_)G!j?)u1zp+-V~V+Z4JGlgJMhHVy_@WTTq*@(fh%aZsz2cN_wR zUP?L9St&&S|6;y)e>*<|qMC%9T|bXP8~-Z*R!wF~byx5FhoqZ^<^j0^_Sz}tFy=DU54gXxz%sB~r z(fgIHh1HOvZU`FTZHPx>WwJ_bLIr*^Mna zr25JEvswS#=O4$ZXx_xBjHvV`^8Ta5yt*Vj`rt{ETv3B{EqbxTHOxyp19B%%Z+snC z3N=hqCyQ+xAx>61Z@Ez+gs&7^zDK(O0?w4Z)L!3?LK6E2RuA^W1K)LzPu3iU)ko}3 z7HTFSqqWD_R|iwjy~k1SyLa^qlBx4#;O|k1Y^)EEC|LFU)FI0X$iI$kY^UZ!W z2>M&E>gjD9K@XKaIi|h@$lyBt>SRqjT-m?s*{(%%)HZO;+sNJ6O|Cy$j%dwMvmW$I zHCw{&-5`=ls^nj{tqV;qyL)tp`WSL+K|86td(eueDPNiX6jZW)Z}BykPO|+I9KXlx z<6x$!7FdQ?U!+2EY{JM{QGk)H_n%gCjlgH7xQ!#r8EAoVm|6k%6XfyPE$L%xH4yz@ z!H)N_@A`7E|9I56*t`{tVw7+Ded~tkpJGvk73na)D6G3@ln%WfEMof)0lKtUpkk`) z&-|@!!nej>dIJ4SXx-7`c>^&?Iu2*=Y6pq5k~KCfror~C{oBVjo#0$$wu`-P7}A%r zuHbw;0Yv|8)X>|)T+#%(k<~$)=MKSP(!93ApLUGhspKl(WMZ{wPRG^Vmj@>BuIf= zj$;p|_eDdS>$&~>?EPr*m0Rv>rw5UpZpW3u*Hk2Z^PrqwV>{V?fojYB4233;VN3ch zfwfs+dHKqU%Y9Ag$}>g9ijCF4#&^WOEWHu#eO&c=>#}MvPDv|#|7946^%JgZN^^bg zfwI$isdU*KM2Q(~v12rWUG6hgrg_z9tGJ5Sog-CfY3kmEr8&*0t84AXTdG5({qSV8 zza=|~feyyUw!F$`CfC1>F~b7Kj9S5r{YbF=9cIdQ{sFDfWh@kq-Susz?7S4+#sS0m zsXb`btxeSxZNq4lar(~sot?mR?b7BHH73gYH(QmL#E-){8L`**cRWLklE}XIt2!aM zd9kV1-F))-*O=7!&cEG<^eA7Sjfdue>Ois8`O{jnZNYW0|T3`5jf)GfAX44Jqp?RM9Jvk5Rz#$jXx7z3_o%UHk}7$_^WGDeYZ4`{pSp?llwch3h2KkST0)E3F#}NVmR!X zD8)uclA@Hy;bEIhP`B$_^znd->+$$bm|`tgFskq;*RRKVF`TMxb96kwAZ9l2 zNbu=4ut{+6&e`7x3Zi$dDVLhzs=Qk5POnBd9G|X38L0xI{<${P@->hRPUaj6?gb18 zPqp@(t{FkacWtvioXmuiOLW+lId;JHdC%7HMX;!?rw?;3<7y`D=cT(7e@@rLAf~;0 zrSgvtK!~S!aA9gU?3-pjzrAi8Y3>u*CMVtne77aT#gibDX4s<;;^FLDhsqz@SUJbXqkxD>;Y|)y$g$rua@;Zxa_Mb+44*V| z{PFsy7=JwT3#=#7@7(%X220hM8jATS$n6o=wXMAAs5Zd+)`J{6Tot->=>*=F4Ht-& z-z}t(^UwLzT^9gz#PHSu zC}3XxF3F;q%ztOOq^C(|FG|!J(%z-<3-Kz_#q2rao^_kC&*afm&e zl(K7qI(e?J;PFm!{aUhfxWy(fAMcBl@-HuIMFZD9Zz#>rL{G#yEa=t4h;M%Rx3kak zfEHHOCVJNsYCeAz4Z!;%!hbPGy_wnQWeF6l>!!tpHX~LJ#~QdyLl?Nev9Y}wKv&ge zr7vG?1eHB(Kc23kqL^AE+jHjaSUiD+O##B3Kq${kAxz`Ud5;6k)Z_3db zfh5*w>n_B%@(WwOa3R{K|7h>-<7S9Ui{-O?^b~o0-S$N$=GtFj%D+DAz!xeiYn^wc z(p zat636Co0afPKEbdp9M-Ch=D@IiDyYC@Za4|e5lE$!$2h#Jc%^_2DDS4T*cJ1mFzzU zFCG(;M0lThUuW4;>Nk*ycz;GBr5^^F_IuoP?tppOoJL!OV_{>5RS;Y9AdF@Bz5KW! zhaCR`?N)K`>A5iX)3E|e`vFkA_hc$mrxJOj8Wt^F)`cF8*RRzI$Vb`5tJBN$-Qh$T zPov-4X+-!>iX>7GeONmN5773E`^h8VskttV@@NRE1p?00Z)t?Hx~AuT8q%S%tKsD2 zQwFe|N@sq{(o6bZc%+5xA{UmTGm|$JZ%8!4J4QLzrZvUzv^&DSjnM)g4hj>tD!Bdv zS)PY#yP$4Rw`D*3ANzxjdg7$Z2VuA0#j~2NoP5-1 zU32|l_AdxJxunZ?-7r+tC45cnVUY8$ISRZk#y^7Iet48G>p2Mb9)9}8v@r}_jaxou zvosQ%1I;$JEM=hJcZyE)gIUzpCe7Kobsd%Tzj$P9`KWzk06c0(N1s(N(BZ0J`JG19 zh(C}&>PK5Y?3UUeohI83YBzh|4v72&K5H$zBdu=`v3@tda7)vE{S$@U36_sDtwjqy zUg2L{la1~dM$yt)N6db0L?Nzkm=|;L zx_cL@cdU+275a%jpDm8uHRX@8w!IS5=W2z`LONLJ~AE)A)+eD2zX{av+PSFIoTcY?39_&SLL)gZ;_IxW@BqBe5g zGI7N>2HAhY7_uLNPW*y`FRap&)(jY5`hcU*k_KiKqwc?G1z>dSK*~O@Z?NB+m3DwH z8gi^2eDBZdC)YpWvO3Ag&dtM>ngeq%u=@YZq$z-};~S z+Z-QCeO$sXwEl7Pk>c~^XzvUAki7|!=$f9dTi@0e&=$M#L+u6?mQ_Wph!LZsaZSUx z2QK|&`)%U7dEytX8+9rl_EbB;Kvw#K*S#G_(8{A#L&%0-^NgASd-GuwOE7os?G-WPiG> zU6x!2i+6f_fBLi!2>X55Wx~nVl7Rx3Y{^`9ydFAog_kglQ^0!zPlH0&5M)0(CEPPP zh(>(94d%VPjZ$PT*iA4skn7)IRQ2@79YY|X$90-IGJ^IvI5qI^?nJu0-wwy>jv?uT zKYd)SdeFqaZLEnEspyCHGASYKUnJJQeS(2Xd2~Pnje`1;0>#MuMq=)+sCHB@VrG!z z8w-c>N?o37GSK%&HJYEt2ap3r*y%a?(|_Lk*sqCF7(mBH!oB3z)xiAcO4-LbdeMu_ z9gZf_6-YgfDso4*3hD=YRZLQ^B6d57OLMKnLthXL+@2~ zG$9FN_gdb&4XEcpIbB4Oflg?y=TTkV1e(U>A#yLe$ol7HQpA~3E}}!9isuGD&&KoY ztvL^LQ&HQ?R~!Bm3X96;DXfg(0}48`-#% zUI1D&`)3!jGmvbBkFEvmr6re>!0~}Pdf9{s|6#_aT^!%845E{?p=Uv3MNs~5<4&8ow#(4*;8i%Z!57qatVph`tLV)LRYt?er#>(4Rlz}J^gMQyg0sKqn` zKD71+_E3L7;IeDCw^;Xs-Q}xi9SR2Erd-o8y+|r*-=5+9zMF~kU-o>dm_ODqhL~N$ zmD-eApvhrEx#h|*?*GLiT0dxLPOI-?2W+b$+58zJ%Lb3JZ_TF_y?RUxK%m~|E{CFIBNpn1+ysm`i$_H|(X6;btn>pS%(u!K%{rL2}G8zc~ z^J+#*pwQ7eCYDPaKR6p+X9)KiDxBGXdK4A8{^K5JPIO^)#5xHeU zH~2lf?|Mb03MnnG&7w8n{pf%E_b>B=^Fp3hqjkMEJ-NPgqkRt=Se?x|DBOG3{=!4DV{Jr|C;=^^4RcL;H=(abh40vpk<52Kq1a^+P zsP^2Ufw{N%yp57{$lIzQ^EjOWE8+Q~x7Z&_ygzk{4Cr|It_i)D2vaE5qQfa?|NPxM zdSGP!rihw}arE$9T8?mHCw!Y?HM}TOiFOqnrbr5vlKtmk@rubqY%QQtXlQ(%rxB$` zn7ytVYzB4rv>Q+L#*kOsp5{aa2AY4h3;NgM_c!Lv&X%GjdCzJaJ<-F=$ zLIFc)?aLh1BEvxlT77Xx*`r2y-_9Mq^J+d6M&H#tx;zTzDgI3M3`vA)&Da-|7sH6~ zzn>B1ym$F|3xtGf4n&AjQE>PL6|YxAXxZ0?G<)|}1Hs_+4OK z7OjZf|EWjbJtT?kdjXkiRK+wpGHheFkhkqc>u;~jHa*mevLbtmkGy;jQ+C^?Yt4Ru zpz{(jZ^MVA|6%`k=e%z^`2AzL(&N?B*>K^7pC2ozZ zDmyS*6Kd9i7F}5u#6VK1Lb-FdH=zmv18w#XKR|v*x{m(x zVRS@H=vrnR4YBgY->}5%t4UM+u2+tK-d`M_Y|>R!7(y$vH})^j&4$>xr<=b8jzQB$ zUmo>#Po#94h0zt>4%@~WYjq@w;bMv6-f@{0a{Q@WxN7#sxf0gvT6wXfF*Gm7=cl!1 z6Wa4^N`S|-0WDo)T2U^-Kt;7r2bCO}fGc5#S#EnDdH)WbA6+k9Xhm^Ghm4BTs!_!X zN8ao&LumimhwJy4wIYoNRyMomFwoD-nrpsyw4ro`c#aFr@#OQj-MTEOu{{N{#aEg= z9k54H8b3s~JnBK6RsB1?BL|V!#H7$Vg$zX9G<>XQ;{ZAii4v^7f8L+ixShMXxoZ$) z?yI)@(y2(IF0ttJR~m}v-TkO_;1>GFwg%8@+g_AeI|U_kMuneAWBcV$_Lj8iPGl#YonwgKOAz}n zQ*D-#36Bg2EPtrd-`WH7Hk(}QPnZC~bFV)@&Ls3CdI>KSssj7l7p~M@Z$$!fOU^fO zw2=1ugS_e}$B-Oo)&FcO=~M;-5`(KY=4PT`M)tf<3c0XyZtGqhl^&R1c!mFwS_dpR z;yn4ImqF%V{L7i!1bI?j<$o?(u@bstw!P2Q(?}Qm!ohOJERLJOq55g7uY^? zGEoF&R6a)T9wO(T>$g?Hd19zgUcGMpj&p735qp4kWJM8Ne;TtoIAaWPEs=ZXE5Sf_ zhlBUpVEau`ee;k5_W)`CxNw{8bCoWJ)$0a6Eo^5%{tbl+kthc8wA0(%R{jo2k4Xko zT$w15WwIWg>g_NlU^W`OxQBfIV<@B(_&{|MC4J5DmAROMn7oswSPTYWJA1hOnWk@` zXJ9YA<$exq4&gasl{^Zxq?_`S$C}9Xhcn^)Md677$kv%ZRw`DBHcN3Cy?M)k1MH{A zbwUy#VL^3u*ct|E+xbN@XjK3*{JdW`d~QBD{&gFS<2w|~;j8|#ZWpd-B=LgPbexTj zY=_y_QSmw;wz1%z|4Ig0l{9C{H8LAjJ+4V^I#P-V|Bb8W*3t(34yZ4UX=18$N2MBD zKA+eZ3kyVVJ2d$BBNgrVWo1eX6rOm`XhR4Uq3oj>WuIHf_51t&aMfw+LUcav<#gF< zIwTyco#T$@pQ(M}<XR zmrp)VGd#4|cwauQ7Ez;i_!q{0N3S(RJ#xEF# zFM`4c%Yui=@fT55w>oS~8(K2z>0kH04c5-PDZDMM7O~x2#`|R-1zkz&Stwgd1zL1w z^PTQI;LJA}T$Pqe?q9Q&CVV$9t3o$BC-w@)jlk^-cUFF6XQrfz$GyJ4jg3;YGdbRP zydRE?INst8tw+0kmOWOxJwUF1i+{ejweNKoBF>LR{j!$0*Yyn@-kh!1r`e43&K>Am zaHAWl?;1W{o&6edu1<@&E?-0LUsM>9$2m{8gXHAOrV^fJR6e;kR=bCR=u7gx_h`2w z8#bx92Hbz2=g*b!y3mGV+6)|*Swxfedq~=X()BAQAy33$AjK;iwLE$I(OkR|tPi@1 zottO|1)90i>ws`b+PLp{2!1c{%}8T&7)~el@8))XX_;~Zpc}8pcCn-jIZ8pbI-(=p zxvTRWDFcWhcibxdRyCL}Hn?T&mWnzZZ&|;xrU7w&pjG`$f-xR%TBqyfE94hYRyzgP zCEu$oabZJ|He|7=r)Q!Z5GqUN#e%&iML$D7I`(3 z`-k^@|9@LG+Vg%+s&;|w%0e>{3jstJ|9i70J7*a_VopG{xh?}B#P#*L#w)i9i4 zXnE!*i`uq9(@P1HedPE%`-Gk~eW4#2*~zi|!t?I8v2!u$XPeQ*Nr&iB-eI&Ws<}=W zx9`A*Q}MC}eQ4cS{oY{FYI6K6)%4#hF7^)H)F0o;p%w*k2AVk+Z{+U=qMiZ&q8Qyj?cdLKD*BpuErfpVR=OcGj((hp4&8r+3RB(I z$_wkOk(?3B))wPtt4^|O})eIT3kKM4)Ek1gddC;eYLi-O#F zcz>X4Kb=-SmMa40rX-$Y`h93fQSyo3Ko61%Dk*#15QZl7_LQ#*>_yBbf~$2NH35N}#LKn>`uOBYot-wY+?IyCHn_O+3C#PpFkmiG-oC0_K`2 zp3U9yJkAy_scz5#@!j{CA>09d=$fD?e z?5dLy=)5s!XM=S;Isfq5=wAQ0eFR;(_r-@2*M-JzxVekSWg+87{`Kz4cwewSE#C>- zuPOKh?k3cO7WcgmWt{mH-JNCR`bSr1n%BrO2^QKfgR`8*k@(8u;#b(7{Ax3ve}%gq z&C?xxQY+Yumd|VAUz7G68A^sK_RQ}k{eM?j6O1bbi{XjjKGjF56bLvP|E_dh0fZ;= z$4ZLPp{>m8l~&0B$Ozx6u5zWK1CJCIsK1&d?cZ`oXFUb~KIGu}dAouK4Qa~jBR!`k z@G;%A^=*F~T*`LlJaY0Pa@x7@WSGY=2=Q}7zvXHquV2|*HWu}{6Yc&+YtfEkp`6{F zdSk4z1KgLFb9A;01M{N?+G~$6(Az_Y+~&W%hO|VD6?PnFQX~8asmDj`-$jn0Ko2f2 zFPU^ShuZ7>H8dSk=HC##|E>eQ&}CRiIniK8@IA^R_GWY>X_0&lwr2_d5#Of=77Bvhit^mHUD}5Ik(Bew&sOu~perS-?@Q*f zsPPLnzkfGeNZP-3lQp-(eoev8LsmOj9=D^;V`^;N)7dCvk>gcOd@g~~#L3d|Fc0)T z=_cpNc)`QnC%dI4M#=TBVuzvrW#N9*x!hm*eOxbGt7WTosvbwrt*(liNz_1<^1>~d zq3zIMwIlOLNGn?LYPkJ<&IdC8#}u|LM^p4{nh?5%G?oG)a{%YqBwp&ezN zCX6BZd1DtoY+|6zXGA_-+uemYIKIW0MYoadFJ3Fyo(cWXyeVxRUA+@B;@Na7CYn%6 zz=|K{2{dG_{En^MG#7cDyu11|_B)#1yUDS+vXp%ObmvRgBWd`2jE0(O)teOXWkCY+ zdw!tOLm68`&wNKF4~A9Em3q+dtyPjgg_}?Wo2YTfO@is)gk)43TvB@1( zr2lhz|1rIvIi={-w&13wSV$QZBnyKYbmGs%m0nGXt{0Te!TLJE9Y%DRF2ny3+^`jXrUB*fyaO zwWNtfeg)+H@8FKuK7qf-$*mnM6vO9;FKOISggOR#6_9>CN$nO&$dsie;PYrJistS8 z$b-LUnL0GdwWpPwKVux2_gA)+gJXv1wJM)-80lVEzg~rkPIbj3i$83IpFX?xB~*#eb+cO9kEEL)Ds-$Zhs*E2 zR@*owz^0=rH&bgf(f-n;_jZq{|NaTxt84j3hbL2E_(S3RkXWocDL<;g@WN}B86p$ zX}?6k)%GrAUlbV{>syEh&MbQH_PQ%NPj~HiellGKJEg1mEgbn3G?tj+rc2BADZ}!dT`CJ|PTMg_V z`FLEICdo{B$H&vT{>@y9!fE~(#R~&4(t2Dih^-#Qp1r&~Ve23N?HoZN)1J(3P||BS zy!K%tC=H#Th*?~KPS7{+{9bJb+s!t#eh;9*^kLmoLQh*@d->9Uq}xqo|4r$*{P5z9 zP6#PeKCO__fcTl*4R+;afWq#cPYV^tQ9TDkGH!ej8E?EQRKwMce$Fj^(EPlE9RJ0~ zkWj>fSFq=pOn?ttKS-PJ;nSV5Wpgyu(D2QJ5UNal0dd*>43 z$o*Ta&v32wUd6uDXWw7gd&g)&N0%BZ4wC*)bm2`i(-8iS@0a)mYy4g&iKAL}$B`l^nz%c>P^=9W3cOZf z=VPF)X9l}kExO^6y81hn1ApHC9HxyNZt86TR^h52`LBkd(2iy!^SS^z90|O|eYX*A zZ(E`1KT!idC%?786$XfR8~iF2nNFe;ZJ9IAHoM6l~tGHG)_pycQkYSL%e% z4d=fySX0SB{i!9gssWu)FRI>h`$#tmbCAC-y0(>EzqE&!uL^U`LFG-K4mRgrK)bom z)pa{kP*7~_oTY`|VZqMqA4=H%6^u0O*fyCAk5?V}xt%MPTtBamS*QHeXHlco>%P40 zQ3Jwq(XSTDrJyb4fo3Kq>1f9y{RZnLgRtt{H~;jyHfYXRzq5WNjm&So-aGXjKL2~= z?ZAlql`6Ex-$eAof-=~|Bt7Qo)Cx9UQK?#z5m4tLQ(TD8V+HE2W*ymGLC#<0vQ?sd z)7^-j^~pP<+bqbSDyX9vhSD#*BVb;5ksLCWT^D)RlQy;E4q`8GPa{|gfj z%S%|&1_|>tSoS1kqFn(6Nm+~OpcOKGH;^v~e7~<#JYk#z3X~Fet;rD>I(R$q!p}e7 zUuG({sf|2q0mA+{Eavq@?no|lSuTA|t1CnT&O+ZBN`JxD5{@64`-fpraI5L?_z<~% z%pLf6kyAAfNlGS-eNgB{wj*>di&JzoNrBthp9j#nMgxymQPn_vKR2(^;@8&I_2l~- z{CU}7X*&uWD@ymc{Hq&UcF?<37vOVh(k~ba>k>dcxnl3V-GdM>%<@?5Y6_x1Ha!_w z(M-<2aR-;>My(zJy`?u^*RSwJd5Jnz>JCM)YozAQ{^@3ry>h^#ZhtsX*^iVwil@S% zPwuCi@74fu{$SN*esdyDUbML>cS8AEE9$s*=TwAI zIM_~Y3rvhphw!&w&Gu7%kmHX&9oaFNSq&9SmX-*kCS+YhpSx1J9rWI>I9_?P0}8{8 zLBqWoJ{3}e^Bh>zZu}g&mG3-Aj=y8}51g;qwjqbISFMg8s?f^dmAtk-h0yebqC$P% zhiU@TIxKX$P({-2=OIB2NP=(C;=Hs!=f~ygqY3q28DRN9xHusp1?@QcF-ajraaP2hmZAEVwUYic(51=1R zB^9Qsy(re;h3nodd|p69%j9(JX9)63_I7@s2Ah}e(DnC9An!lD_^XfI;~J=tjL@ri z--l8vS+v4Sno+h*%R^@QVZ_V%_O~jnxy*B~I zmy-?ly~f`k-(BZZA(4TEUu8?ZpbR5Z3%-FDxAQ%^gE8EE{aD2r*EtO?Uz|E)5+_o{EyGtR&Z; zq~SaB)`<^;Q)g3vOahY{UugY@7-sCh%2XAG@+ru4`-P8~;P=mrc)EsuHQu*P-g8a07pLcqZ!7)aqPR*0NzUkM0MU7`Paa+S3GMYERzo z9BYA(UdKvlbBaj&$C9#-s>${i-79h6IM_V|X%7S#n-Ux0*GomyT}6fPY17(9`Get* zS}O65Cn6PsYxMVry&WRQ&)z8pvy1X1Fzdmt{Z4`CsKcQ0eUlz&6!P;Zxt!Ana7jh~gI*A@@&T4|l)bEcpQ~mRji@aibF59D3lnC$fq(tg$4J6rpWz68@tA;MfbXaSKq00Kxr7sTx*%GiEc(; zku+^=8c$vV$zM6} zSpCDLN7USE-!SIC#IT1o@4K@ArlP;kg|P2{SneqL`=B_|Zd;UoAQSH9AKwIYh*dO=0B4Kvcn`^P*r-|h>XQ~8nz2~))0?zI0yI40 zPL0B#p*#0f-!H?^N%67{mjXu4?Hrg~+QH(_^!U}2IVa}efv*BWhb~vZlzi6g5aiDv zP1=7#-F*x_?~z{Dj2!rWxT+@7b^(fS6PF9X0L=bLJy+_~9cTyLQr?ak*+oD`oU>K# zVjqn8lG;vEI}dJMi*U3?eP?PyxoIAR`LJ?BGx;K2C-(fC`Eoz(9L|6(ZUj4Lb*7;5 zf+I`i*LCo`-S!f-cng$J$dF2;ABRyUW8-+k5%9w1=F@=XbS(dL06*+ziuy|i<1fC^ zA6|xc!8eNQJxgGHaDR-x?kA8#q{|vzLVycdZF%_<;fXtaRt4NpUVj&V`in`3P9$&z zHoi@Fx&OHUI8+*G)NAxY?SOY#*-71itTS6uG`S5#dGrqs*$e{nSj*UJZ&on>i79OR z)E>PBK)!5h#2p>thOTaq`AC}!}a0ZPq_4rWkxkC+*P~p^*HL-EP$rxQHePRKE z6w9a%=1s7RzZ++woO`x00`UT?@9#Vd1@&5{Uy=rX!Km}8pX#Zaz;n;?FQf^Z(Brv6 zfYRm?{F;y>pIEd4cli(c#mQ}N;^v^lH9rjkZRB@!^frg44FX=CvK??{8N$3cig zq~i(7e}A|0FiMgP2d_x&1*k4WWA=}k*W=Ddx@9QIH@Kw}ittm8J?cs?aDY9o>}o+! z0<7Q?lU7IltJX_=#GmVlM4VTBAFAEy!tDRUubq?x@)cPApIdinilt@}svNvA_0YW- z(p=PP2LcO_bY7c}_f$SO9lXBsXs!=?e#FnJ2|u8Gr`K(Tu{cy`6>#QMEvQ+6Jyx$& zI?(F_hpi{XQv^5>7NT2%PTO$mIBgbp=wJUCDsY|GQfz{6w`8VHD-ejtU39O>Q0#(H zN-htC2nle{aVtA>n+qVg?1Qdk?>KlE?nCk@cnH&9`e)s?acQ%VmGAP`=MR@)1UZ4` zBEo;OBoYhLE*8O)AC7yE2v0!SL>bRhPDCO^kw0Cdzo9(kAO4Fxp`Buy{B?Mu_54Vg zKFaT8mCn#T8UXmTM;BC`<^cKG{9ArU5gvZFZyF=H7r#?;*U01aTs;&o z)XU46=z}_!i@p2w2Ebj;nyn+s)0qD@OCvGU5;Y04sJO3O1AW-|6(!EjJCT?J^D1w2 zez9!?UJ)UMI#y%A^_;KTmkUjx&Gg%-Q&}Gr_#tncWLg1p$Lc)py+-}ifB0kGCCmCL z?O#Ln;?5#kk6e@o7#?Lb=?A_Z->$S>&w!_xJU;}9W&#1-NpVXnf{%BG@MRK(3H72%mblK4)*~g*gPy zB@Tk~powsxS@GoG{u?*-xUxO!BXHA5nEZWP1Js#mR;h^@1bMAlTzjcIpu30nR{9zq z=I5U@uHM%TmroUH%8_hh`WxfsmNq%H4iB!4_K&+a0vgp`=NP#K*zu-TzDp|$W;?S+ zwWIjAD=H+lIPyD?+9C>)6^O+0AF93Qv=_g204L+>`?uGY!4v)SDb!vFx86X-6!2jU zywA_O!-?vrLh{a*t$Pqope4i_Kr$a%Z0UmJh%L+e@O-k0-GCM~tQ*nbrX1>*IY(4pz=W3T4j)>)9bl z^TZ zeo!{U5)iSv!#=Y*1R|x%wqg$;JW^@HE7P~5pu;D}udDVeroVQ3n2$d^>j?Ko&C8vC zj`DWXYf_=F$3VIE@zkcmMo|5un_I%FADW4^3tsiFg~Jnro@zFK@lSMu%4DWA%RqU0 z-8?O51J0}uYYTtM1%;u+nI8k1;P8#xf#vC_{zrD;Mi(I=?!?@>nAFKpto}62A!X9h zGY-n5{8?@tE`p0kpZ_r2nFXasuX>Dt50+jG5?p| z^WdEkROfDbk5`^!SO)t@$i9){RsjFT`JMW>Sx{BE?-RrF0;nfjSFi5t1B9)VEGzQU zSpNRuWs%Bx^+KSlaI5aZ!T=OczL+U>aTy5Fo_Tv#4-c~QUrW_q8iC=ZKV9C<5Q#`9 zdrH0C`ioy5GVpyRlQau$;|Hfw5=y{a&sELz&SJo~Zl}H1A{U-fT%(ASMg3V;k@#2XN^m(-k*cmaGhH9DhO+5>7& zZn$4S^$nr|C#I}`RxE!gSAD&Eb^i#|isN&ypl^qrdF6UhI@3VfWJ=a1xfM9C)CjsP zw!p{N@a{ykE#OEbeaUb)4)Y(=co~aB!-wHpu|sE`n@)q03$&Vz$|(Qru3fn=c@F$6 z+PQg@q8+Xsk=5f)f zjsv{7u#wF>3vlD8;fEKf4=r9o;=+1ZKc;^VFMf+_A?^j29$bHvM!gDdGv4SmpJ@TB zWPF5TN+p2m-g!e3vRcSp(Vb3OnGfyy_J8c3?8fE~qe_9U2+Czhf-fkpc~u0uB}Ck( z=7yp4`gN|Y+8wwv`Pkv%>;$yt6`=lHyA0157;I)nZeZ~TxlVVpp4BXT_iULV%eWA5 z#CACs$xp&}a~I1A^^hO;=-E!e*#-D?To2wJLA(ujU1f(b%2_2(|zfvC(%-Y9k=fnZ}p>btA~!dcu-)upe&Tv>tXjQVZo6PjC!Q{>^{X5LvVa zQ7(bi1jf^%ylX%&DtDbyya)1frN5QZ>;-|PO4rY!d4J=nq))xwXCNry6X?b`kL3?E z+*Q66G3UU+zHEjL@*co0#Wxc4bP2=?yh%%v9LDCaBJ<6$lQN^=>HU-)D~3eOe=)ZI zWXo1x4O2tE{ot-|hIxv`FRR+7!MAtsik)wcKwjR$$GD0tU{J&7cbs|_X!cPrR(;LH z;t%2L7R|oY{cyN_D3V!W8TMV`yxG5R83r?^7V9zK;fur{Vh&OBaN+axewvU)$ntBY zm&|4X^M5%!4Qa|(M*yE4Nij`e4pgP+WuK-QhJMk!>CK9Luv9aSBQ0bQnz*Yq)Asel z8&-}{)wB(87k_UQG~+ILcLKF!`YVyIuRsxoPuivSb*z-;2?6LAW8=C-on@G*I4@*V*~UmoXsxIF&=} zN*4@DAoSMK!9#p`r%K#o0<3?+lF#YYQE%mQlD1z%0Qx%4{et^2eT0b8rA^d7E<7UH! zRTyyCLM!Uf1ho6^!e(=K9dg^bF5*%nr zdXae~z61yhdOS!IU4RBQlFK)$M=tofBE;6IjsC8kIVwH8nrjZ^+S*+fh*5M zVi`;*3o+lf!GqdY*_&4Di2rLfeuv;5k;q3AI$Xc&IM~HcM$W(Hif^5a{vy0&J)yHa)bkjh|cPzLa--pHDk_O*sPR5}6&@l6J z-#eqAuO`;HY>@(|)JS%{cwrpc)$bD-tY3j2Vkq952%-3m!d`c~S>6J}yr0LHOgBdj1Qu z$${YM*Y)h11CyBlJYfg*%ra2iA=T+=(>Do>sx|1ifA&M2Q(X2(wr2tD1IvdVVZ(s& zf&7`3D?dSDD{bvoUMuE5GQy4HtD&9HfKjlDh@g8&ZDOrZAf!chRo~*)W?)F+_ zq=Y!!e8b0H)g728rG76fZUMXht8UA;E%_(Fn|y-a3#UuL`a925alaxkFq>hZcYO=LjullvQ9u9dz-v^R#XRc z(J@U3ZiF{{N^kplje}U%Uu~{6^U&`|&spP*bxeO#eGX4`u+|~|%k?7zVuLV{-Zk7W zc^RA>)KBrr#RJD*IZMUZi)3?S&7v z(ZN=@U-YSqzfu!$^3!vrWXgxi`ya;n{iugbmK!=zKk$$zDY;}>vJRg24yu2q*MQmo zJwIn0)=_*_U8OddbgBv7e9KJ4-?I!l)&Bo3y&~Wg>#vWkb66RuYF!5yl~8$Zc1zb7{+GuTr|#rENWeu4kUSCdNoJ) zuGSdz8V(gy>6^mzzv#yUOPARWcyLfbKj>vG+$&NLB7Qm*5G4YD(VI}NSp^{e5# z%Rin*A^TV4G$krGFahsvYH(a0T7#k|$VKjw5aM?84{&uJFUGeG!e35U&R&@ZrwvUX zwkfWFmF2gZA9QEI=b+K)Qz(C|;B1s%I@AYp_j$eM?ij$vpKCa*dM&d8qug$|x2|=< zknuY&^bpUCL!i27@{cL_E2r>z55hnBR`&BYRZT%9|K^8&_5+yyeO^%}{lz^9Xhi)k zEuGx}0)j^#yd{_bw>L}t^hp-sQIfVR6o|j2*ldtoDs&B6bv$R&6ePv?4^$$F&2v2bW-j>5pKc5rf4)B? zmu%~oAoDu61x;)txZoM5bl7PYK3F#S_TIe^W^~DP>}jfollNKTGCWtIlV-rv#jAhw z@3m&kVkO-3!1U4TvPfAGBz03Z-$p#|7=WS-*xRpSn-ICnX!HlihPxh)9_w^@`e{ijOLMj z5MWcUQXInU52d5?RdJ_9aEtNB^fXmA=)jX}G%d^n)^VBEk-akzZ;J4k|}us&Yj`%=Hgs}-_kB@`hQyiZz2|`yFVAh_TKB4NJ7T2 z`m>!CbaZE*!15>j;U|*(9#q5J@N0*UdUk`;-z8g~P?y72QCbvH0qVh6Jyq{m8`!Z(NEBJS#a>mULV+sR8MsSjEJ$HVd+>EaH6 zk4%a7pY4YO%XPV?Hb@R!BfQxHz1aN|(lkZyL41u8cdl4g`PKvXbM9>{muldZSN?O6 z)pbx(gDsT<$&Y{R@jJq!M)iO^P9Hg31koGJ9bBzA7ml4j>h1FuhR2mN@ ziDdKgJLjRK)W)k%$%uEP_DLnr^G}%nw0H6#5$OrUZyb^2za7vHztP-EvT_@M3~lK_ zRY~ozCb0M&BprbfW-OtqJxg$7q-m{Kq#3gxpB~QTX3@^T&`V3X%oWQZI_%{#GxEQM z0~U@RiT%re+pe3srz3P6++AV$V9kZ{Du4JF1z*2vdl3eqoRq8`bK*D1r}CmeM|~1z z#w1doG(!AYN)ZPNxfkH#-Kp3ViA5;E_Ur|{VI$_h>#c|NJVJT3Tz%%x#9LjE(xD~y zsxE|cZM1Pm5#C^zzx0ehuAwzO0K~bsGBYXkV9(EtK1M6twE`x%7(5Rk7=QsQ-;z8@ z+W;P3!j;`j1#XT?Exw99kV(6Az*@K#c9xrT3~Lu)^?zzJ8G53Sb#Ug#%vfa35a{`~ zBXEUb9QrohXBRd>d8)ak=RCS?klO!U?P?B@i1_JY6Xnz;OnxWXs7WjceuA~U3^@kc zGI*aM``8EHX>d@zSh5hsE1}w>C&k%*0y%3X*>_WjmzOS8iB+oi0$z#u?Kdd@xtl-T7fu&p z_q87yTTb*f9O{GWcU|GtO2l8uJ>Nc&gXV23Q{^13ahSBe{mCSJ0??$9s1YH4-rfFl z)g$vsM4mX%<9>64@X-RiaD6u2N;LyaPOP`*KTOBuzuUhr+U27oq<0?rf@u0$F?2@QhB_tzzC4B!Js%MH3yuVS82j&kFcDh$b!KkZFJK;(*mj9rs zicmi8Fbnj29k<8nJHX*)d6hjAWAH(B$C&l?eCWs+Q+p+T5Po-DkR#CRf!jx}6DFqq ztzXdQRubF9ECIji&*m40eBtCm`LJ!>Ff{9W_2ml14$M{iVln(`96t5(biA#H__J?a zx!F0n33u_^eLDeQST@!60r)dZejbPVTVhx{8FIs{xCf5fX6#IZNe&xfp2lHTKR`fZ`+z?<*Pz>Eb z^%oB$v!Jw2Ab5w{s(P>olmFu{t~XSo=YV%AYoMug8c^eqwBfby2W=XGwo0Tp7*TyQ z_ylz}h>Gp3n}nm#s&Db)PRC#T;&-v=pVk#~FsJJ4+2Zjj#5ZlaH|oI->?G8hbu3C#{YD5T+^n57U+A0OJAJdI_4$kq*sP6o0uO;fY&~>Zj0!L;5lalR2 zE3IX4^msJi>j#VA;PIRLUiX#*Tb1|T2}OvH;1B=vgAVbavYZ`oAW(LX#m_Ei*Vet` zcy1cJxb9KK@EY*|ryORDRYv{EB9BD|WShaQ(Z<-E@B(c7@pIkm3Xz|OFB2~ruF>_t zi&-=hjfHsN5H~t>=k_$b7yH@w1+qULs(6jds z-p-Q^cc_Pe`0d=7%7{9s&g(k4Ks*cTr`}3up!mqo<3-rwR4?c)dMq5Vk&F3%CNbjL zPFH8aonyUK>+i<^M;Yy5Cy!-d(p~sHNV*Z;x$&L!|{&d}bk^xzPrt9t#)@J!=8^-UfF90?M%bvsDoDExgnK zl$bGP?TkUVEa@fM6HUv2f-l(<*x^BHUV_~fi4iDrf&E6-5|Id**rKMH;NSd5)`7*S z3)`FE#aaaWdde^0gsV?xl|*@}ndDj({w`qDblm9W!8VYVLDcDUcMv@PESWt=yo}v{ z-*7ooaKkKkGZ}VP9^v8+H7MZN&QF8ykDgV$Kyhzp?Onfq+XQ$?kcWZW20|rC4gBr9 zZJ7SP<+RU;e6$8EO9T!T9+-jcPoE}}Al~DRUuSXj+NiJPqu3Kg@^*MG%JbQ2!(l)o z<4(MCs~fBTmCA2VP|y#6WA9B)-p?8an!mPm(!3G>L+XlixOX;utwVHA*6%$qqvyBw zr&xg{$5lFvPoeMehyTComL9H(8u1#cgvqMv5aDF5NNT?gMm&~gZ=dlWT!tH_Q68qp z7Qv%?9u@O(Z#{$h3|EGL1Rs+b0M`#s;6+IfQjV<+TGm z%>COCew^LQh43fOVoYyYJe~kAqtm^FvIjByE9JA@l!TiBH4_A`=RVCtzORnk!two3 zGPwuA#}S`tsZMjoVk+z$pgLE7IRVCbJi!;w&SCWrk|y^Sug~jHeKuT-DQgxy&(~ZT zBA*ANmTV&l@8&=aFxRLw?}Q%Go_P!-uzDwCg8ft5k~|IkNhr znFm>l5dQ7kq*2Ten!-xSt#?h@%3`wAhcx<^YuM94!p(K-d&{cfi35R~+Wg6!Al_{6K~K{!@KWx-eIY z5aF5+2$%OCC|JPe&!Gt710o8u@b#BTX(o2V_Bug~`hx3~c8K2FC*S0c+ zyp9e+N7JBRSyy}Eg0^c{UgTfnPp`S+W6K(O}jZADjZP^sG zllLz_i2C1VS}uec*G#~PbdARir|U5Nk&>k37-B;FO-buj8AAn-$H&vz@R@Z1oD*<*$Wt@} z!Zn#?Bl(s=FA-_%VKg zKOZF&KCiy7jF`lJ6P4O4|j;-=xd zuuGoX<5_^BFjZ->0guLM*0GIK3*f^dw`wqBFR)ZyyJ4P@2f5U$-OESYF#l11zt^3z z!~rWHr1qwRb$ty$`Q@wYb3grH0{wUY(-Q+QFqrigW$6%< z<{|1S`;mj$|K?+|`-R!2pe*yzVm;G&;GCz|;@me5w5ejRh-t2YPq#zvWm-;wv`@Yj zCk=}Mfq>ghhH=Ep^T+<2*pSO+(?M~abjneNvJnuy?myv7&AR&ba2me8|42UChN{;Jv{@zG#pXdlwJo(Dbc~b zkJ}*|n?vEAxDlwdYn~KT2)ReWaxX> zr7TV}V;G!Jjg!BK_y^w=zc@Q^rxTnkd*A!>KjQE0!zsxJKKOwzdSwkpCzpZFu~Roj z`&-dEBvVamw~IkE%W?8^RCVwMcV|X8MKM%v;tV0O9>L;&(YFjVL$`jwbDQL4S>MD!B%6`9z_-lN0d6z2w&%EB?rVhU8b8r8NEw{eAEI6aV?1`5ez`NCDbjPWuDKJ7|5B4Xt%MP8 zg`e@o_B50~6FmRenMJN2cGC&pToda7zsjsvAEN$qZyHN=O?(6hc|z>-{qY=jevns* zF3J2HkhMw+I&*9R2(OZ=`CLc-JJYSOzN5=vDmWc?ngQh-pQ&GRBdh}_`)S^@e(J{Z z4`Ml#_lFPE1F6yMLK1-%_`Da7e9`6AdDyMAfr(=d>K z<64xcy@>h$qp~KpOX{=0KcSJ9K< zb{+H=OdP*_ya#)JQZLlBzUJY`k}BD;Z{4- z0$iw<^2lUcg!Vt|xZh=`VfhcG=3*MW^cQGNFeob?8vsfs81c{>>VkUXL>jq1Dp zd)FkdWLHAc^Piqcp#J;L(3Ifzv&jBw-swpu%f{@N@aKgB-g$^;%?gIgryu$Xdq`}1S#jWH&RtLkcxR8mT$cK+u&vbIC2oDZ&}qd*(jz8wPsL4>+#&r2s8Lt*rhFwOIUf`)GOB zDZ4TFEvH7Vo9wUrf_fU~@o~eDKa0+7j*bWyshiSxhKd+R<=Ckux=oGSoqrQdj@@R} z8iazv?1zqm5y1PNiKj2C7TQos5$8<&&Ho9z7?icVL;Z|L6?KoH^?r8Gcih?GrJ>n` z?+K3by+3V2a(VQKn^bY2~Tp|=G z%ciRWch*Ea@3c07(MJpC?F8n*x54c7@3d2p_{W{C?mdg3*Tk^SFrX4V-=?E3Io|iz ze}9>%&Ge)b?8#2tAi98ulskv;eC!>d%Z#ERg(w%8uRk!DGDYhmS+_S7hgZPG(_~*p z6Y4SlqbXhQ2Df!PbT_{(HL#yRq{6s+B{ruQw$z@b?c>_P{EtG}#-m-o@pDI}w1?62 z+r>Y$r)wIW4xEMu%Tzj!=uCiu(z%YlQ}cj_r{CbszzQHUGD(sv!~qS-j_NXhB9X+e z9`$fRj-5grm|H;8(Gylbyh!$S(3_ied* zH|EH2<3g4L9~|dlYLo39|MNwd{NChQ7R(-B1>slJDP>+(fuZ}o59_A-pqknsxnVPk z<6_TO-9r7*(-LoHybdEgenQ>iF}-2T{tRW5dT)(2!GKpSEK4hBeq6m9ZBx4lib>`q zFM%i^3KbbQyuiWS&00rag)Iuax4is1vy|q3)+kjCRQGn~vMCym6cX z%!4<)uFCwyPdDH2XP(!t299ccL2h?ju<^GZt0lX2atO?(vbb;Ed;;RVBf-13b7&pK z^r)PcDVWP_@!n!_0W1bpeOwr?1(~i&YDR~;u>2p(b-_U4lzvz+aJD^wqaAjymhgjBkF5fp+;md5jodXJ$5Du9fb>&4k zZ@!6Arz>M9e$W4K_c+26ZY%P&7|npL&-Xlp=o>NrEx#lR$E&{rd>?DjTs^c3*%=p2 z6Jv@2#e@Z;Re2w5=u==1|JDOt<8Ce}BOF@P;K%idJs!42YN)}poGVxi|&(sMRE!exqqGAGEYza;5>zDz%`8)IT<;?oLvrsYo z6uSkk9vk>nR2F-1J1&SjacMJdpJUJ1Tc@B=;o%x4W>WDN*bvBZw3q9UW5v_Kb{ka&Vl;smp0iE{^{f1$oQdseYfXVDtymhpmDkaZqjDpsphdO8N%J=_Yt zNU~roSE2Z&$~ov=IC+M=zZ#R@VwP_8qC4`hxO|;5P@YxE%v@R(=^5@u-f2UHDNvx1 zUVErI4WJ}(?tI1!FqUL>qxApVfA)p7_VGttDO99v4Y^w!3EU{}T3e3cq06)L?`%g= z|H`a(Ew@__p#B)jWRO<~6F;uI^ndBX=AYVojRAF^B+gllROWTBA(>w;YQKg zEx_f8uaSnMpsS7S_R+dUsNX`}J%{@D%kOd4Zi=r$IAbpK}1y75LPGnBZ1A0Wz!Zg(ZSc&7~(p`H1AD!u(2d)O}=6~kmS*{t^ zw?az-y4N*f^U&eup#~3^X_zCDu`=1T4mg%{ImIW(A>piCBrRD7Sh;*IIxMFJ%YU9@ z@Uw007>5h-{-%yv-5|*8R1iDG031l#V0+8B25uVrT=`6e@CTMEwG<}5z^}{@xgDc+ z%>Ilrm=XB%EW^Y0=QUQ%ry<)5orK`TCP*+grJ0Xwf^vNQW30{NV5VPK@*2w18*5Xo ztO#vk`7^4g@+4Q(#(+OLdoHdYt?T;gY+PenDZ*I`YLWUb!zX5^_^+Y*7u5u(E)jVY zu>SV!$VN;vCjTi$y|4q)>yT=%$F)|I4v-aFk^b(|D7?tib8rpS@rT-W0T#cnQd2EQx5sS!Ck;o3twSb26 zHODGZKk~??12i`51V0@)_6mFU0tFmF0|~AhlYf==wK=1&F|hDi+bCDhI#^&eWZcT3jEm?LB)i_8QX$YnCP*}L2#Y~x4Zt%Tdj-={#D=b| zKo8lX2Dz0{u$r&Y87$uejqld?Tt+;NqJt}+qOFF3eOco5>($}Fg`g-BPhW!N53Xcs zkXs&`fwGHD-@9#VAl@>YCA)JLG$$*EI4@SgyDMu8r_U^cV_f&cAXgmV5ivZ`f3*?2 z{{h?1jzoh^VE<_=NR*cv$Gq&I&vd>UIP)uUn%+ZtzV0ob=R_)#=+6s*ZGoS^1Pg&fh3-!u`R`j3z!XAbJ8 zx9{*(%ysC(&ZptLuG2<31Ra92ON<64!PSvRaOgGS&n}JZOhkS+XW#>YaOYL{)Y9C^ z@fPAUZxv&v-2Tg7b}#tpt0didSbxM@l}xS#93pi1xgd-KXDjxqspO_Z?v1jlPx}|a zg-_8;wt?BO+_B~&wM-f2e|))JZ_95q1cE%Q-S{4NK}CzanY!=2pjz6*h}e4wn)U?e z1iAWyR4=O7;?ZFkc$YmwW?}-vzbw02xYTtnf-|1ttF}YSKv!Dmqci6qSbZZ`N#nQ% z_Pd)f1C+lR>?XA8&>IJyPDaUekz?5Pmq;~l$IvbV(!PpW$NXk+C?QX@=-~vcNS%@k zv&6x)50^jsM-D=_iSJTxaozC2$ms`LhJW#2&m&GSKIca38u7K0RK}oX-;39srj!xC zl0A_!F-<2J_5I2*@V*~9)hXZ5L2~#RVs`%YY6<557yN2gVIypS$9X9|aY~3M&xJOdk({x(b9SyDWq?I^w&w`RK$YKI6Q2G>;Cu! ztvgnFo^)Y-4bDXH>{~Nhg}G6RHpE2)ICtwNPm5;+xZK*VuDo+gSp0gY{%VKVXFMqF zl0PrZVFsJ_R8&zT|c-PVC$0d z6V=uKtY0qrlt_89XAuUh|GRWLfsK|Isk*hLuCBkF7p1zMp|!e+uC;-gj=vYFmZgog zuD=(xsk)V>iMg?^)jzxaWfutzfPn})kw)Fh+RRc{-Q3Vz*Vxch*Z;NGesw)vO=}yp z(MnqrZPWMnA~7{XySxsnn;4p6d$rAMOs)N;3BC6HK3?5Q*H~8@eHJ>*%E;K5-&j}E z(p1-y-^SX|*vj8)ztTVZMeSt`O~w8>)6bv#mH%^pF0VbmKNb5law~0gLsshk|8*^R zX$n_=18oBxg98S_*i!IRJOA;sNcgWj{{OxP+#~w;1CUtushgPT=o+gV*=kzqTcNxB zEvSDaX!#%8DE}N~ZEvpYZ*bF#O5NPjOiNSC5G@2}k0eQ}ZeyjZuA^&;{{E4v71~Vx z=ac;9(3L2xEe%b6%hg{B%>H}*-EEq7{!$_gf3{KobDXuAv96`2skSaUx_2|{Xk@6J z7rnKmCK8~zrLK;lwzZ*|sTF$unx_5+XAJbkDAarkm$=j@Mf~0T75%;F{?}uzbp9L} zOlIsvif~!~9!YL+`hWfyQnd4bIFHQOOkZCYX#x4~fBw;)J^H%VGXHM%LxQxlGO;!@ zGv@#E>hJpd#;A66&d^lP%-`#Pxu&U(rWOBhRp8gN)HFfbw?|ylT2u0$|1s4eQ8&>? z*JAz8!wpTXbuCRbjji}iG)*l%{aoRmHPjMDiE>UwgQt@4zKZ>_zy@UrtW$)^Dx+=Z`e7 z_SZ5nd~NVpdcTd9BaybLjfuJazhBG5#@O0W)6!DYey_Tvt~PpZ)|NKf)_W}o{yl0h zlE6QEEeNFvTrG&S(HPOu)wbHJYx?`2y%xl(zcpUVf<%=Jy&7v;8=9WBAa%1KQ?wv= zx1dn8*uy1Fh<+*DEcX8SNS)uRy4QkI^|z|aU$CG;w{Edd(SjP?{6C-e%!1~>e*ugA zTCX2k9N++FBem_%FY@))ur{`~fm(@9!Tk3N0H$^vz6>*h#I74Uw+;`AZZ0 zzLVclk|z57*P#5r36=CegsT6)AK(9eJX)XbpP&EluRLQrOmBPXMB4WU87Du;g1x*x z-}CO3!nsU5C#69d)I7u@8kCU(A0KfV0$%t2&nkPS~0GD(M!1<#jQZbHXt|Cp=EY{cI!Ph_u8j&CrX4Zcvt` zl|+f`UO!Te^IU_(|9t%uSLr{C{=Zybe~rF>;8f`Uczq*IL3u%*|MmI-HvdYFz@P~| z46^@x9B%#((LBuXUq_LSuAQzGzm}#I8hd-yO?7S6ZcyU+yud(yR_Q>DN* zSm{_CQl-b?&Vv3w-wInC z{?E7PEg1gu?F9=)B>N*s_9p*Go`Aq$!i)Tmb@Km5{<{B|QWAAEmm;gAgRIi7g+kBO zTJ!e?G@iUjHLb0Y?Loc+nYx~_=D%*lVNGLWGg~Wl9UF5rHELSxsvB!+=^_Wo!^%d> IMvMRd0mX0QOaK4? literal 0 HcmV?d00001 diff --git a/tests/data/experiments/knn-explain-same-trunc/existing_experiment/output/knn_same_trunc_feature.csv b/tests/data/experiments/knn-explain-same-trunc/existing_experiment/output/knn_same_trunc_feature.csv new file mode 100644 index 000000000..993862cb7 --- /dev/null +++ b/tests/data/experiments/knn-explain-same-trunc/existing_experiment/output/knn_same_trunc_feature.csv @@ -0,0 +1,9 @@ +feature,transform,sign,train_mean,train_sd,train_transformed_mean,train_transformed_sd +FEATURE1,raw,1.0,5.218315541739041,0.5232330811206761,5.218315541739041,0.5232330811206761 +FEATURE2,raw,1.0,-0.09905531550709003,0.0290821440987776,-0.09872163579199136,0.027507362950694694 +FEATURE3,raw,1.0,-0.18242617536302583,0.0750396646084921,-0.18242617536302583,0.0750396646084921 +FEATURE4,raw,1.0,-0.19067515567207452,0.15152973701964462,-0.19067515567207452,0.15152973701964462 +FEATURE5,raw,1.0,-0.09655149330236192,0.04142919523473225,-0.09655149330236192,0.04142919523473225 +FEATURE6,raw,1.0,4.448370880142472,0.30002006018773947,4.448496342999794,0.29951102956014236 +FEATURE7,raw,1.0,10.668,5.733718431708014,10.668,5.733718431708014 +FEATURE8,raw,1.0,-8484.539584319547,2530.851135611023,-8484.539584319547,2530.851135611023 diff --git a/tests/data/experiments/knn-explain-same-trunc/existing_experiment/output/knn_same_trunc_rsmtool.json b/tests/data/experiments/knn-explain-same-trunc/existing_experiment/output/knn_same_trunc_rsmtool.json new file mode 100644 index 000000000..882d9c80f --- /dev/null +++ b/tests/data/experiments/knn-explain-same-trunc/existing_experiment/output/knn_same_trunc_rsmtool.json @@ -0,0 +1,43 @@ +{ + "experiment_id": "ASAP2", + "train_file": "../../../files/train.csv", + "test_file": "../../../files/test.csv", + "model": "KNeighborsRegressor", + "description": "A model which uses all features and a KNN.", + "test_label_column": "score", + "train_label_column": "score", + "features": "features.csv", + "use_scaled_predictions": true, + "trim_min": 1, + "trim_max": 6, + "id_column": "ID", + "second_human_score_column": "score2", + "length_column": "LENGTH", + "exclude_zero_scores": true, + "select_transformations": false, + "standardize_features": true, + "truncate_outliers": false, + "use_thumbnails": false, + "use_truncation_thresholds": false, + "predict_expected_scores": false, + "sign": null, + "file_format": "csv", + "candidate_column": null, + "general_sections": [ + "all" + ], + "special_sections": null, + "custom_sections": null, + "feature_subset_file": null, + "feature_subset": null, + "rater_error_variance": null, + "trim_tolerance": 0.4998, + "subgroups": [], + "min_n_per_group": null, + "skll_fixed_parameters": {}, + "skll_objective": null, + "section_order": null, + "flag_column": null, + "flag_column_test": null, + "min_items_per_candidate": null +} diff --git a/tests/data/experiments/knn-explain-same-trunc/rsmexplain.json b/tests/data/experiments/knn-explain-same-trunc/rsmexplain.json new file mode 100644 index 000000000..7a49cec3c --- /dev/null +++ b/tests/data/experiments/knn-explain-same-trunc/rsmexplain.json @@ -0,0 +1,13 @@ +{ + "description": "Explaning an KNeighborsRegressor model trained on all features.", + "experiment_dir": "existing_experiment", + "experiment_id": "knn_same_trunc", + "background_data": "../../files/train.csv", + "background_kmeans_size": 50, + "explain_data": "../../files/test.csv", + "truncate_outliers": false, + "id_column": "ID", + "sample_size": 10, + "num_features_to_display": 15, + "show_auto_cohorts": true +} diff --git a/tests/test_experiment_rsmexplain.py b/tests/test_experiment_rsmexplain.py index 2b70812dd..bb4d830e8 100644 --- a/tests/test_experiment_rsmexplain.py +++ b/tests/test_experiment_rsmexplain.py @@ -156,3 +156,73 @@ def test_run_rsmexplain_same_standardize_features_value(self): do_run_explain(source, config_dict) called_config = mock_generate_report.call_args[0][3] self.assertEqual(called_config["standardize_features"], False) + + def test_run_rsmexplain_different_truncate_outliers_value(self): + """Check that rsmtool truncate outliers value overrides rsmexplain value.""" + # set up a temporary directory since we will be using getcwd + temp_dir = tempfile.TemporaryDirectory(prefix=getcwd()) + + old_file_dict = { + "experiment_dir": "data/experiments/knn-explain-diff-trunc/existing_experiment", + "background_data": "data/files/train.csv", + "explain_data": "data/files/test.csv", + } + + new_file_dict = copy_data_files(temp_dir.name, old_file_dict, rsmtool_test_dir) + + source = "knn-explain-diff-trunc" + config_dict = { + "description": "Explaning an KNeighborsRegressor model trained on all features.", + "experiment_dir": new_file_dict["experiment_dir"], + "experiment_id": "knn_diff_trunc", + "background_data": new_file_dict["background_data"], + "background_kmeans_size": 50, + "explain_data": new_file_dict["explain_data"], + "truncate_outliers": False, + "id_column": "ID", + "sample_size": 10, + "num_features_to_display": 15, + "show_auto_cohorts": True, + } + + # check `truncate_outliers` in the config has been overridden to `True` + # since that was the value in rsmtool configuration + with patch("rsmtool.rsmexplain.generate_report") as mock_generate_report: + do_run_explain(source, config_dict) + called_config = mock_generate_report.call_args[0][3] + self.assertEqual(called_config["truncate_outliers"], True) + + def test_run_rsmexplain_same_truncate_outliers_value(self): + """Check that rsmexplain truncate outliers value does not change if matching rsmtool.""" + # set up a temporary directory since we will be using getcwd + temp_dir = tempfile.TemporaryDirectory(prefix=getcwd()) + + old_file_dict = { + "experiment_dir": "data/experiments/knn-explain-same-trunc/existing_experiment", + "background_data": "data/files/train.csv", + "explain_data": "data/files/test.csv", + } + + new_file_dict = copy_data_files(temp_dir.name, old_file_dict, rsmtool_test_dir) + + source = "knn-explain-same-trunc" + config_dict = { + "description": "Explaning an KNeighborsRegressor model trained on all features.", + "experiment_dir": new_file_dict["experiment_dir"], + "experiment_id": "knn_same_trunc", + "background_data": new_file_dict["background_data"], + "background_kmeans_size": 50, + "explain_data": new_file_dict["explain_data"], + "truncate_outliers": False, + "id_column": "ID", + "sample_size": 10, + "num_features_to_display": 15, + "show_auto_cohorts": True, + } + + # check `truncate_outliers` in the config is the same `False` as it was + # before since that matches the value in rsmtool configuration + with patch("rsmtool.rsmexplain.generate_report") as mock_generate_report: + do_run_explain(source, config_dict) + called_config = mock_generate_report.call_args[0][3] + self.assertEqual(called_config["truncate_outliers"], False) From 387aa2c5b90148d244fa9bdc19686eb4c7d1206f Mon Sep 17 00:00:00 2001 From: Nitin Madnani Date: Thu, 5 Oct 2023 18:26:37 -0400 Subject: [PATCH 12/13] test: update rsmeval generated configurations --- tests/data/output/autogenerated_rsmeval_config.json | 3 +-- tests/data/output/autogenerated_rsmeval_config_groups.json | 3 +-- tests/data/output/interactive_rsmeval_config.json | 3 +-- tests/data/output/interactive_rsmeval_config_groups.json | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/data/output/autogenerated_rsmeval_config.json b/tests/data/output/autogenerated_rsmeval_config.json index 4fd0c786f..a055df8c7 100644 --- a/tests/data/output/autogenerated_rsmeval_config.json +++ b/tests/data/output/autogenerated_rsmeval_config.json @@ -32,9 +32,8 @@ "special_sections": null, "subgroups": [], "trim_tolerance": 0.4998, - "truncate_outliers": true, "use_thumbnails": false, "use_wandb": false, "wandb_entity": null, "wandb_project": null -} \ No newline at end of file +} diff --git a/tests/data/output/autogenerated_rsmeval_config_groups.json b/tests/data/output/autogenerated_rsmeval_config_groups.json index 29d8a7a73..da302d175 100644 --- a/tests/data/output/autogenerated_rsmeval_config_groups.json +++ b/tests/data/output/autogenerated_rsmeval_config_groups.json @@ -35,9 +35,8 @@ "special_sections": null, "subgroups": [], "trim_tolerance": 0.4998, - "truncate_outliers": true, "use_thumbnails": false, "use_wandb": false, "wandb_entity": null, "wandb_project": null -} \ No newline at end of file +} diff --git a/tests/data/output/interactive_rsmeval_config.json b/tests/data/output/interactive_rsmeval_config.json index a5c5b71d1..b2f946bae 100644 --- a/tests/data/output/interactive_rsmeval_config.json +++ b/tests/data/output/interactive_rsmeval_config.json @@ -31,9 +31,8 @@ "special_sections": null, "subgroups": [], "trim_tolerance": 0.4998, - "truncate_outliers": true, "use_thumbnails": true, "use_wandb": false, "wandb_entity": null, "wandb_project": null -} \ No newline at end of file +} diff --git a/tests/data/output/interactive_rsmeval_config_groups.json b/tests/data/output/interactive_rsmeval_config_groups.json index 6eb7b1c85..02266fef8 100644 --- a/tests/data/output/interactive_rsmeval_config_groups.json +++ b/tests/data/output/interactive_rsmeval_config_groups.json @@ -36,9 +36,8 @@ "L1" ], "trim_tolerance": 0.4998, - "truncate_outliers": true, "use_thumbnails": true, "use_wandb": false, "wandb_entity": null, "wandb_project": null -} \ No newline at end of file +} From 66b98afd528c886e08aa3a4072cf98ac3ee873ce Mon Sep 17 00:00:00 2001 From: Zhaoyang Xie Date: Thu, 5 Oct 2023 20:29:33 -0400 Subject: [PATCH 13/13] fix: restore config_rsmtool.json --- examples/rsmtool/config_rsmtool.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/rsmtool/config_rsmtool.json b/examples/rsmtool/config_rsmtool.json index 4f12cffc4..4bfa44d52 100644 --- a/examples/rsmtool/config_rsmtool.json +++ b/examples/rsmtool/config_rsmtool.json @@ -11,6 +11,5 @@ "trim_max": 6, "id_column": "ID", "second_human_score_column": "score2", - "length_column": "LENGTH", - "standardize_features": false + "length_column": "LENGTH" }