This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Contextualized bias mitigation #5176

Merged Jun 2, 2021 · 58 commits (changes shown from 49 commits)

Commits
79c6c33
added linear and hard debiasers
Apr 13, 2021
e23057c
worked on documentation
Apr 14, 2021
fcc3d34
committing changes before branch switch
Apr 14, 2021
7d00910
committing changes before switching branch
Apr 15, 2021
668a513
finished bias direction, linear and hard debiasers, need to write tests
Apr 15, 2021
91029ef
finished bias direction test
Apr 15, 2021
396b245
Commiting changes before switching branch
Apr 16, 2021
a8c22a1
finished hard and linear debiasers
Apr 16, 2021
ef6a062
finished OSCaR
Apr 17, 2021
2c873cb
bias mitigators tests and bias metrics remaining
Apr 17, 2021
d97a526
added bias mitigator tests
Apr 18, 2021
8460281
added bias mitigator tests
Apr 18, 2021
5a76922
finished tests for bias mitigation methods
Apr 19, 2021
85cb107
Merge remote-tracking branch 'origin/main' into arjuns/post-processin…
Apr 19, 2021
8e55f28
fixed gpu issues
Apr 19, 2021
b42b73a
fixed gpu issues
Apr 19, 2021
37d8e33
fixed gpu issues
Apr 20, 2021
31b1d2c
resolve issue with count_nonzero not being differentiable
Apr 20, 2021
a1f4f2a
merged main into post-processing-debiasing
Apr 21, 2021
36cebe3
added more references
Apr 21, 2021
88c083b
Merge branch 'main' of https://github.com/allenai/allennlp into arjun…
Apr 28, 2021
86081ee
fairness during finetuning
Apr 29, 2021
ae592d8
finished bias mitigator wrapper
May 5, 2021
2501b8c
added reference
May 5, 2021
f664dfb
updated CHANGELOG and fixed minor docs issues
May 5, 2021
595449d
move id tensors to embedding device
May 5, 2021
dc4793f
Merge branch 'main' into arjuns/contextualized-bias-mitigation
ArjunSubramonian May 6, 2021
0cdcf89
fixed to use predetermined bias direction
May 6, 2021
f254128
fixed minor doc errors
May 6, 2021
1be00c8
snli reader registration issue
May 6, 2021
a6c9bf6
fixed _pretrained from params issue
May 6, 2021
6624680
fixed device issues
May 6, 2021
90a372e
evaluate bias mitigation initial commit
May 9, 2021
c6a2dbf
finished evaluate bias mitigation
May 10, 2021
7797659
handles multiline prediction files
May 10, 2021
bbfddd7
fixed minor bugs
May 11, 2021
f2f3fc3
fixed minor bugs
May 11, 2021
4e79de7
improved prediction diff JSON format
May 11, 2021
5dae69f
merged main
May 11, 2021
254676f
forgot to resolve a conflict
May 11, 2021
26d8dff
Merge branch 'main' of https://github.com/allenai/allennlp into arjun…
May 13, 2021
1ae5e99
Refactored evaluate bias mitigation to use NLI metric
May 13, 2021
e2cc38e
Added SNLIPredictionsDiff class
May 17, 2021
c34cf31
ensured dataloader is same for bias mitigated and baseline models
May 17, 2021
fdb9ea7
finished evaluate bias mitigation
May 18, 2021
3efffd2
Merge branch 'main' into arjuns/contextualized-bias-mitigation
AkshitaB May 18, 2021
c47de58
Update CHANGELOG.md
AkshitaB May 18, 2021
2b8cf09
Merge branch 'main' of https://github.com/allenai/allennlp into arjun…
May 20, 2021
33d6267
Replaced local data files with github raw content links
May 20, 2021
ec53a05
Update allennlp/fairness/bias_mitigator_applicator.py
ArjunSubramonian May 25, 2021
4afb7f2
deleted evaluate_bias_mitigation from git tracking
May 26, 2021
21bed9d
removed evaluate-bias-mitigation instances from rest of repo
May 26, 2021
fefcbad
Merge branch 'arjuns/contextualized-bias-mitigation' of https://githu…
May 26, 2021
972ea60
addressed Akshita's comments
May 26, 2021
b4011cb
moved bias mitigator applicator test to allennlp-models
Jun 2, 2021
4d7fffb
Merge branch 'main' into arjuns/contextualized-bias-mitigation
AkshitaB Jun 2, 2021
22a5964
removed unnecessary files
Jun 2, 2021
bd727dd
Merge branch 'main' into arjuns/contextualized-bias-mitigation
ArjunSubramonian Jun 2, 2021
9 changes: 6 additions & 3 deletions CHANGELOG.md
@@ -20,6 +20,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- Added `TaskSuite` base class and command line functionality for running [`checklist`](https://github.com/marcotcr/checklist) test suites, along with implementations for `SentimentAnalysisSuite`, `QuestionAnsweringSuite`, and `TextualEntailmentSuite`. These can be found in the `allennlp.confidence_checks.task_checklists` module.
- Added `BiasMitigatorApplicator`, which wraps any Model and mitigates biases by finetuning
on a downstream task.
- Added `EvaluateBiasMitigation`, which evaluates the effectiveness of bias mitigation by computing
SNLI-related metrics for a bias-mitigated and baseline model.
- Added `allennlp diff` command to compute a diff on model checkpoints, analogous to what `git diff` does on two files.
- Meta data defined by the class `allennlp.common.meta.Meta` is now saved in the serialization directory and archive file
when training models from the command line. This is also now part of the `Archive` named tuple that's returned from `load_archive()`.
@@ -39,7 +43,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- When `PretrainedTransformerIndexer` folds long sequences, it no longer loses the information from token type ids.
- Fixed documentation for `GradientDescentTrainer.cuda_device`.


## [v2.4.0](https://github.com/allenai/allennlp/releases/tag/v2.4.0) - 2021-04-22

### Added
@@ -65,8 +68,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add new dimension to the `interpret` module: influence functions via the `InfluenceInterpreter` base class, along with a concrete implementation: `SimpleInfluence`.
- Added a `quiet` parameter to the `MultiProcessDataLoading` that disables `Tqdm` progress bars.
- The test for distributed metrics now takes a parameter specifying how often you want to run it.
- Created the fairness module and added four fairness metrics: `Independence`, `Separation`, and `Sufficiency`.
- Added three bias metrics to the fairness module: `WordEmbeddingAssociationTest`, `EmbeddingCoherenceTest`, `NaturalLanguageInference`, and `AssociationWithoutGroundTruth`.
- Created the fairness module and added three fairness metrics: `Independence`, `Separation`, and `Sufficiency`.
- Added four bias metrics to the fairness module: `WordEmbeddingAssociationTest`, `EmbeddingCoherenceTest`, `NaturalLanguageInference`, and `AssociationWithoutGroundTruth`.
- Added four bias direction methods (`PCABiasDirection`, `PairedPCABiasDirection`, `TwoMeansBiasDirection`, `ClassificationNormalBiasDirection`) and four bias mitigation methods (`LinearBiasMitigator`, `HardBiasMitigator`, `INLPBiasMitigator`, `OSCaRBiasMitigator`).
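To make the idea behind these methods concrete, here is a minimal NumPy sketch of a two-means bias direction combined with linear (projection-based) bias mitigation. This is an illustration of the underlying math only, not the library's API; the function names, shapes, and toy data are all assumptions:

```python
import numpy as np


def two_means_bias_direction(group_a: np.ndarray, group_b: np.ndarray) -> np.ndarray:
    """Unit vector pointing from the mean embedding of one group to the other's."""
    direction = group_a.mean(axis=0) - group_b.mean(axis=0)
    return direction / np.linalg.norm(direction)


def linear_debias(embeddings: np.ndarray, bias_direction: np.ndarray) -> np.ndarray:
    """Remove from each embedding its component along the (unit) bias direction."""
    projections = embeddings @ bias_direction  # shape: (num_embeddings,)
    return embeddings - np.outer(projections, bias_direction)


# Toy check: debiased embeddings are orthogonal to the bias direction.
rng = np.random.default_rng(0)
group_a = rng.normal(size=(5, 4))  # e.g. embeddings of one set of seed words
group_b = rng.normal(size=(5, 4))  # e.g. embeddings of the paired set
direction = two_means_bias_direction(group_a, group_b)
debiased = linear_debias(rng.normal(size=(10, 4)), direction)
assert np.allclose(debiased @ direction, 0.0)
```

The other methods differ mainly in how the direction is found (PCA over difference vectors, a classifier normal) or how aggressively components are removed (hard debiasing also equalizes paired words; INLP iterates the projection; OSCaR rectifies rather than removes).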

### Changed
1 change: 1 addition & 0 deletions allennlp/commands/__init__.py
@@ -20,6 +20,7 @@
from allennlp.common.plugins import import_plugins
from allennlp.common.util import import_module_and_submodules
from allennlp.commands.checklist import CheckList
from allennlp.fairness.evaluate_bias_mitigation import EvaluateBiasMitigation
Contributor: Are we including this?

Contributor Author: Sorry, we are not!


logger = logging.getLogger(__name__)

1 change: 1 addition & 0 deletions allennlp/data/dataset_readers/__init__.py
@@ -19,3 +19,4 @@
from allennlp.data.dataset_readers.sequence_tagging import SequenceTaggingDatasetReader
from allennlp.data.dataset_readers.sharded_dataset_reader import ShardedDatasetReader
from allennlp.data.dataset_readers.text_classification_json import TextClassificationJsonReader
from allennlp.data.dataset_readers.snli import SnliReader
Contributor: This is present in allennlp-models right? Do we need to have it here?

Contributor Author: yes, it's used in the test, but the environment in which the test is run doesn't have allennlp-models.

122 changes: 122 additions & 0 deletions allennlp/data/dataset_readers/snli.py
@@ -0,0 +1,122 @@
from typing import Dict, Optional
Member: Was this just copied over from allennlp-models?

Contributor Author: yes, it's used in the test, but the environment in which the test is run doesn't have allennlp-models.

Contributor: It's fine to have some tests in allennlp-models for this code. For instance, we have some checklist tests there, because that's where the models are.

Contributor Author: I added the test under allennlp-models.

Contributor: We can remove this file now, right?

import json
import logging

from overrides import overrides

from allennlp.common.file_utils import cached_path
from allennlp.data.dataset_readers.dataset_reader import DatasetReader
from allennlp.data.fields import Field, TextField, LabelField, MetadataField
from allennlp.data.instance import Instance
from allennlp.data.token_indexers import SingleIdTokenIndexer, TokenIndexer
from allennlp.data.tokenizers import Tokenizer, SpacyTokenizer, PretrainedTransformerTokenizer

logger = logging.getLogger(__name__)


def maybe_collapse_label(label: str, collapse: bool):
    """
    Helper function that optionally collapses the "contradiction" and "neutral" labels
    into "non-entailment".
    """
    assert label in ["contradiction", "neutral", "entailment"]
    if collapse and label in ["contradiction", "neutral"]:
        return "non-entailment"
    return label


@DatasetReader.register("snli_for_bias")
class SnliReader(DatasetReader):
"""
Reads a file from the Stanford Natural Language Inference (SNLI) dataset. This data is
formatted as jsonl, one json-formatted instance per line. The keys in the data are
"gold_label", "sentence1", and "sentence2". We convert these keys into fields named "label",
"premise" and "hypothesis", along with a metadata field containing the tokenized strings of the
premise and hypothesis.
Registered as a `DatasetReader` with name "snli".
# Parameters
tokenizer : `Tokenizer`, optional (default=`SpacyTokenizer()`)
We use this `Tokenizer` for both the premise and the hypothesis. See :class:`Tokenizer`.
token_indexers : `Dict[str, TokenIndexer]`, optional (default=`{"tokens": SingleIdTokenIndexer()}`)
We similarly use this for both the premise and the hypothesis. See :class:`TokenIndexer`.
combine_input_fields : `bool`, optional
(default=`isinstance(tokenizer, PretrainedTransformerTokenizer)`)
If False, represent the premise and the hypothesis as separate fields in the instance.
If True, tokenize them together using `tokenizer.tokenize_sentence_pair()`
and provide a single `tokens` field in the instance.
collapse_labels : `bool`, optional (default=`False`)
If `True`, the "neutral" and "contradiction" labels will be collapsed into "non-entailment";
"entailment" will be left unchanged.
"""

    def __init__(
        self,
        tokenizer: Optional[Tokenizer] = None,
        token_indexers: Optional[Dict[str, TokenIndexer]] = None,
        combine_input_fields: Optional[bool] = None,
        collapse_labels: bool = False,
        **kwargs,
    ) -> None:
        super().__init__(
            manual_distributed_sharding=True, manual_multiprocess_sharding=True, **kwargs
        )
        self._tokenizer = tokenizer or SpacyTokenizer()
        if isinstance(self._tokenizer, PretrainedTransformerTokenizer):
            assert not self._tokenizer._add_special_tokens
        self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
        if combine_input_fields is not None:
            self._combine_input_fields = combine_input_fields
        else:
            self._combine_input_fields = isinstance(self._tokenizer, PretrainedTransformerTokenizer)
        self.collapse_labels = collapse_labels

    @overrides
    def _read(self, file_path: str):
        # If `file_path` is a URL, redirect to the cache.
        file_path = cached_path(file_path)
        with open(file_path, "r") as snli_file:
            example_iter = (json.loads(line) for line in snli_file)
            # Skip examples for which the annotators reached no gold-label consensus ("-").
            filtered_example_iter = (
                example for example in example_iter if example.get("gold_label") != "-"
            )
            for example in self.shard_iterable(filtered_example_iter):
                label = example.get("gold_label")
                premise = example["sentence1"]
                hypothesis = example["sentence2"]
                yield self.text_to_instance(premise, hypothesis, label)

    @overrides
    def text_to_instance(self, premise, hypothesis, label: Optional[str] = None) -> Instance:  # type: ignore
        fields: Dict[str, Field] = {}
        premise = self._tokenizer.tokenize(premise)
        hypothesis = self._tokenizer.tokenize(hypothesis)

        if self._combine_input_fields:
            tokens = self._tokenizer.add_special_tokens(premise, hypothesis)
            fields["tokens"] = TextField(tokens)
        else:
            premise_tokens = self._tokenizer.add_special_tokens(premise)
            hypothesis_tokens = self._tokenizer.add_special_tokens(hypothesis)
            fields["premise"] = TextField(premise_tokens)
            fields["hypothesis"] = TextField(hypothesis_tokens)

            # The metadata must stay inside this branch: `premise_tokens` and
            # `hypothesis_tokens` are only defined when the fields are kept separate.
            metadata = {
                "premise_tokens": [x.text for x in premise_tokens],
                "hypothesis_tokens": [x.text for x in hypothesis_tokens],
            }
            fields["metadata"] = MetadataField(metadata)

        if label:
            maybe_collapsed_label = maybe_collapse_label(label, self.collapse_labels)
            fields["label"] = LabelField(maybe_collapsed_label)

        return Instance(fields)

    @overrides
    def apply_token_indexers(self, instance: Instance):
        if "tokens" in instance.fields:
            instance.fields["tokens"]._token_indexers = self._token_indexers  # type: ignore
        else:
            instance.fields["premise"]._token_indexers = self._token_indexers  # type: ignore
            instance.fields["hypothesis"]._token_indexers = self._token_indexers  # type: ignore
17 changes: 16 additions & 1 deletion allennlp/fairness/__init__.py
@@ -3,7 +3,8 @@

1. measure the fairness of models according to multiple definitions of fairness
2. measure bias amplification
3. debias embeddings during training time and post-processing
3. mitigate bias in static and contextualized embeddings during training time and
post-processing
"""

from allennlp.fairness.fairness_metrics import Independence, Separation, Sufficiency
@@ -25,3 +26,17 @@
INLPBiasMitigator,
OSCaRBiasMitigator,
)
from allennlp.fairness.bias_utils import load_words, load_word_pairs
from allennlp.fairness.bias_mitigator_applicator import BiasMitigatorApplicator
from allennlp.fairness.bias_mitigator_wrappers import (
HardBiasMitigatorWrapper,
LinearBiasMitigatorWrapper,
INLPBiasMitigatorWrapper,
OSCaRBiasMitigatorWrapper,
)
from allennlp.fairness.bias_direction_wrappers import (
PCABiasDirectionWrapper,
PairedPCABiasDirectionWrapper,
TwoMeansBiasDirectionWrapper,
ClassificationNormalBiasDirectionWrapper,
)