update conll (#298)

allenai · Aug 31, 2021 · a8a3486 · a8a3486
1 parent 54de9d6
commit a8a3486
Show file tree

Hide file tree

Showing 3 changed files with 13 additions and 7 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
-- Added some additional `__init__()` parameters to the `T5` model in `allennlp_models.generation` for customizing
+- Added some additional `__init__()` parameters to the `T5` model in `allennlp_models.generation` for customizing
   beam search and other options.
 - Added a configuration file for fine-tuning `t5-11b` on CNN-DM (requires at least 8 GPUs).
 - Added a configuration to train on the PIQA dataset with AllenNLP Tango.
@@ -18,8 +18,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 
-- Fixed tests for Spacy versions greater than 3.1
-- Fixed the last step decoding when training CopyNet
+- Fixed tests for Spacy versions greater than 3.1.
+- Fixed the last step decoding when training CopyNet.
+- Allow singleton clusters in `ConllCorefScores`.
 
 ### Changed
 

diff --git a/allennlp_models/coref/metrics/conll_coref_scores.py b/allennlp_models/coref/metrics/conll_coref_scores.py
@@ -16,8 +16,9 @@ class ConllCorefScores(Metric):
 
     supports_distributed = True
 
-    def __init__(self) -> None:
+    def __init__(self, allow_singletons=False) -> None:
         self.scorers = [Scorer(m) for m in (Scorer.muc, Scorer.b_cubed, Scorer.ceafe)]
+        self.allow_singletons = allow_singletons
 
     @overrides
     def __call__(
@@ -56,7 +57,7 @@ def __call__(
         for i, metadata in enumerate(metadata_list):
             gold_clusters, mention_to_gold = self.get_gold_clusters(metadata["clusters"])
             predicted_clusters, mention_to_predicted = self.get_predicted_clusters(
-                top_spans[i], antecedent_indices[i], predicted_antecedents[i]
+                top_spans[i], antecedent_indices[i], predicted_antecedents[i], self.allow_singletons
             )
             for scorer in self.scorers:
                 scorer.update(
@@ -91,6 +92,7 @@ def get_predicted_clusters(
         top_spans: torch.Tensor,  # (num_spans, 2)
         antecedent_indices: torch.Tensor,  # (num_spans, num_antecedents)
         predicted_antecedents: torch.Tensor,  # (num_spans,)
+        allow_singletons: bool,
     ) -> Tuple[
         List[Tuple[Tuple[int, int], ...]], Dict[Tuple[int, int], Tuple[Tuple[int, int], ...]]
     ]:
@@ -104,7 +106,10 @@ def get_predicted_clusters(
             # Find predicted index in the antecedent spans.
             predicted_index = antecedent_indices[i, predicted_antecedent]
             # Must be a previous span.
-            assert i > predicted_index
+            if allow_singletons:
+                assert i >= predicted_index
+            else:
+                assert i > predicted_index
             antecedent_span: Tuple[int, int] = tuple(  # type: ignore
                 top_spans[predicted_index].tolist()
             )

diff --git a/tests/coref/metrics/conll_coref_scores_test.py b/tests/coref/metrics/conll_coref_scores_test.py
@@ -17,7 +17,7 @@ def test_get_predicted_clusters(self, device: str):
         antecedent_indices = torch.tensor([[-1, -1, -1], [0, -1, -1], [0, 1, -1]], device=device)
         predicted_antecedents = torch.tensor([-1, -1, 1], device=device)
         clusters, mention_to_cluster = ConllCorefScores.get_predicted_clusters(
-            top_spans, antecedent_indices, predicted_antecedents
+            top_spans, antecedent_indices, predicted_antecedents, allow_singletons=False
         )
         assert len(clusters) == 1
         assert set(clusters[0]) == {(4, 6), (8, 9)}