allenai · epwalsh · Nov 1, 2021 · Jun 5, 2021 · Jun 8, 2021 · Jun 8, 2021
diff --git a/allennlp_models/generation/dataset_readers/cnn_dm.py b/allennlp_models/generation/dataset_readers/cnn_dm.py
@@ -85,10 +85,6 @@ def _sanitize_story_line(line):
 
         sentence_endings = [".", "!", "?", "...", "'", "`", '"', ")", "\u2019", "\u201d"]
 
-        # CNN stories always start with "(CNN)"
-        if line.startswith("(CNN)"):
-            line = line[len("(CNN)") :]
-
         # Highlight are essentially bullet points and don't have proper sentence endings
         if line[-1] not in sentence_endings:
             line += "."
@@ -104,6 +100,11 @@ def _read_story(story_path: str):
         with open(story_path, "r") as f:
             for line in f:
                 line = line.strip()
+
+                # CNN stories always start with "(CNN)"
+                if line.startswith("(CNN)"):
+                    line = line[len("(CNN)") :]
+
                 if line == "":
                     continue