From 73450a82cb76ac8f284ea534a268efc591055a46 Mon Sep 17 00:00:00 2001
From: Matthias Koeppe <mkoeppe@math.ucdavis.edu>
Date: Wed, 26 Jan 2022 10:39:25 -0800
Subject: [PATCH 1/3] src/bin/sage-runtests: New option --baseline-stats-path

---
 src/bin/sage-runtests | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/bin/sage-runtests b/src/bin/sage-runtests
index 15ab1ce671a..67876ccf974 100755
--- a/src/bin/sage-runtests
+++ b/src/bin/sage-runtests
@@ -99,7 +99,9 @@ if __name__ == "__main__":
         help="print a summary at the end of each file of optional tests that were skipped")
 
     parser.add_argument("--stats_path", "--stats-path", default=os.path.join(DOT_SAGE, "timings2.json"),
-                        help="path to a json dictionary for the latest run storing a timing for each file")
+                        help="path to a json dictionary for timings and failure status for each file from previous runs; it will be updated in this run")
+    parser.add_argument("--baseline_stats_path", "--baseline-stats-path", default=None,
+                        help="path to a json dictionary for timings and failure status for each file, to be used as a baseline; it will not be updated")
 
     class GCAction(argparse.Action):
         def __call__(self, parser, namespace, values, option_string=None):

From 3551e196e0a9f65921320f41825f0bc7e6a6d785 Mon Sep 17 00:00:00 2001
From: Matthias Koeppe <mkoeppe@math.ucdavis.edu>
Date: Wed, 26 Jan 2022 11:03:54 -0800
Subject: [PATCH 2/3] src/sage/doctest/control.py: Load baseline stats

---
 src/sage/doctest/control.py | 43 +++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/src/sage/doctest/control.py b/src/sage/doctest/control.py
index 01f32fb8e45..d73e7f2b16e 100644
--- a/src/sage/doctest/control.py
+++ b/src/sage/doctest/control.py
@@ -124,6 +124,7 @@ def __init__(self, **kwds):
         self.new = False
         self.show_skipped = False
         self.target_walltime = -1
+        self.baseline_stats_path = None
 
         # sage-runtests contains more optional tags. Technically, adding
         # auto_optional_tags here is redundant, since that is added
@@ -456,6 +457,9 @@ def __init__(self, options, args):
 
         self.stats = {}
         self.load_stats(options.stats_path)
+        self.baseline_stats = {}
+        if options.baseline_stats_path:
+            self.load_baseline_stats(options.baseline_stats_path)
         self._init_warn_long()
 
         if self.options.random_seed is None:
@@ -568,6 +572,45 @@ def load_environment(self):
         from importlib import import_module
         return import_module(self.options.environment)
 
+    def load_baseline_stats(self, filename):
+        """
+        Merge the baseline stats stored in ``filename`` into ``self.baseline_stats``.
+
+        The file is read as JSON, in the same format that :meth:`load_stats`
+        expects, and its entries are added to ``self.baseline_stats``.
+
+        EXAMPLES::
+
+            sage: from sage.doctest.control import DocTestDefaults, DocTestController
+            sage: DC = DocTestController(DocTestDefaults(), [])
+            sage: import json
+            sage: filename = tmp_filename()
+            sage: with open(filename, 'w') as stats_file:
+            ....:     json.dump({'sage.doctest.control':{'failed':True}}, stats_file)
+            sage: DC.load_baseline_stats(filename)
+            sage: DC.baseline_stats['sage.doctest.control']
+            {'failed': True}
+
+        A missing file is silently skipped, and a file that cannot be loaded
+        only logs a message; entries loaded earlier are kept either way::
+
+            sage: d = tmp_dir()
+            sage: DC.load_baseline_stats(os.path.join(d))  # Cannot read a directory
+            Error loading baseline stats from ...
+            sage: DC.load_baseline_stats(os.path.join(d, "no_such_file"))
+            sage: DC.baseline_stats['sage.doctest.control']
+            {'failed': True}
+        """
+        if os.path.exists(filename):
+            try:
+                with open(filename) as baseline_file:
+                    loaded = json.load(baseline_file)
+                # Merge rather than replace, so earlier entries survive.
+                self.baseline_stats.update(loaded)
+            except Exception:
+                # Best effort: report the problem and carry on without raising.
+                self.log("Error loading baseline stats from %s"%filename)
+
     def load_stats(self, filename):
         """
         Load stats from the most recent run(s).

From 09ce9e04c5e511c5dfe852c6d083361e5bd55f98 Mon Sep 17 00:00:00 2001
From: Matthias Koeppe <mkoeppe@math.ucdavis.edu>
Date: Wed, 26 Jan 2022 11:42:35 -0800
Subject: [PATCH 3/3] src/sage/doctest/reporting.py: No error status for
 failures already seen in baseline

---
 src/sage/doctest/reporting.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/sage/doctest/reporting.py b/src/sage/doctest/reporting.py
index 67439257047..d291842742d 100644
--- a/src/sage/doctest/reporting.py
+++ b/src/sage/doctest/reporting.py
@@ -378,6 +378,10 @@ def report(self, source, timeout, return_code, results, output, pid=None):
             postscript = self.postscript
             stats = self.stats
             basename = source.basename
+            if self.controller.baseline_stats:
+                the_baseline_stats = self.controller.baseline_stats.get(basename, {})
+            else:
+                the_baseline_stats = {}
             cmd = self.report_head(source)
             try:
                 ntests, result_dict = results
@@ -398,12 +402,15 @@ def report(self, source, timeout, return_code, results, output, pid=None):
                         fail_msg += " (and interrupt failed)"
                     else:
                         fail_msg += " (with %s after interrupt)"%signal_name(sig)
+                if the_baseline_stats.get('failed', False):
+                    fail_msg += " [failed in baseline]"
                 log("    %s\n%s\nTests run before %s timed out:"%(fail_msg, "*"*70, process_name))
                 log(output)
                 log("*"*70)
                 postscript['lines'].append(cmd + "  # %s"%fail_msg)
                 stats[basename] = dict(failed=True, walltime=1e6)
-                self.error_status |= 4
+                if not the_baseline_stats.get('failed', False):
+                    self.error_status |= 4
             elif return_code:
                 if return_code > 0:
                     fail_msg = "Bad exit: %s"%return_code
@@ -411,12 +418,15 @@ def report(self, source, timeout, return_code, results, output, pid=None):
                     fail_msg = "Killed due to %s"%signal_name(-return_code)
                 if ntests > 0:
                     fail_msg += " after testing finished"
+                if the_baseline_stats.get('failed', False):
+                    fail_msg += " [failed in baseline]"
                 log("    %s\n%s\nTests run before %s failed:"%(fail_msg,"*"*70, process_name))
                 log(output)
                 log("*"*70)
                 postscript['lines'].append(cmd + "  # %s" % fail_msg)
                 stats[basename] = dict(failed=True, walltime=1e6)
-                self.error_status |= (8 if return_code > 0 else 16)
+                if not the_baseline_stats.get('failed', False):
+                    self.error_status |= (8 if return_code > 0 else 16)
             else:
                 if hasattr(result_dict, 'walltime') and hasattr(result_dict.walltime, '__len__') and len(result_dict.walltime) > 0:
                     wall = sum(result_dict.walltime) / len(result_dict.walltime)
@@ -477,8 +487,12 @@ def report(self, source, timeout, return_code, results, output, pid=None):
                 if result_dict.err is None or result_dict.err == 'tab':
                     f = result_dict.failures
                     if f:
-                        postscript['lines'].append(cmd + "  # %s failed" % (count_noun(f, "doctest")))
-                        self.error_status |= 1
+                        failmsg = "%s failed" % (count_noun(f, "doctest"))
+                        if the_baseline_stats.get('failed', False):
+                            failmsg += " [failed in baseline]"
+                        postscript['lines'].append(cmd + "  # %s" % failmsg)
+                        if not the_baseline_stats.get('failed', False):
+                            self.error_status |= 1
                     if f or result_dict.err == 'tab':
                         stats[basename] = dict(failed=True, walltime=wall)
                     else: