7660 Avoid CI stability checks timing out #32202

Merged
26 commits:
3058ead - allow stability checks to avoid TC timeout by checking times in betwe… (DanielRyanSmith, Dec 27, 2021)
52e31bb - fix flake8 issue (DanielRyanSmith, Dec 27, 2021)
33f8b1b - remove empty flag to trigger stability checks (DanielRyanSmith, Dec 27, 2021)
624e341 - some commenting explaining how iterations are being tracked (DanielRyanSmith, Dec 28, 2021)
11b8753 - add --repeat-max-time flag (DanielRyanSmith, Dec 28, 2021)
c28794e - better descriptors for announcing results (DanielRyanSmith, Dec 28, 2021)
e0b33e6 - cast max_time to timedelta (DanielRyanSmith, Dec 28, 2021)
d357344 - correct syntax for kwargs (DanielRyanSmith, Dec 28, 2021)
02e4f87 - specify kwargs source for run_test_iteration (DanielRyanSmith, Dec 28, 2021)
36db2c5 - remove empty css flags tag to trigger stability checks (DanielRyanSmith, Dec 28, 2021)
3f9bddb - Add clause to handle an ineffective number of iterations completing (DanielRyanSmith, Dec 29, 2021)
b3425cf - replace unassociated change used to trigger stability checks (DanielRyanSmith, Dec 29, 2021)
c93f895 - Implement changes suggested by @stephenmcgruer (DanielRyanSmith, Jan 5, 2022)
4d7e6f2 - Add only necessary formatting changes to stability (DanielRyanSmith, Jan 5, 2022)
2252d91 - Merge branch 'master' into 7660-stability-timeout (DanielRyanSmith, Jan 5, 2022)
2350b76 - wptrunner reformatting changes suggested by @jgraham (DanielRyanSmith, Jan 8, 2022)
01a2f07 - functional changes to stability tests suggested by @jgraham (DanielRyanSmith, Jan 8, 2022)
2d9fe27 - flake8 changes "line break before binary operator" (DanielRyanSmith, Jan 8, 2022)
330124a - change run_tests to return counts object (DanielRyanSmith, Jan 8, 2022)
899b794 - ensure run_tests return values are properly accessed (DanielRyanSmith, Jan 9, 2022)
42d9f5e - run_tests has consistent return values even in fail cases (DanielRyanSmith, Jan 9, 2022)
a13f488 - Return full counts in TestStatus class (DanielRyanSmith, Jan 10, 2022)
e9d90c8 - small formatting changes (DanielRyanSmith, Jan 10, 2022)
9d27581 - small wording change (DanielRyanSmith, Jan 10, 2022)
9d11203 - Replace counts with TestStatus object (DanielRyanSmith, Jan 11, 2022)
6d00729 - convert some log strings to f-strings (DanielRyanSmith, Jan 11, 2022)
76 changes: 63 additions & 13 deletions tools/wptrunner/wptrunner/stability.py
@@ -4,7 +4,7 @@
import io
import os
from collections import OrderedDict, defaultdict
from datetime import datetime
from datetime import datetime, timedelta

from mozlog import reader
from mozlog.formatters import JSONFormatter
@@ -261,7 +261,8 @@ def write_results(log, results, iterations, pr_number=None, use_details=False):
log("</details>\n")


def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwargs):
def run_step(logger, iterations, restart_after_iteration,
kwargs_extras, **kwargs):
from . import wptrunner
kwargs = copy.deepcopy(kwargs)

@@ -270,6 +271,13 @@ def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwargs):
else:
kwargs["rerun"] = iterations

# Keep track if the runs were stopped early to avoid
# hitting the timeout. If so, the actual number of iterations run
# should be used to process the results. The number here will
# be set to the actual value for later reference if it changes.
kwargs["avoided_timeout"] = {"did_avoid": False,
"iterations_run": iterations}

kwargs["pause_after_test"] = False
kwargs.update(kwargs_extras)

@@ -290,6 +298,12 @@ def wrap_handler(x):

wptrunner.run_tests(**kwargs)

# Use the number of repeated test suites that were run
# to process the results if the runs were stopped to
# avoid hitting the maximum run time.
if kwargs["avoided_timeout"]["did_avoid"]:
Reviewer comment (Contributor):
Using kwargs as an output mechanism from wptrunner.run_tests is clever, but potentially confusing. I leave it to other reviewers (who are still involved with WPT and will have to maintain this ;)) to decide if they want to suggest a different method for communicating this information back to stability.py from the runner.

Reply from the author (DanielRyanSmith):
Yes, I was hesitant about taking this approach. We definitely want access to the number of iterations that were actually run, but returning that information from wptrunner's run_tests when it is only needed by the stability checks didn't feel right either. I am totally open to changing this implementation if anyone has suggestions or thinks a rework of run_tests is worth the change.
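Later commits in this PR ("change run_tests to return counts object", "Return full counts in TestStatus class", "Replace counts with TestStatus object") do end up moving this signal into the runner's return value instead of a mutated kwarg. A minimal sketch of that shape; names and fields below are illustrative, based on the commit messages rather than the final API:

```python
from dataclasses import dataclass
from datetime import datetime, timedelta

@dataclass
class TestStatus:
    """Counts describing one verification run; field names are illustrative."""
    expected_repeated_runs: int = 1  # iterations requested
    repeated_runs: int = 0           # iterations actually completed

def run_tests(repeat=1, repeat_max_time=100, **kwargs):
    """Run the suite `repeat` times, stopping early once the minute budget is spent."""
    status = TestStatus(expected_repeated_runs=repeat)
    start = datetime.now()
    for _ in range(repeat):
        pass  # ... one full pass of the test suite would run here ...
        status.repeated_runs += 1
        if datetime.now() - start > timedelta(minutes=repeat_max_time):
            break  # stop early rather than hit the external CI timeout
    return status

# stability.py can then read the actual iteration count directly,
# with no kwargs back-channel:
#   status = wptrunner.run_tests(**kwargs)
#   iterations = status.repeated_runs
```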

iterations = kwargs["avoided_timeout"]["iterations_run"]

logger._state.handlers = initial_handlers
logger._state.running_tests = set()
logger._state.suite_started = False
@@ -311,12 +325,24 @@ def get_steps(logger, repeat_loop, repeat_restart, kwargs_extras):
if repeat_loop:
desc = "Running tests in a loop %d times%s" % (repeat_loop,
flags_string)
steps.append((desc, functools.partial(run_step, logger, repeat_loop, False, kwargs_extra)))
steps.append((desc,
functools.partial(run_step,
logger,
repeat_loop,
False,
kwargs_extra),
repeat_loop))

if repeat_restart:
desc = "Running tests in a loop with restarts %s times%s" % (repeat_restart,
flags_string)
steps.append((desc, functools.partial(run_step, logger, repeat_restart, True, kwargs_extra)))
steps.append((desc,
functools.partial(run_step,
logger,
repeat_restart,
True,
kwargs_extra),
repeat_restart))

return steps

@@ -335,8 +361,9 @@ def write_summary(logger, step_results, final_result):

logger.info(':::')

def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, max_time=None,
output_results=True, **kwargs):

def check_stability(logger, repeat_loop=10, repeat_restart=10, chaos_mode=True,
max_time=None, output_results=True, **kwargs):
kwargs_extras = [{}]
if chaos_mode and kwargs["product"] == "firefox":
kwargs_extras.append({"chaos_mode_flags": "0xfb"})
@@ -346,38 +373,61 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, max_time=None, output_results=True, **kwargs):
start_time = datetime.now()
step_results = []

github_checks_outputter = get_gh_checks_outputter(kwargs["github_checks_text_file"])
github_checks_outputter = get_gh_checks_outputter(
kwargs["github_checks_text_file"])

for desc, step_func in steps:
if max_time and datetime.now() - start_time > max_time:
for desc, step_func, expected_iterations in steps:
if max_time and \
datetime.now() - start_time > timedelta(minutes=max_time):
logger.info("::: Test verification is taking too long: Giving up!")
logger.info("::: So far, all checks passed, but not all checks were run.")
logger.info(
"::: So far, all checks passed, but not all checks were run.")
write_summary(logger, step_results, "TIMEOUT")
return 2

logger.info(':::')
logger.info('::: Running test verification step "%s"...' % desc)
logger.info(':::')
results, inconsistent, slow, iterations = step_func(**kwargs)

if iterations <= 1:
step_results.append((desc, "FAIL"))
logger.info("::: Reached iteration timeout before finishing "
"2 or more repeat runs.")
logger.info("::: At least 2 successful repeat runs are required "
"to validate stability.")
write_summary(logger, step_results, "TIMEOUT")
return 1

if output_results:
write_results(logger.info, results, iterations)

if inconsistent:
step_results.append((desc, "FAIL"))
if github_checks_outputter:
write_github_checks_summary_inconsistent(github_checks_outputter.output, inconsistent, iterations)
write_github_checks_summary_inconsistent(
github_checks_outputter.output, inconsistent, iterations)
write_inconsistent(logger.info, inconsistent, iterations)
write_summary(logger, step_results, "FAIL")
return 1

if slow:
step_results.append((desc, "FAIL"))
if github_checks_outputter:
write_github_checks_summary_slow_tests(github_checks_outputter.output, slow)
write_github_checks_summary_slow_tests(
github_checks_outputter.output, slow)
write_slow_tests(logger.info, slow)
write_summary(logger, step_results, "FAIL")
return 1

step_results.append((desc, "PASS"))
# If the tests passed but the number of iterations didn't match
# the number expected to run, it is likely that the runs were
# stopped early to avoid a timeout.
if iterations != expected_iterations:
result = "PASS * %i/%i repeats completed" % (
iterations, expected_iterations)
step_results.append((desc, result))
else:
step_results.append((desc, "PASS"))

write_summary(logger, step_results, "PASS")
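The budget check above runs before each verification step rather than interrupting a step mid-run, and because --verify-max-time now arrives as a plain int (see the wptcommandline.py diff below), the minutes-to-timedelta conversion happens at the comparison site. A condensed sketch of the loop with the step functions stubbed out; the values here are stand-ins, not wptrunner's real configuration:

```python
from datetime import datetime, timedelta

def over_budget(start_time, max_time_minutes):
    """True once elapsed wall-clock time exceeds the budget; None disables it."""
    if max_time_minutes is None:
        return False
    return datetime.now() - start_time > timedelta(minutes=max_time_minutes)

# Each step carries its expected iteration count, mirroring get_steps() above.
steps = [("loop x10", lambda: 10, 10), ("restarts x10", lambda: 8, 10)]
start_time = datetime.now()

for desc, step_func, expected_iterations in steps:
    if over_budget(start_time, max_time_minutes=50):
        print("::: Test verification is taking too long: Giving up!")
        break
    iterations = step_func()  # stand-in for step_func(**kwargs)
    if iterations != expected_iterations:
        # Early-stopped runs still pass, with the shortfall noted.
        print(f"{desc}: PASS * {iterations}/{expected_iterations} repeats completed")
    else:
        print(f"{desc}: PASS")
```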
7 changes: 5 additions & 2 deletions tools/wptrunner/wptrunner/wptcommandline.py
@@ -3,7 +3,6 @@
import sys
from collections import OrderedDict
from distutils.spawn import find_executable
from datetime import timedelta

from . import config
from . import wpttest
@@ -115,7 +114,11 @@ def create_parser(product_choices=None):
mode_group.add_argument("--verify-max-time", action="store",
default=None,
help="The maximum number of minutes for the job to run",
type=lambda x: timedelta(minutes=float(x)))
type=int)
mode_group.add_argument("--repeat-max-time", action="store",
default=100,
help="The maximum number of minutes for the test suite to attempt repeat runs",
type=int)
output_results_group = mode_group.add_mutually_exclusive_group()
output_results_group.add_argument("--verify-no-output-results", action="store_false",
dest="verify_output_results",
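With both flags parsed as plain integers (minutes), any timedelta conversion is now the caller's job, as in the check_stability() change above. A small self-contained check of the parsing behavior, using a stand-in parser rather than wptrunner's real mode_group:

```python
import argparse
from datetime import timedelta

parser = argparse.ArgumentParser()
parser.add_argument("--verify-max-time", action="store", default=None, type=int,
                    help="The maximum number of minutes for the job to run")
parser.add_argument("--repeat-max-time", action="store", default=100, type=int,
                    help="The maximum number of minutes for the test suite "
                         "to attempt repeat runs")

args = parser.parse_args(["--verify-max-time", "50"])
assert args.verify_max_time == 50   # plain int; no timedelta at parse time
assert args.repeat_max_time == 100  # default budget for repeat runs

budget = timedelta(minutes=args.repeat_max_time)  # conversion happens downstream
print(budget)  # 1:40:00
```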