7660 Avoid CI stability checks timing out (#32202)

* allow stability checks to avoid TC timeout by checking times in between repeat runs * fix flake8 issue * remove empty flag to trigger stability checks * some commenting explaining how iterations are being tracked * add --repeat-max-time flag * better descriptors for announcing results If the repeat runs stop early to avoid, timeout, a message will be written in the results explaining so. * cast max_time to timedelta * correct syntax for kwargs * specify kwargs source for run_test_iteration * remove empty css flags tag to trigger stability checks * Add clause to handle an ineffective number of iterations completing * replace unassociated change used to trigger stability checks * Implement changes suggested by @stephenmcgruer * Add only necessary formatting changes to stability * wptrunner reformatting changes suggested by @jgraham * functional changes to stability tests suggested by @jgraham * flake8 changes "line break before binary operator" * change run_tests to return counts object * ensure run_tests return values are properly accessed Now that wptrunner's run_tests returns more than 1 value, the return type will be a tuple for the older variables that expect only 1 value. We need to ensure that we pull the expected first value (boolean) out of that tuple. * run_tests has consistent return values even in fail cases wptrunner's run_tests would return a tuple only if not issues arose while running, and would return only a boolean in the case of some expected issue. Now a tuple is returned in all instances as expected. * Return full counts in TestStatus class run_tests will now return a new TestStatus object rather than returning only the number of iterations run. This will allow for more robust statistics to be shown in the future. * small formatting changes reducing some comments and logs to taking less vertical space. * small wording change TestStatus docstring * Replace counts with TestStatus object forego the use of defaultdict counts keeping track of test info/results and instead use the custom class TestStatus. * convert some log strings to f-strings
web-platform-tests · Jan 11, 2022 · 0a269ce · 0a269ce
1 parent 4928cc8
commit 0a269ce
Show file tree

Hide file tree

Showing 3 changed files with 216 additions and 133 deletions.
diff --git a/tools/wptrunner/wptrunner/stability.py b/tools/wptrunner/wptrunner/stability.py
@@ -288,7 +288,10 @@ def wrap_handler(x):
     # warning+ level logs only
     logger.add_handler(StreamHandler(log, JSONFormatter()))
 
-    wptrunner.run_tests(**kwargs)
+    # Use the number of iterations of the test suite that were run to process the results.
+    # if the runs were stopped to avoid hitting the maximum run time.
+    _, test_status = wptrunner.run_tests(**kwargs)
+    iterations = test_status.repeated_runs
 
     logger._state.handlers = initial_handlers
     logger._state.running_tests = set()
@@ -311,12 +314,24 @@ def get_steps(logger, repeat_loop, repeat_restart, kwargs_extras):
         if repeat_loop:
             desc = "Running tests in a loop %d times%s" % (repeat_loop,
                                                            flags_string)
-            steps.append((desc, functools.partial(run_step, logger, repeat_loop, False, kwargs_extra)))
+            steps.append((desc,
+                          functools.partial(run_step,
+                                            logger,
+                                            repeat_loop,
+                                            False,
+                                            kwargs_extra),
+                          repeat_loop))
 
         if repeat_restart:
             desc = "Running tests in a loop with restarts %s times%s" % (repeat_restart,
                                                                          flags_string)
-            steps.append((desc, functools.partial(run_step, logger, repeat_restart, True, kwargs_extra)))
+            steps.append((desc,
+                          functools.partial(run_step,
+                                            logger,
+                                            repeat_restart,
+                                            True,
+                                            kwargs_extra),
+                          repeat_restart))
 
     return steps
 
@@ -335,6 +350,7 @@ def write_summary(logger, step_results, final_result):
 
     logger.info(':::')
 
+
 def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, max_time=None,
                     output_results=True, **kwargs):
     kwargs_extras = [{}]
@@ -348,7 +364,7 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
 
     github_checks_outputter = get_gh_checks_outputter(kwargs["github_checks_text_file"])
 
-    for desc, step_func in steps:
+    for desc, step_func, expected_iterations in steps:
         if max_time and datetime.now() - start_time > max_time:
             logger.info("::: Test verification is taking too long: Giving up!")
             logger.info("::: So far, all checks passed, but not all checks were run.")
@@ -359,6 +375,14 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
         logger.info('::: Running test verification step "%s"...' % desc)
         logger.info(':::')
         results, inconsistent, slow, iterations = step_func(**kwargs)
+
+        if iterations <= 1 and expected_iterations > 1:
+            step_results.append((desc, "FAIL"))
+            logger.info("::: Reached iteration timeout before finishing 2 or more repeat runs.")
+            logger.info("::: At least 2 successful repeat runs are required to validate stability.")
+            write_summary(logger, step_results, "TIMEOUT")
+            return 1
+
         if output_results:
             write_results(logger.info, results, iterations)
 
@@ -378,6 +402,12 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
             write_summary(logger, step_results, "FAIL")
             return 1
 
-        step_results.append((desc, "PASS"))
+        # If the tests passed but the number of iterations didn't match the number expected to run,
+        # it is likely that the runs were stopped early to avoid a timeout.
+        if iterations != expected_iterations:
+            result = f"PASS *  {iterations}/{expected_iterations} repeats completed"
+            step_results.append((desc, result))
+        else:
+            step_results.append((desc, "PASS"))
 
     write_summary(logger, step_results, "PASS")
diff --git a/tools/wptrunner/wptrunner/wptcommandline.py b/tools/wptrunner/wptrunner/wptcommandline.py
@@ -116,6 +116,10 @@ def create_parser(product_choices=None):
                             default=None,
                             help="The maximum number of minutes for the job to run",
                             type=lambda x: timedelta(minutes=float(x)))
+    mode_group.add_argument("--repeat-max-time", action="store",
+                            default=100,
+                            help="The maximum number of minutes for the test suite to attempt repeat runs",
+                            type=int)
     output_results_group = mode_group.add_mutually_exclusive_group()
     output_results_group.add_argument("--verify-no-output-results", action="store_false",
                                       dest="verify_output_results",