From 3058ead8e169a71e6700cbec0c6837e8650fdb37 Mon Sep 17 00:00:00 2001
From: Daniel Smith <56164590+DanielRyanSmith@users.noreply.github.com>
Date: Mon, 27 Dec 2021 05:24:57 -0800
Subject: [PATCH 01/25] allow stability checks to avoid TC timeout by checking times in between repeat runs

---
 tools/wptrunner/wptrunner/stability.py      |  35 +-
 tools/wptrunner/wptrunner/wptcommandline.py |   5 +-
 tools/wptrunner/wptrunner/wptrunner.py      | 373 ++++++++++++--------
 3 files changed, 250 insertions(+), 163 deletions(-)

diff --git a/tools/wptrunner/wptrunner/stability.py b/tools/wptrunner/wptrunner/stability.py
index eeb5af23002993..96138bc347ab19 100644
--- a/tools/wptrunner/wptrunner/stability.py
+++ b/tools/wptrunner/wptrunner/stability.py
@@ -4,7 +4,7 @@
 import io
 import os
 from collections import OrderedDict, defaultdict
-from datetime import datetime
+from datetime import datetime, timedelta
 
 from mozlog import reader
 from mozlog.formatters import JSONFormatter
@@ -261,7 +261,8 @@ def write_results(log, results, iterations, pr_number=None, use_details=False):
         log("\n")
 
 
-def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwargs):
+def run_step(logger, iterations, restart_after_iteration,
+             kwargs_extras, **kwargs):
     from . import wptrunner
     kwargs = copy.deepcopy(kwargs)
 
@@ -269,6 +270,10 @@ def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwarg
         kwargs["repeat"] = iterations
     else:
         kwargs["rerun"] = iterations
+    kwargs["avoided_timeout"] = {"did_avoid": False,
+                                 "iterations_run": iterations}
+    if "max_time" in kwargs:
+        kwargs["max_time"] = timedelta(minutes=kwargs["verify_max_time"])
 
     kwargs["pause_after_test"] = False
     kwargs.update(kwargs_extras)
@@ -290,6 +295,12 @@ def wrap_handler(x):
 
     wptrunner.run_tests(**kwargs)
 
+    # use the number of repeated test suites that were run
+    # to process the results if the runs were stopped to
+    # avoid hitting a TC timeout.
+    if kwargs["avoided_timeout"]["did_avoid"]:
+        iterations = kwargs["avoided_timeout"]["iterations_run"]
+
     logger._state.handlers = initial_handlers
     logger._state.running_tests = set()
     logger._state.suite_started = False
@@ -335,8 +346,9 @@ def write_summary(logger, step_results, final_result):
     logger.info(':::')
 
 
-def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, max_time=None,
-                    output_results=True, **kwargs):
+
+def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True,
+                    max_time=None, output_results=True, **kwargs):
     kwargs_extras = [{}]
     if chaos_mode and kwargs["product"] == "firefox":
         kwargs_extras.append({"chaos_mode_flags": "0xfb"})
@@ -346,12 +358,15 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
     start_time = datetime.now()
     step_results = []
 
-    github_checks_outputter = get_gh_checks_outputter(kwargs["github_checks_text_file"])
+    github_checks_outputter = get_gh_checks_outputter(
+        kwargs["github_checks_text_file"])
 
     for desc, step_func in steps:
-        if max_time and datetime.now() - start_time > max_time:
+        if max_time and \
+                datetime.now() - start_time > timedelta(minutes=max_time):
             logger.info("::: Test verification is taking too long: Giving up!")
-            logger.info("::: So far, all checks passed, but not all checks were run.")
+            logger.info(
+                "::: So far, all checks passed, but not all checks were run.")
             write_summary(logger, step_results, "TIMEOUT")
             return 2
 
@@ -365,7 +380,8 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
         if inconsistent:
             step_results.append((desc, "FAIL"))
             if github_checks_outputter:
-                write_github_checks_summary_inconsistent(github_checks_outputter.output, inconsistent, iterations)
+                write_github_checks_summary_inconsistent(
+                    github_checks_outputter.output, inconsistent, iterations)
             write_inconsistent(logger.info, inconsistent, iterations)
             write_summary(logger, step_results, "FAIL")
             return 1
@@ -373,7 +389,8 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
         if slow:
             step_results.append((desc, "FAIL"))
             if github_checks_outputter:
-                write_github_checks_summary_slow_tests(github_checks_outputter.output, slow)
+                write_github_checks_summary_slow_tests(
+                    github_checks_outputter.output, slow)
             write_slow_tests(logger.info, slow)
             write_summary(logger, step_results, "FAIL")
             return 1
diff --git a/tools/wptrunner/wptrunner/wptcommandline.py b/tools/wptrunner/wptrunner/wptcommandline.py
index fd75f115649077..b7c353012f19a6 100644
--- a/tools/wptrunner/wptrunner/wptcommandline.py
+++ b/tools/wptrunner/wptrunner/wptcommandline.py
@@ -3,7 +3,6 @@
 import sys
 from collections import OrderedDict
 from distutils.spawn import find_executable
-from datetime import timedelta
 
 from . import config
 from . import wpttest
@@ -113,9 +112,9 @@ def create_parser(product_choices=None):
                             dest="verify_chaos_mode",
                             help="Enable chaos mode when running on Firefox")
     mode_group.add_argument("--verify-max-time", action="store",
-                            default=None,
+                            default=100,
                             help="The maximum number of minutes for the job to run",
-                            type=lambda x: timedelta(minutes=float(x)))
+                            type=int)
     output_results_group = mode_group.add_mutually_exclusive_group()
     output_results_group.add_argument("--verify-no-output-results", action="store_false",
                                       dest="verify_output_results",
diff --git a/tools/wptrunner/wptrunner/wptrunner.py b/tools/wptrunner/wptrunner/wptrunner.py
index 76ef3dbc9901ca..78d2567d44cec3 100644
--- a/tools/wptrunner/wptrunner/wptrunner.py
+++ b/tools/wptrunner/wptrunner/wptrunner.py
@@ -1,6 +1,8 @@
 import json
 import os
 import sys
+from datetime import datetime, timedelta
+from collections import defaultdict
 
 import wptserve
 from wptserve import sslutils
@@ -37,6 +39,7 @@ metadata files are used to store the expected test results.
 """
 
+
 def setup_logging(*args, **kwargs):
     global logger
     logger = wptlogging.setup(*args, **kwargs)
@@ -150,7 +153,135 @@ def get_pause_after_test(test_loader, **kwargs):
     return kwargs["pause_after_test"]
 
 
-def run_tests(config, test_paths, product, **kwargs):
+def run_test_iteration(counts, test_loader, test_source_kwargs,
+                       test_source_cls, run_info, recording,
+                       test_environment, product, kwargs):
+    """Runs the entire test suite.
+    This is called for each repeat run requested."""
+    tests = []
+    for test_type in test_loader.test_types:
+        tests.extend(test_loader.tests[test_type])
+
+    try:
+        test_groups = test_source_cls.tests_by_group(
+            tests, **test_source_kwargs)
+    except Exception:
+        logger.critical("Loading tests failed")
+        return False
+
+    logger.suite_start(test_groups,
+                       name='web-platform-test',
+                       run_info=run_info,
+                       extra={"run_by_dir": kwargs["run_by_dir"]})
+    for test_type in kwargs["test_types"]:
+        logger.info("Running %s tests" % test_type)
+
+        browser_cls = product.get_browser_cls(test_type)
+
+        browser_kwargs = \
+            product.get_browser_kwargs(logger,
+                                       test_type,
+                                       run_info,
+                                       config=test_environment.config,
+                                       num_test_groups=len(
+                                           test_groups),
+                                       **kwargs)
+
+        executor_cls = product.executor_classes.get(test_type)
+        executor_kwargs = product.get_executor_kwargs(logger,
+                                                      test_type,
+                                                      test_environment,
+                                                      run_info,
+                                                      **kwargs)
+
+        if executor_cls is None:
+            logger.error("Unsupported test type %s for product %s" %
+                         (test_type, product.name))
+            continue
+
+        for test in test_loader.disabled_tests[test_type]:
+            logger.test_start(test.id)
+            logger.test_end(test.id, status="SKIP")
+            counts["skipped"] += 1
+
+        if test_type == "testharness":
+            run_tests = {"testharness": []}
+            for test in test_loader.tests["testharness"]:
+                if ((test.testdriver
+                     and not executor_cls.supports_testdriver) or
+                        (test.jsshell and not executor_cls.supports_jsshell)):
+                    logger.test_start(test.id)
+                    logger.test_end(test.id, status="SKIP")
+                    counts["skipped"] += 1
+                else:
+                    run_tests["testharness"].append(test)
+        else:
+            run_tests = test_loader.tests
+
+        recording.pause()
+        with ManagerGroup("web-platform-tests",
+                          kwargs["processes"],
+                          test_source_cls,
+                          test_source_kwargs,
+                          browser_cls,
+                          browser_kwargs,
+                          executor_cls,
+                          executor_kwargs,
+                          kwargs["rerun"],
+                          kwargs["pause_after_test"],
+                          kwargs["pause_on_unexpected"],
+                          kwargs["restart_on_unexpected"],
+                          kwargs["debug_info"],
+                          not kwargs["no_capture_stdio"],
+                          recording=recording) as manager_group:
+            try:
+                manager_group.run(test_type, run_tests)
+            except KeyboardInterrupt:
+                logger.critical("Main thread got signal")
+                manager_group.stop()
+                raise
+        counts["total_tests"] += manager_group.test_count()
+        counts["unexpected"] += manager_group.unexpected_count()
+        counts["unexpected_pass"] += manager_group.unexpected_pass_count()
+
+    return True
+
+
+def evaluate_runs(counts, avoided_timeout, kwargs):
+    """Evaluates the test counts after the
+    given number of repeat runs has finished"""
+    if counts["total_tests"] == 0:
+        if counts["skipped"] > 0:
+            logger.warning("All requested tests were skipped")
+        else:
+            if kwargs["default_exclude"]:
+                logger.info("No tests ran")
+                return True
+            else:
+                logger.critical("No tests ran")
+                return False
+
+    if counts["unexpected"] and not kwargs["fail_on_unexpected"]:
+        logger.info("Tolerating %s unexpected results" % counts["unexpected"])
+        return True
+
+    all_unexpected_passed = (counts["unexpected"] and
+                             counts["unexpected"] == counts["unexpected_pass"])
+    if all_unexpected_passed and not kwargs["fail_on_unexpected_pass"]:
+        logger.info("Tolerating %i unexpected results because they all PASS" %
+                    counts["unexpected_pass"])
+        return True
+
+    # If the runs were stopped early to avoid a TC timeout,
+    # the number of iterations that were run need to be returned
+    # so that the test results can be processed appropriately.
+    if avoided_timeout:
+        kwargs["avoided_timeout"]["did_avoid"] = True
+        kwargs["avoided_timeout"]["iterations_run"] = counts["repeat"]
+    return counts["unexpected"] == 0
+
+
+def run_tests(config, test_paths, product, max_time=None, **kwargs):
     """Set up the test environment, load the list of tests to be executed,
     and invoke the remainder of the code to execute tests"""
     mp = mpcontext.get_context()
@@ -158,9 +289,11 @@ def run_tests(config, test_paths, product, **kwargs):
         recorder = instruments.NullInstrument()
     else:
         recorder = instruments.Instrument(kwargs["instrument_to_file"])
-    with recorder as recording, capture.CaptureIO(logger,
-                                                  not kwargs["no_capture_stdio"],
-                                                  mp_context=mp):
+
+    with recorder as recording, \
+            capture.CaptureIO(logger,
+                              not kwargs["no_capture_stdio"],
+                              mp_context=mp):
         recording.set(["startup"])
         env.do_delayed_imports(logger, test_paths)
 
@@ -174,52 +307,59 @@ def run_tests(config, test_paths, product, **kwargs):
             env_extras.append(FontInstaller(
                 logger,
                 font_dir=kwargs["font_dir"],
-                ahem=os.path.join(test_paths["/"]["tests_path"], "fonts/Ahem.ttf")
+                ahem=os.path.join(
+                    test_paths["/"]["tests_path"], "fonts/Ahem.ttf")
             ))
 
         recording.set(["startup", "load_tests"])
 
-        test_groups = (testloader.TestGroupsFile(logger, kwargs["test_groups_file"])
-                       if kwargs["test_groups_file"] else None)
+        test_groups = \
+            (testloader.TestGroupsFile(logger, kwargs["test_groups_file"])
+             if kwargs["test_groups_file"] else None)
 
         (test_source_cls,
         test_source_kwargs,
        chunker_kwargs) = testloader.get_test_src(logger=logger,
                                                    test_groups=test_groups,
                                                    **kwargs)
-        run_info, test_loader = get_loader(test_paths,
-                                           product.name,
-                                           run_info_extras=product.run_info_extras(**kwargs),
-                                           chunker_kwargs=chunker_kwargs,
-                                           test_groups=test_groups,
-                                           **kwargs)
+        run_info, test_loader = \
+            get_loader(test_paths, product.name,
+                       run_info_extras=product.run_info_extras(
+                           **kwargs),
+                       chunker_kwargs=chunker_kwargs,
+                       test_groups=test_groups,
+                       **kwargs)
 
         logger.info("Using %i client processes" % kwargs["processes"])
 
-        skipped_tests = 0
-        test_total = 0
-        unexpected_total = 0
-        unexpected_pass_total = 0
-
+        counts = defaultdict(int)
         if len(test_loader.test_ids) == 0 and kwargs["test_list"]:
logger.critical("Unable to find any tests at the path(s):") for path in kwargs["test_list"]: logger.critical(" %s" % path) - logger.critical("Please check spelling and make sure there are tests in the specified path(s).") + logger.critical( + "Please check spelling and make sure" + " there are tests in the specified path(s).") return False - kwargs["pause_after_test"] = get_pause_after_test(test_loader, **kwargs) - - ssl_config = {"type": kwargs["ssl_type"], - "openssl": {"openssl_binary": kwargs["openssl_binary"]}, - "pregenerated": {"host_key_path": kwargs["host_key_path"], - "host_cert_path": kwargs["host_cert_path"], - "ca_cert_path": kwargs["ca_cert_path"]}} - - testharness_timeout_multipler = product.get_timeout_multiplier("testharness", - run_info, - **kwargs) + kwargs["pause_after_test"] = get_pause_after_test( + test_loader, **kwargs) + + ssl_config = { + "type": kwargs["ssl_type"], + "openssl": {"openssl_binary": kwargs["openssl_binary"]}, + "pregenerated": {"host_key_path": kwargs["host_key_path"], + "host_cert_path": kwargs["host_cert_path"], + "ca_cert_path": kwargs["ca_cert_path"]} + } + + testharness_timeout_multipler = \ + product.get_timeout_multiplier("testharness", + run_info, + **kwargs) - mojojs_path = kwargs["mojojs_path"] if kwargs["enable_mojojs"] else None + mojojs_path = None + if kwargs["enable_mojojs"]: + mojojs_path = kwargs["mojojs_path"] recording.set(["startup", "start_environment"]) with env.TestEnvironment(test_paths, @@ -235,6 +375,7 @@ def run_tests(config, test_paths, product, **kwargs): recording.set(["startup", "ensure_environment"]) try: test_environment.ensure_started() + start_time = datetime.now() except env.TestEnvironmentError as e: logger.critical("Error starting test environment: %s" % e) raise @@ -242,136 +383,66 @@ def run_tests(config, test_paths, product, **kwargs): recording.set(["startup"]) repeat = kwargs["repeat"] - repeat_count = 0 repeat_until_unexpected = kwargs["repeat_until_unexpected"] - while repeat_count < repeat or repeat_until_unexpected: - repeat_count += 1 + # keep track of longest time taken to complete a + # test suite iteration so that the runs can be stopped + # to avoid a possible TC timeout. + longest_iteration_time = timedelta() + # keep track if we break the loop to avoid timeout. + avoided_timeout = False + + while counts["repeat"] < repeat or repeat_until_unexpected: + # if the next repeat run could cause the TC timeout to be + # reached, stop now and use the test results we have. + estimate = datetime.now() + longest_iteration_time + if not repeat_until_unexpected and max_time \ + and estimate >= start_time + max_time: + avoided_timeout = True + logger.info( + "Repeat runs are in danger of reaching timeout!" + " Quitting early.") + logger.info( + "Ran %s of %s iterations." 
+                        (counts["repeat"], repeat))
+                    break
+
+                # begin tracking runtime of the test suite
+                iteration_start = datetime.now()
+                counts["repeat"] += 1
                 if repeat_until_unexpected:
-                    logger.info("Repetition %i" % (repeat_count))
+                    logger.info("Repetition %i" % (counts["repeat"]))
                 elif repeat > 1:
-                    logger.info("Repetition %i / %i" % (repeat_count, repeat))
-
-                test_count = 0
-                unexpected_count = 0
-                unexpected_pass_count = 0
-
-                tests = []
-                for test_type in test_loader.test_types:
-                    tests.extend(test_loader.tests[test_type])
-
-                try:
-                    test_groups = test_source_cls.tests_by_group(tests, **test_source_kwargs)
-                except Exception:
-                    logger.critical("Loading tests failed")
+                    logger.info(
+                        "Repetition %i / %i" % (counts["repeat"], repeat))
+
+                iter_success = run_test_iteration(counts, test_loader,
+                                                  test_source_kwargs,
+                                                  test_source_cls, run_info,
+                                                  recording, test_environment,
+                                                  product, kwargs)
+                # if there were issues with the suite run
+                # (tests not loaded, etc.) return
+                if not iter_success:
                     return False
-
-                logger.suite_start(test_groups,
-                                   name='web-platform-test',
-                                   run_info=run_info,
-                                   extra={"run_by_dir": kwargs["run_by_dir"]})
-                for test_type in kwargs["test_types"]:
-                    logger.info("Running %s tests" % test_type)
-
-                    browser_cls = product.get_browser_cls(test_type)
-
-                    browser_kwargs = product.get_browser_kwargs(logger,
-                                                                test_type,
-                                                                run_info,
-                                                                config=test_environment.config,
-                                                                num_test_groups=len(test_groups),
-                                                                **kwargs)
-
-                    executor_cls = product.executor_classes.get(test_type)
-                    executor_kwargs = product.get_executor_kwargs(logger,
-                                                                  test_type,
-                                                                  test_environment,
-                                                                  run_info,
-                                                                  **kwargs)
-
-                    if executor_cls is None:
-                        logger.error("Unsupported test type %s for product %s" %
-                                     (test_type, product.name))
-                        continue
-
-                    for test in test_loader.disabled_tests[test_type]:
-                        logger.test_start(test.id)
-                        logger.test_end(test.id, status="SKIP")
-                        skipped_tests += 1
-
-                    if test_type == "testharness":
-                        run_tests = {"testharness": []}
-                        for test in test_loader.tests["testharness"]:
-                            if ((test.testdriver and not executor_cls.supports_testdriver) or
-                                (test.jsshell and not executor_cls.supports_jsshell)):
-                                logger.test_start(test.id)
-                                logger.test_end(test.id, status="SKIP")
-                                skipped_tests += 1
-                            else:
-                                run_tests["testharness"].append(test)
-                    else:
-                        run_tests = test_loader.tests
-
-                    recording.pause()
-                    with ManagerGroup("web-platform-tests",
-                                      kwargs["processes"],
-                                      test_source_cls,
-                                      test_source_kwargs,
-                                      browser_cls,
-                                      browser_kwargs,
-                                      executor_cls,
-                                      executor_kwargs,
-                                      kwargs["rerun"],
-                                      kwargs["pause_after_test"],
-                                      kwargs["pause_on_unexpected"],
-                                      kwargs["restart_on_unexpected"],
-                                      kwargs["debug_info"],
-                                      not kwargs["no_capture_stdio"],
-                                      recording=recording) as manager_group:
-                        try:
-                            manager_group.run(test_type, run_tests)
-                        except KeyboardInterrupt:
-                            logger.critical("Main thread got signal")
-                            manager_group.stop()
-                            raise
-                    test_count += manager_group.test_count()
-                    unexpected_count += manager_group.unexpected_count()
-                    unexpected_pass_count += manager_group.unexpected_pass_count()
 
                 recording.set(["after-end"])
-                test_total += test_count
-                unexpected_total += unexpected_count
-                unexpected_pass_total += unexpected_pass_count
-                logger.info("Got %i unexpected results, with %i unexpected passes" %
-                            (unexpected_count, unexpected_pass_count))
+                logger.info(
+                    "Got %i unexpected results, with %i unexpected passes" %
+                    (counts["unexpected"], counts["unexpected_pass"]))
                 logger.suite_end()
-                if repeat_until_unexpected and unexpected_total > 0:
-                    break
-                if repeat_count == 1 and len(test_loader.test_ids) == skipped_tests:
-                    break
-
-    if test_total == 0:
-        if skipped_tests > 0:
-            logger.warning("All requested tests were skipped")
-        else:
-            if kwargs["default_exclude"]:
-                logger.info("No tests ran")
-                return True
-            else:
-                logger.critical("No tests ran")
-                return False
+                # determine the longest test suite runtime seen
+                longest_iteration_time = max(
+                    longest_iteration_time,
+                    datetime.now() - iteration_start)
 
-    if unexpected_total and not kwargs["fail_on_unexpected"]:
-        logger.info("Tolerating %s unexpected results" % unexpected_total)
-        return True
-
-    all_unexpected_passed = (unexpected_total and
-                             unexpected_total == unexpected_pass_total)
-    if all_unexpected_passed and not kwargs["fail_on_unexpected_pass"]:
-        logger.info("Tolerating %i unexpected results because they all PASS" %
-                    unexpected_pass_total)
-        return True
+                if repeat_until_unexpected and counts["unexpected"] > 0:
+                    break
+                if counts["repeat"] == 1 \
+                        and len(test_loader.test_ids) == counts["skipped"]:
+                    break
 
-    return unexpected_total == 0
+    return evaluate_runs(counts, avoided_timeout, kwargs)
 
 
 def check_stability(**kwargs):

From 52e31bb96bf08504552bc4fff8e2d9e34ed02ab0 Mon Sep 17 00:00:00 2001
From: Daniel Smith <56164590+DanielRyanSmith@users.noreply.github.com>
Date: Mon, 27 Dec 2021 05:43:32 -0800
Subject: [PATCH 02/25] fix flake8 issue

---
 tools/wptrunner/wptrunner/wptrunner.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/wptrunner/wptrunner/wptrunner.py b/tools/wptrunner/wptrunner/wptrunner.py
index 78d2567d44cec3..8f6cf5dc8ae7de 100644
--- a/tools/wptrunner/wptrunner/wptrunner.py
+++ b/tools/wptrunner/wptrunner/wptrunner.py
@@ -207,8 +207,7 @@ def run_test_iteration(counts, test_loader, test_source_kwargs,
         if test_type == "testharness":
             run_tests = {"testharness": []}
             for test in test_loader.tests["testharness"]:
-                if ((test.testdriver
-                     and not executor_cls.supports_testdriver) or
+                if ((test.testdriver and not executor_cls.supports_testdriver) or
                         (test.jsshell and not executor_cls.supports_jsshell)):
                     logger.test_start(test.id)
                     logger.test_end(test.id, status="SKIP")

From 33f8b1b811023509b0bfc8b8e129b8b55e07baed Mon Sep 17 00:00:00 2001
From: Daniel Smith <56164590+DanielRyanSmith@users.noreply.github.com>
Date: Mon, 27 Dec 2021 05:44:13 -0800
Subject: [PATCH 03/25] remove empty flag to trigger stability checks

---
 css/CSS2/borders/border-001.xht | 1 -
 1 file changed, 1 deletion(-)

diff --git a/css/CSS2/borders/border-001.xht b/css/CSS2/borders/border-001.xht
index 7b3fc200386498..f672d88a0dc4df 100644
--- a/css/CSS2/borders/border-001.xht
+++ b/css/CSS2/borders/border-001.xht
@@ -9,7 +9,6 @@
 
-