From 3058ead8e169a71e6700cbec0c6837e8650fdb37 Mon Sep 17 00:00:00 2001
From: Daniel Smith <56164590+DanielRyanSmith@users.noreply.github.com>
Date: Mon, 27 Dec 2021 05:24:57 -0800
Subject: [PATCH 01/25] allow stability checks to avoid TC timeout by checking times in between repeat runs

---
 tools/wptrunner/wptrunner/stability.py      |  35 +-
 tools/wptrunner/wptrunner/wptcommandline.py |   5 +-
 tools/wptrunner/wptrunner/wptrunner.py      | 373 ++++++++++++--------
 3 files changed, 250 insertions(+), 163 deletions(-)

diff --git a/tools/wptrunner/wptrunner/stability.py b/tools/wptrunner/wptrunner/stability.py
index eeb5af23002993..96138bc347ab19 100644
--- a/tools/wptrunner/wptrunner/stability.py
+++ b/tools/wptrunner/wptrunner/stability.py
@@ -4,7 +4,7 @@
 import io
 import os
 from collections import OrderedDict, defaultdict
-from datetime import datetime
+from datetime import datetime, timedelta
 
 from mozlog import reader
 from mozlog.formatters import JSONFormatter
@@ -261,7 +261,8 @@ def write_results(log, results, iterations, pr_number=None, use_details=False):
         log("\n")
 
 
-def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwargs):
+def run_step(logger, iterations, restart_after_iteration,
+             kwargs_extras, **kwargs):
     from . import wptrunner
     kwargs = copy.deepcopy(kwargs)
 
@@ -269,6 +270,10 @@ def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwarg
         kwargs["repeat"] = iterations
     else:
         kwargs["rerun"] = iterations
+    kwargs["avoided_timeout"] = {"did_avoid": False,
+                                 "iterations_run": iterations}
+    if "max_time" in kwargs:
+        kwargs["max_time"] = timedelta(minutes=kwargs["verify_max_time"])
 
     kwargs["pause_after_test"] = False
     kwargs.update(kwargs_extras)
@@ -290,6 +295,12 @@ def wrap_handler(x):
 
     wptrunner.run_tests(**kwargs)
 
+    # use the number of repeated test suites that were run
+    # to process the results if the runs were stopped to
+    # avoid hitting a TC timeout.
+    if kwargs["avoided_timeout"]["did_avoid"]:
+        iterations = kwargs["avoided_timeout"]["iterations_run"]
+
     logger._state.handlers = initial_handlers
     logger._state.running_tests = set()
     logger._state.suite_started = False
@@ -335,8 +346,9 @@ def write_summary(logger, step_results, final_result):
     logger.info(':::')
 
 
-def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, max_time=None,
-                    output_results=True, **kwargs):
+
+def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True,
+                    max_time=None, output_results=True, **kwargs):
     kwargs_extras = [{}]
     if chaos_mode and kwargs["product"] == "firefox":
         kwargs_extras.append({"chaos_mode_flags": "0xfb"})
@@ -346,12 +358,15 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
     start_time = datetime.now()
     step_results = []
 
-    github_checks_outputter = get_gh_checks_outputter(kwargs["github_checks_text_file"])
+    github_checks_outputter = get_gh_checks_outputter(
+        kwargs["github_checks_text_file"])
 
     for desc, step_func in steps:
-        if max_time and datetime.now() - start_time > max_time:
+        if max_time and \
+                datetime.now() - start_time > timedelta(minutes=max_time):
             logger.info("::: Test verification is taking too long: Giving up!")
-            logger.info("::: So far, all checks passed, but not all checks were run.")
+            logger.info(
+                "::: So far, all checks passed, but not all checks were run.")
             write_summary(logger, step_results, "TIMEOUT")
             return 2
 
@@ -365,7 +380,8 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
         if inconsistent:
             step_results.append((desc, "FAIL"))
             if github_checks_outputter:
-                write_github_checks_summary_inconsistent(github_checks_outputter.output, inconsistent, iterations)
+                write_github_checks_summary_inconsistent(
+                    github_checks_outputter.output, inconsistent, iterations)
             write_inconsistent(logger.info, inconsistent, iterations)
             write_summary(logger, step_results, "FAIL")
             return 1
@@ -373,7 +389,8 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
         if slow:
             step_results.append((desc, "FAIL"))
             if github_checks_outputter:
-                write_github_checks_summary_slow_tests(github_checks_outputter.output, slow)
+                write_github_checks_summary_slow_tests(
+                    github_checks_outputter.output, slow)
             write_slow_tests(logger.info, slow)
             write_summary(logger, step_results, "FAIL")
             return 1
diff --git a/tools/wptrunner/wptrunner/wptcommandline.py b/tools/wptrunner/wptrunner/wptcommandline.py
index fd75f115649077..b7c353012f19a6 100644
--- a/tools/wptrunner/wptrunner/wptcommandline.py
+++ b/tools/wptrunner/wptrunner/wptcommandline.py
@@ -3,7 +3,6 @@
 import sys
 from collections import OrderedDict
 from distutils.spawn import find_executable
-from datetime import timedelta
 
 from . import config
 from . import wpttest
@@ -113,9 +112,9 @@ def create_parser(product_choices=None):
                             dest="verify_chaos_mode",
                             help="Enable chaos mode when running on Firefox")
     mode_group.add_argument("--verify-max-time", action="store",
-                            default=None,
+                            default=100,
                             help="The maximum number of minutes for the job to run",
-                            type=lambda x: timedelta(minutes=float(x)))
+                            type=int)
     output_results_group = mode_group.add_mutually_exclusive_group()
     output_results_group.add_argument("--verify-no-output-results", action="store_false",
                                       dest="verify_output_results",
diff --git a/tools/wptrunner/wptrunner/wptrunner.py b/tools/wptrunner/wptrunner/wptrunner.py
index 76ef3dbc9901ca..78d2567d44cec3 100644
--- a/tools/wptrunner/wptrunner/wptrunner.py
+++ b/tools/wptrunner/wptrunner/wptrunner.py
@@ -1,6 +1,8 @@
 import json
 import os
 import sys
+from datetime import datetime, timedelta
+from collections import defaultdict
 
 import wptserve
 from wptserve import sslutils
@@ -37,6 +39,7 @@ metadata files are used to store the expected test results.
 """
 
+
 def setup_logging(*args, **kwargs):
     global logger
     logger = wptlogging.setup(*args, **kwargs)
@@ -150,7 +153,135 @@ def get_pause_after_test(test_loader, **kwargs):
     return kwargs["pause_after_test"]
 
 
-def run_tests(config, test_paths, product, **kwargs):
+def run_test_iteration(counts, test_loader, test_source_kwargs,
+                       test_source_cls, run_info, recording,
+                       test_environment, product, kwargs):
+    """Runs the entire test suite.
+    This is called for each repeat run requested."""
+    tests = []
+    for test_type in test_loader.test_types:
+        tests.extend(test_loader.tests[test_type])
+
+    try:
+        test_groups = test_source_cls.tests_by_group(
+            tests, **test_source_kwargs)
+    except Exception:
+        logger.critical("Loading tests failed")
+        return False
+
+    logger.suite_start(test_groups,
+                       name='web-platform-test',
+                       run_info=run_info,
+                       extra={"run_by_dir": kwargs["run_by_dir"]})
+    for test_type in kwargs["test_types"]:
+        logger.info("Running %s tests" % test_type)
+
+        browser_cls = product.get_browser_cls(test_type)
+
+        browser_kwargs = \
+            product.get_browser_kwargs(logger,
+                                       test_type,
+                                       run_info,
+                                       config=test_environment.config,
+                                       num_test_groups=len(
+                                           test_groups),
+                                       **kwargs)
+
+        executor_cls = product.executor_classes.get(test_type)
+        executor_kwargs = product.get_executor_kwargs(logger,
+                                                      test_type,
+                                                      test_environment,
+                                                      run_info,
+                                                      **kwargs)
+
+        if executor_cls is None:
+            logger.error("Unsupported test type %s for product %s" %
+                         (test_type, product.name))
+            continue
+
+        for test in test_loader.disabled_tests[test_type]:
+            logger.test_start(test.id)
+            logger.test_end(test.id, status="SKIP")
+            counts["skipped"] += 1
+
+        if test_type == "testharness":
+            run_tests = {"testharness": []}
+            for test in test_loader.tests["testharness"]:
+                if ((test.testdriver
+                     and not executor_cls.supports_testdriver) or
+                        (test.jsshell and not executor_cls.supports_jsshell)):
+                    logger.test_start(test.id)
+                    logger.test_end(test.id, status="SKIP")
+                    counts["skipped"] += 1
+                else:
+                    run_tests["testharness"].append(test)
+        else:
+            run_tests = test_loader.tests
+
+        recording.pause()
+        with ManagerGroup("web-platform-tests",
+                          kwargs["processes"],
+                          test_source_cls,
+                          test_source_kwargs,
+                          browser_cls,
+                          browser_kwargs,
+                          executor_cls,
+                          executor_kwargs,
+                          kwargs["rerun"],
+                          kwargs["pause_after_test"],
+                          kwargs["pause_on_unexpected"],
+                          kwargs["restart_on_unexpected"],
+                          kwargs["debug_info"],
+                          not kwargs["no_capture_stdio"],
+                          recording=recording) as manager_group:
+            try:
+                manager_group.run(test_type, run_tests)
+            except KeyboardInterrupt:
+                logger.critical("Main thread got signal")
+                manager_group.stop()
+                raise
+        counts["total_tests"] += manager_group.test_count()
+        counts["unexpected"] += manager_group.unexpected_count()
+        counts["unexpected_pass"] += manager_group.unexpected_pass_count()
+
+    return True
+
+
+def evaluate_runs(counts, avoided_timeout, kwargs):
+    """Evaluates the test counts after the
+    given number of repeat runs has finished"""
+    if counts["total_tests"] == 0:
+        if counts["skipped"] > 0:
+            logger.warning("All requested tests were skipped")
+        else:
+            if kwargs["default_exclude"]:
+                logger.info("No tests ran")
+                return True
+            else:
+                logger.critical("No tests ran")
+                return False
+
+    if counts["unexpected"] and not kwargs["fail_on_unexpected"]:
+        logger.info("Tolerating %s unexpected results" % counts["unexpected"])
+        return True
+
+    all_unexpected_passed = (counts["unexpected"] and
+                             counts["unexpected"] == counts["unexpected_pass"])
+    if all_unexpected_passed and not kwargs["fail_on_unexpected_pass"]:
+        logger.info("Tolerating %i unexpected results because they all PASS" %
+                    counts["unexpected_pass"])
+        return True
+
+    # If the runs were stopped early to avoid a TC timeout,
+    # the number of iterations that were run need to be returned
+    # so that the test results can be processed appropriately.
+    if avoided_timeout:
+        kwargs["avoided_timeout"]["did_avoid"] = True
+        kwargs["avoided_timeout"]["iterations_run"] = counts["repeat"]
+    return counts["unexpected"] == 0
+
+
+def run_tests(config, test_paths, product, max_time=None, **kwargs):
     """Set up the test environment, load the list of tests to be executed,
     and invoke the remainder of the code to execute tests"""
     mp = mpcontext.get_context()
@@ -158,9 +289,11 @@ def run_tests(config, test_paths, product, **kwargs):
         recorder = instruments.NullInstrument()
     else:
         recorder = instruments.Instrument(kwargs["instrument_to_file"])
-    with recorder as recording, capture.CaptureIO(logger,
-                                                  not kwargs["no_capture_stdio"],
-                                                  mp_context=mp):
+
+    with recorder as recording, \
+            capture.CaptureIO(logger,
+                              not kwargs["no_capture_stdio"],
+                              mp_context=mp):
         recording.set(["startup"])
         env.do_delayed_imports(logger, test_paths)
 
@@ -174,52 +307,59 @@ def run_tests(config, test_paths, product, **kwargs):
             env_extras.append(FontInstaller(
                 logger,
                 font_dir=kwargs["font_dir"],
-                ahem=os.path.join(test_paths["/"]["tests_path"], "fonts/Ahem.ttf")
+                ahem=os.path.join(
+                    test_paths["/"]["tests_path"], "fonts/Ahem.ttf")
             ))
 
         recording.set(["startup", "load_tests"])
 
-        test_groups = (testloader.TestGroupsFile(logger, kwargs["test_groups_file"])
-                       if kwargs["test_groups_file"] else None)
+        test_groups = \
+            (testloader.TestGroupsFile(logger, kwargs["test_groups_file"])
+             if kwargs["test_groups_file"] else None)
 
         (test_source_cls,
         test_source_kwargs,
        chunker_kwargs) = testloader.get_test_src(logger=logger,
                                                    test_groups=test_groups,
                                                    **kwargs)
-        run_info, test_loader = get_loader(test_paths,
-                                           product.name,
-                                           run_info_extras=product.run_info_extras(**kwargs),
-                                           chunker_kwargs=chunker_kwargs,
-                                           test_groups=test_groups,
-                                           **kwargs)
+        run_info, test_loader = \
+            get_loader(test_paths, product.name,
+                       run_info_extras=product.run_info_extras(
+                           **kwargs),
+                       chunker_kwargs=chunker_kwargs,
+                       test_groups=test_groups,
+                       **kwargs)
 
         logger.info("Using %i client processes" % kwargs["processes"])
 
-        skipped_tests = 0
-        test_total = 0
-        unexpected_total = 0
-        unexpected_pass_total = 0
-
+        counts = defaultdict(int)
         if len(test_loader.test_ids) == 0 and kwargs["test_list"]:
logger.critical("Unable to find any tests at the path(s):") for path in kwargs["test_list"]: logger.critical(" %s" % path) - logger.critical("Please check spelling and make sure there are tests in the specified path(s).") + logger.critical( + "Please check spelling and make sure" + " there are tests in the specified path(s).") return False - kwargs["pause_after_test"] = get_pause_after_test(test_loader, **kwargs) - - ssl_config = {"type": kwargs["ssl_type"], - "openssl": {"openssl_binary": kwargs["openssl_binary"]}, - "pregenerated": {"host_key_path": kwargs["host_key_path"], - "host_cert_path": kwargs["host_cert_path"], - "ca_cert_path": kwargs["ca_cert_path"]}} - - testharness_timeout_multipler = product.get_timeout_multiplier("testharness", - run_info, - **kwargs) + kwargs["pause_after_test"] = get_pause_after_test( + test_loader, **kwargs) + + ssl_config = { + "type": kwargs["ssl_type"], + "openssl": {"openssl_binary": kwargs["openssl_binary"]}, + "pregenerated": {"host_key_path": kwargs["host_key_path"], + "host_cert_path": kwargs["host_cert_path"], + "ca_cert_path": kwargs["ca_cert_path"]} + } + + testharness_timeout_multipler = \ + product.get_timeout_multiplier("testharness", + run_info, + **kwargs) - mojojs_path = kwargs["mojojs_path"] if kwargs["enable_mojojs"] else None + mojojs_path = None + if kwargs["enable_mojojs"]: + mojojs_path = kwargs["mojojs_path"] recording.set(["startup", "start_environment"]) with env.TestEnvironment(test_paths, @@ -235,6 +375,7 @@ def run_tests(config, test_paths, product, **kwargs): recording.set(["startup", "ensure_environment"]) try: test_environment.ensure_started() + start_time = datetime.now() except env.TestEnvironmentError as e: logger.critical("Error starting test environment: %s" % e) raise @@ -242,136 +383,66 @@ def run_tests(config, test_paths, product, **kwargs): recording.set(["startup"]) repeat = kwargs["repeat"] - repeat_count = 0 repeat_until_unexpected = kwargs["repeat_until_unexpected"] - while repeat_count < repeat or repeat_until_unexpected: - repeat_count += 1 + # keep track of longest time taken to complete a + # test suite iteration so that the runs can be stopped + # to avoid a possible TC timeout. + longest_iteration_time = timedelta() + # keep track if we break the loop to avoid timeout. + avoided_timeout = False + + while counts["repeat"] < repeat or repeat_until_unexpected: + # if the next repeat run could cause the TC timeout to be + # reached, stop now and use the test results we have. + estimate = datetime.now() + longest_iteration_time + if not repeat_until_unexpected and max_time \ + and estimate >= start_time + max_time: + avoided_timeout = True + logger.info( + "Repeat runs are in danger of reaching timeout!" + " Quitting early.") + logger.info( + "Ran %s of %s iterations." 
+                        (counts["repeat"], repeat))
+                    break
+
+                # begin tracking runtime of the test suite
+                iteration_start = datetime.now()
+                counts["repeat"] += 1
                 if repeat_until_unexpected:
-                    logger.info("Repetition %i" % (repeat_count))
+                    logger.info("Repetition %i" % (counts["repeat"]))
                 elif repeat > 1:
-                    logger.info("Repetition %i / %i" % (repeat_count, repeat))
-
-                test_count = 0
-                unexpected_count = 0
-                unexpected_pass_count = 0
-
-                tests = []
-                for test_type in test_loader.test_types:
-                    tests.extend(test_loader.tests[test_type])
-
-                try:
-                    test_groups = test_source_cls.tests_by_group(tests, **test_source_kwargs)
-                except Exception:
-                    logger.critical("Loading tests failed")
+                    logger.info(
+                        "Repetition %i / %i" % (counts["repeat"], repeat))
+
+                iter_success = run_test_iteration(counts, test_loader,
+                                                  test_source_kwargs,
+                                                  test_source_cls, run_info,
+                                                  recording, test_environment,
+                                                  product, kwargs)
+                # if there were issues with the suite run
+                # (tests not loaded, etc.) return
+                if not iter_success:
                     return False
-
-                logger.suite_start(test_groups,
-                                   name='web-platform-test',
-                                   run_info=run_info,
-                                   extra={"run_by_dir": kwargs["run_by_dir"]})
-                for test_type in kwargs["test_types"]:
-                    logger.info("Running %s tests" % test_type)
-
-                    browser_cls = product.get_browser_cls(test_type)
-
-                    browser_kwargs = product.get_browser_kwargs(logger,
-                                                                test_type,
-                                                                run_info,
-                                                                config=test_environment.config,
-                                                                num_test_groups=len(test_groups),
-                                                                **kwargs)
-
-                    executor_cls = product.executor_classes.get(test_type)
-                    executor_kwargs = product.get_executor_kwargs(logger,
-                                                                  test_type,
-                                                                  test_environment,
-                                                                  run_info,
-                                                                  **kwargs)
-
-                    if executor_cls is None:
-                        logger.error("Unsupported test type %s for product %s" %
-                                     (test_type, product.name))
-                        continue
-
-                    for test in test_loader.disabled_tests[test_type]:
-                        logger.test_start(test.id)
-                        logger.test_end(test.id, status="SKIP")
-                        skipped_tests += 1
-
-                    if test_type == "testharness":
-                        run_tests = {"testharness": []}
-                        for test in test_loader.tests["testharness"]:
-                            if ((test.testdriver and not executor_cls.supports_testdriver) or
-                                (test.jsshell and not executor_cls.supports_jsshell)):
-                                logger.test_start(test.id)
-                                logger.test_end(test.id, status="SKIP")
-                                skipped_tests += 1
-                            else:
-                                run_tests["testharness"].append(test)
-                    else:
-                        run_tests = test_loader.tests
-
-                    recording.pause()
-                    with ManagerGroup("web-platform-tests",
-                                      kwargs["processes"],
-                                      test_source_cls,
-                                      test_source_kwargs,
-                                      browser_cls,
-                                      browser_kwargs,
-                                      executor_cls,
-                                      executor_kwargs,
-                                      kwargs["rerun"],
-                                      kwargs["pause_after_test"],
-                                      kwargs["pause_on_unexpected"],
-                                      kwargs["restart_on_unexpected"],
-                                      kwargs["debug_info"],
-                                      not kwargs["no_capture_stdio"],
-                                      recording=recording) as manager_group:
-                        try:
-                            manager_group.run(test_type, run_tests)
-                        except KeyboardInterrupt:
-                            logger.critical("Main thread got signal")
-                            manager_group.stop()
-                            raise
-                    test_count += manager_group.test_count()
-                    unexpected_count += manager_group.unexpected_count()
-                    unexpected_pass_count += manager_group.unexpected_pass_count()
 
                 recording.set(["after-end"])
-                test_total += test_count
-                unexpected_total += unexpected_count
-                unexpected_pass_total += unexpected_pass_count
-                logger.info("Got %i unexpected results, with %i unexpected passes" %
-                            (unexpected_count, unexpected_pass_count))
+                logger.info(
+                    "Got %i unexpected results, with %i unexpected passes" %
+                    (counts["unexpected"], counts["unexpected_pass"]))
                 logger.suite_end()
-                if repeat_until_unexpected and unexpected_total > 0:
-                    break
-                if repeat_count == 1 and len(test_loader.test_ids) == skipped_tests:
-                    break
-
-    if test_total == 0:
-        if skipped_tests > 0:
-            logger.warning("All requested tests were skipped")
-        else:
-            if kwargs["default_exclude"]:
-                logger.info("No tests ran")
-                return True
-            else:
-                logger.critical("No tests ran")
-                return False
+                # determine the longest test suite runtime seen
+                longest_iteration_time = max(
+                    longest_iteration_time,
+                    datetime.now() - iteration_start)
 
-    if unexpected_total and not kwargs["fail_on_unexpected"]:
-        logger.info("Tolerating %s unexpected results" % unexpected_total)
-        return True
-
-    all_unexpected_passed = (unexpected_total and
-                             unexpected_total == unexpected_pass_total)
-    if all_unexpected_passed and not kwargs["fail_on_unexpected_pass"]:
-        logger.info("Tolerating %i unexpected results because they all PASS" %
-                    unexpected_pass_total)
-        return True
+                if repeat_until_unexpected and counts["unexpected"] > 0:
+                    break
+                if counts["repeat"] == 1 \
+                        and len(test_loader.test_ids) == counts["skipped"]:
+                    break
 
-    return unexpected_total == 0
+    return evaluate_runs(counts, avoided_timeout, kwargs)
 
 
 def check_stability(**kwargs):

From 52e31bb96bf08504552bc4fff8e2d9e34ed02ab0 Mon Sep 17 00:00:00 2001
From: Daniel Smith <56164590+DanielRyanSmith@users.noreply.github.com>
Date: Mon, 27 Dec 2021 05:43:32 -0800
Subject: [PATCH 02/25] fix flake8 issue

---
 tools/wptrunner/wptrunner/wptrunner.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/wptrunner/wptrunner/wptrunner.py b/tools/wptrunner/wptrunner/wptrunner.py
index 78d2567d44cec3..8f6cf5dc8ae7de 100644
--- a/tools/wptrunner/wptrunner/wptrunner.py
+++ b/tools/wptrunner/wptrunner/wptrunner.py
@@ -207,8 +207,7 @@ def run_test_iteration(counts, test_loader, test_source_kwargs,
         if test_type == "testharness":
             run_tests = {"testharness": []}
             for test in test_loader.tests["testharness"]:
-                if ((test.testdriver
-                     and not executor_cls.supports_testdriver) or
+                if ((test.testdriver and not executor_cls.supports_testdriver) or
                         (test.jsshell and not executor_cls.supports_jsshell)):
                     logger.test_start(test.id)
                     logger.test_end(test.id, status="SKIP")

From 33f8b1b811023509b0bfc8b8e129b8b55e07baed Mon Sep 17 00:00:00 2001
From: Daniel Smith <56164590+DanielRyanSmith@users.noreply.github.com>
Date: Mon, 27 Dec 2021 05:44:13 -0800
Subject: [PATCH 03/25] remove empty flag to trigger stability checks

---
 css/CSS2/borders/border-001.xht | 1 -
 1 file changed, 1 deletion(-)

diff --git a/css/CSS2/borders/border-001.xht b/css/CSS2/borders/border-001.xht
index 7b3fc200386498..f672d88a0dc4df 100644
--- a/css/CSS2/borders/border-001.xht
+++ b/css/CSS2/borders/border-001.xht
@@ -9,7 +9,6 @@
 
-