7660 Avoid CI stability checks timing out #32202

Merged
26 commits:
3058ead - allow stability checks to avoid TC timeout by checking times in betwe… (DanielRyanSmith, Dec 27, 2021)
52e31bb - fix flake8 issue (DanielRyanSmith, Dec 27, 2021)
33f8b1b - remove empty flag to trigger stability checks (DanielRyanSmith, Dec 27, 2021)
624e341 - some commenting explaining how iterations are being tracked (DanielRyanSmith, Dec 28, 2021)
11b8753 - add --repeat-max-time flag (DanielRyanSmith, Dec 28, 2021)
c28794e - better descriptors for announcing results (DanielRyanSmith, Dec 28, 2021)
e0b33e6 - cast max_time to timedelta (DanielRyanSmith, Dec 28, 2021)
d357344 - correct syntax for kwargs (DanielRyanSmith, Dec 28, 2021)
02e4f87 - specify kwargs source for run_test_iteration (DanielRyanSmith, Dec 28, 2021)
36db2c5 - remove empty css flags tag to trigger stability checks (DanielRyanSmith, Dec 28, 2021)
3f9bddb - Add clause to handle an ineffective number of iterations completing (DanielRyanSmith, Dec 29, 2021)
b3425cf - replace unassociated change used to trigger stability checks (DanielRyanSmith, Dec 29, 2021)
c93f895 - Implement changes suggested by @stephenmcgruer (DanielRyanSmith, Jan 5, 2022)
4d7e6f2 - Add only necessary formatting changes to stability (DanielRyanSmith, Jan 5, 2022)
2252d91 - Merge branch 'master' into 7660-stability-timeout (DanielRyanSmith, Jan 5, 2022)
2350b76 - wptrunner reformatting changes suggested by @jgraham (DanielRyanSmith, Jan 8, 2022)
01a2f07 - functional changes to stability tests suggested by @jgraham (DanielRyanSmith, Jan 8, 2022)
2d9fe27 - flake8 changes "line break before binary operator" (DanielRyanSmith, Jan 8, 2022)
330124a - change run_tests to return counts object (DanielRyanSmith, Jan 8, 2022)
899b794 - ensure run_tests return values are properly accessed (DanielRyanSmith, Jan 9, 2022)
42d9f5e - run_tests has consistent return values even in fail cases (DanielRyanSmith, Jan 9, 2022)
a13f488 - Return full counts in TestStatus class (DanielRyanSmith, Jan 10, 2022)
e9d90c8 - small formatting changes (DanielRyanSmith, Jan 10, 2022)
9d27581 - small wording change (DanielRyanSmith, Jan 10, 2022)
9d11203 - Replace counts with TestStatus object (DanielRyanSmith, Jan 11, 2022)
6d00729 - convert some log strings to f-strings (DanielRyanSmith, Jan 11, 2022)
76 changes: 63 additions & 13 deletions tools/wptrunner/wptrunner/stability.py
@@ -4,7 +4,7 @@
import io
import os
from collections import OrderedDict, defaultdict
from datetime import datetime
from datetime import datetime, timedelta

from mozlog import reader
from mozlog.formatters import JSONFormatter
@@ -261,7 +261,8 @@ def write_results(log, results, iterations, pr_number=None, use_details=False):
log("</details>\n")


def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwargs):
def run_step(logger, iterations, restart_after_iteration,
kwargs_extras, **kwargs):
from . import wptrunner
kwargs = copy.deepcopy(kwargs)

@@ -270,6 +271,13 @@ def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwargs):
else:
kwargs["rerun"] = iterations

# Keep track if the runs were stopped early to avoid
# hitting the timeout. If so, the actual number of iterations run
# should be used to process the results. The number here will
# be set to the actual value for later reference if it changes.
kwargs["avoided_timeout"] = {"did_avoid": False,
"iterations_run": iterations}

kwargs["pause_after_test"] = False
kwargs.update(kwargs_extras)

@@ -290,6 +298,12 @@ def wrap_handler(x):

wptrunner.run_tests(**kwargs)

# Use the number of repeated test suites that were run
# to process the results if the runs were stopped to
# avoid hitting the maximum run time.
if kwargs["avoided_timeout"]["did_avoid"]:
Reviewer comment (Contributor):
Using kwargs as an output mechanism from wptrunner.run_tests is clever, but potentially confusing. I leave it to other reviewers (who are still involved with WPT and will have to maintain this ;)) to decide if they want to suggest a different method for communicating this information back to stability.py from the runner.

Reply from the author (DanielRyanSmith):
Yes, I was hesitant about taking this approach. We definitely want access to the number of iterations that were actually run, but returning that information from wptrunner's run_tests when it is only needed by the stability checks didn't feel right either. I am totally open to changing this implementation if anyone has suggestions or thinks a rework of run_tests is worth the change.
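Later commits in this PR ("change run_tests to return counts object", "Return full counts in TestStatus class", "Replace counts with TestStatus object") do end up moving this signal into the runner's return value instead of a mutated kwarg. A minimal sketch of that shape; names and fields below are illustrative, based on the commit messages rather than the final API:

```python
from dataclasses import dataclass
from datetime import datetime, timedelta

@dataclass
class TestStatus:
    """Counts describing one verification run; field names are illustrative."""
    expected_repeated_runs: int = 1  # iterations requested
    repeated_runs: int = 0           # iterations actually completed

def run_tests(repeat=1, repeat_max_time=100, **kwargs):
    """Run the suite `repeat` times, stopping early once the minute budget is spent."""
    status = TestStatus(expected_repeated_runs=repeat)
    start = datetime.now()
    for _ in range(repeat):
        pass  # ... one full pass of the test suite would run here ...
        status.repeated_runs += 1
        if datetime.now() - start > timedelta(minutes=repeat_max_time):
            break  # stop early rather than hit the external CI timeout
    return status

# stability.py can then read the actual iteration count directly,
# with no kwargs back-channel:
#   status = wptrunner.run_tests(**kwargs)
#   iterations = status.repeated_runs
```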

iterations = kwargs["avoided_timeout"]["iterations_run"]

logger._state.handlers = initial_handlers
logger._state.running_tests = set()
logger._state.suite_started = False
@@ -311,12 +325,24 @@ def get_steps(logger, repeat_loop, repeat_restart, kwargs_extras):
if repeat_loop:
desc = "Running tests in a loop %d times%s" % (repeat_loop,
flags_string)
steps.append((desc, functools.partial(run_step, logger, repeat_loop, False, kwargs_extra)))
steps.append((desc,
functools.partial(run_step,
logger,
repeat_loop,
False,
kwargs_extra),
repeat_loop))

if repeat_restart:
desc = "Running tests in a loop with restarts %s times%s" % (repeat_restart,
flags_string)
steps.append((desc, functools.partial(run_step, logger, repeat_restart, True, kwargs_extra)))
steps.append((desc,
functools.partial(run_step,
logger,
repeat_restart,
True,
kwargs_extra),
repeat_restart))

return steps

@@ -335,8 +361,9 @@ def write_summary(logger, step_results, final_result):

logger.info(':::')

def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, max_time=None,
output_results=True, **kwargs):

def check_stability(logger, repeat_loop=10, repeat_restart=10, chaos_mode=True,
max_time=None, output_results=True, **kwargs):
kwargs_extras = [{}]
if chaos_mode and kwargs["product"] == "firefox":
kwargs_extras.append({"chaos_mode_flags": "0xfb"})
@@ -346,38 +373,61 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, max_time=None, output_results=True, **kwargs):
start_time = datetime.now()
step_results = []

github_checks_outputter = get_gh_checks_outputter(kwargs["github_checks_text_file"])
github_checks_outputter = get_gh_checks_outputter(
kwargs["github_checks_text_file"])

for desc, step_func in steps:
if max_time and datetime.now() - start_time > max_time:
for desc, step_func, expected_iterations in steps:
if max_time and \
datetime.now() - start_time > timedelta(minutes=max_time):
logger.info("::: Test verification is taking too long: Giving up!")
logger.info("::: So far, all checks passed, but not all checks were run.")
logger.info(
"::: So far, all checks passed, but not all checks were run.")
write_summary(logger, step_results, "TIMEOUT")
return 2

logger.info(':::')
logger.info('::: Running test verification step "%s"...' % desc)
logger.info(':::')
results, inconsistent, slow, iterations = step_func(**kwargs)

if iterations <= 1:
step_results.append((desc, "FAIL"))
logger.info("::: Reached iteration timeout before finishing "
"2 or more repeat runs.")
logger.info("::: At least 2 successful repeat runs are required "
"to validate stability.")
write_summary(logger, step_results, "TIMEOUT")
return 1

if output_results:
write_results(logger.info, results, iterations)

if inconsistent:
step_results.append((desc, "FAIL"))
if github_checks_outputter:
write_github_checks_summary_inconsistent(github_checks_outputter.output, inconsistent, iterations)
write_github_checks_summary_inconsistent(
github_checks_outputter.output, inconsistent, iterations)
write_inconsistent(logger.info, inconsistent, iterations)
write_summary(logger, step_results, "FAIL")
return 1

if slow:
step_results.append((desc, "FAIL"))
if github_checks_outputter:
write_github_checks_summary_slow_tests(github_checks_outputter.output, slow)
write_github_checks_summary_slow_tests(
github_checks_outputter.output, slow)
write_slow_tests(logger.info, slow)
write_summary(logger, step_results, "FAIL")
return 1

step_results.append((desc, "PASS"))
# If the tests passed but the number of iterations didn't match
# the number expected to run, it is likely that the runs were
# stopped early to avoid a timeout.
if iterations != expected_iterations:
result = "PASS * %i/%i repeats completed" % (
iterations, expected_iterations)
step_results.append((desc, result))
else:
step_results.append((desc, "PASS"))

write_summary(logger, step_results, "PASS")
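The budget check above runs before each verification step rather than interrupting a step mid-run, and because --verify-max-time now arrives as a plain int (see the wptcommandline.py diff below), the minutes-to-timedelta conversion happens at the comparison site. A condensed sketch of the loop with the step functions stubbed out; the values here are stand-ins, not wptrunner's real configuration:

```python
from datetime import datetime, timedelta

def over_budget(start_time, max_time_minutes):
    """True once elapsed wall-clock time exceeds the budget; None disables it."""
    if max_time_minutes is None:
        return False
    return datetime.now() - start_time > timedelta(minutes=max_time_minutes)

# Each step carries its expected iteration count, mirroring get_steps() above.
steps = [("loop x10", lambda: 10, 10), ("restarts x10", lambda: 8, 10)]
start_time = datetime.now()

for desc, step_func, expected_iterations in steps:
    if over_budget(start_time, max_time_minutes=50):
        print("::: Test verification is taking too long: Giving up!")
        break
    iterations = step_func()  # stand-in for step_func(**kwargs)
    if iterations != expected_iterations:
        # Early-stopped runs still pass, with the shortfall noted.
        print(f"{desc}: PASS * {iterations}/{expected_iterations} repeats completed")
    else:
        print(f"{desc}: PASS")
```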
7 changes: 5 additions & 2 deletions tools/wptrunner/wptrunner/wptcommandline.py
@@ -3,7 +3,6 @@
import sys
from collections import OrderedDict
from distutils.spawn import find_executable
from datetime import timedelta

from . import config
from . import wpttest
@@ -115,7 +114,11 @@ def create_parser(product_choices=None):
mode_group.add_argument("--verify-max-time", action="store",
default=None,
help="The maximum number of minutes for the job to run",
type=lambda x: timedelta(minutes=float(x)))
type=int)
mode_group.add_argument("--repeat-max-time", action="store",
default=100,
help="The maximum number of minutes for the test suite to attempt repeat runs",
type=int)
output_results_group = mode_group.add_mutually_exclusive_group()
output_results_group.add_argument("--verify-no-output-results", action="store_false",
dest="verify_output_results",
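With both flags parsed as plain integers (minutes), any timedelta conversion is now the caller's job, as in the check_stability() change above. A small self-contained check of the parsing behavior, using a stand-in parser rather than wptrunner's real mode_group:

```python
import argparse
from datetime import timedelta

parser = argparse.ArgumentParser()
parser.add_argument("--verify-max-time", action="store", default=None, type=int,
                    help="The maximum number of minutes for the job to run")
parser.add_argument("--repeat-max-time", action="store", default=100, type=int,
                    help="The maximum number of minutes for the test suite "
                         "to attempt repeat runs")

args = parser.parse_args(["--verify-max-time", "50"])
assert args.verify_max_time == 50   # plain int; no timedelta at parse time
assert args.repeat_max_time == 100  # default budget for repeat runs

budget = timedelta(minutes=args.repeat_max_time)  # conversion happens downstream
print(budget)  # 1:40:00
```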