Skip to content

Commit

Permalink
7660 Avoid CI stability checks timing out (#32202)
Browse files Browse the repository at this point in the history
* allow stability checks to avoid TC timeout by checking times in between repeat runs

* fix flake8 issue

* remove empty flag to trigger stability checks

* some commenting explaining how iterations are being tracked

* add --repeat-max-time flag

* better descriptors for announcing results

If the repeat runs stop early to avoid, timeout, a message will be written in the results explaining so.

* cast max_time to timedelta

* correct syntax for kwargs

* specify kwargs source for run_test_iteration

* remove empty css flags tag to trigger stability checks

* Add clause to handle an ineffective number of iterations completing

* replace unassociated change used to trigger stability checks

* Implement changes suggested by @stephenmcgruer

* Add only necessary formatting changes to stability

* wptrunner reformatting changes suggested by @jgraham

* functional changes to stability tests suggested by @jgraham

* flake8 changes "line break before binary operator"

* change run_tests to return counts object

* ensure run_tests return values are properly accessed

Now that wptrunner's run_tests returns more than 1 value, the return type will be a tuple for the older variables that expect only 1 value. We need to ensure that we pull the expected first value (boolean) out of that tuple.

* run_tests has consistent return values even in fail cases

wptrunner's run_tests would return a tuple only if not issues arose while running, and would return only a boolean in the case of some expected issue. Now a tuple is returned in all instances as expected.

* Return full counts in TestStatus class

run_tests will now return a new TestStatus object rather than returning only the number of iterations run. This will allow for more robust statistics to be shown in the future.

* small formatting changes

reducing some comments and logs to taking less vertical space.

* small wording change

TestStatus docstring

* Replace counts with TestStatus object

forego the use of defaultdict counts keeping track of test info/results and instead use the custom class TestStatus.

* convert some log strings to f-strings
  • Loading branch information
DanielRyanSmith authored Jan 11, 2022
1 parent 4928cc8 commit 0a269ce
Show file tree
Hide file tree
Showing 3 changed files with 216 additions and 133 deletions.
40 changes: 35 additions & 5 deletions tools/wptrunner/wptrunner/stability.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,10 @@ def wrap_handler(x):
# warning+ level logs only
logger.add_handler(StreamHandler(log, JSONFormatter()))

wptrunner.run_tests(**kwargs)
# Use the number of iterations of the test suite that were run to process the results.
# if the runs were stopped to avoid hitting the maximum run time.
_, test_status = wptrunner.run_tests(**kwargs)
iterations = test_status.repeated_runs

logger._state.handlers = initial_handlers
logger._state.running_tests = set()
Expand All @@ -311,12 +314,24 @@ def get_steps(logger, repeat_loop, repeat_restart, kwargs_extras):
if repeat_loop:
desc = "Running tests in a loop %d times%s" % (repeat_loop,
flags_string)
steps.append((desc, functools.partial(run_step, logger, repeat_loop, False, kwargs_extra)))
steps.append((desc,
functools.partial(run_step,
logger,
repeat_loop,
False,
kwargs_extra),
repeat_loop))

if repeat_restart:
desc = "Running tests in a loop with restarts %s times%s" % (repeat_restart,
flags_string)
steps.append((desc, functools.partial(run_step, logger, repeat_restart, True, kwargs_extra)))
steps.append((desc,
functools.partial(run_step,
logger,
repeat_restart,
True,
kwargs_extra),
repeat_restart))

return steps

Expand All @@ -335,6 +350,7 @@ def write_summary(logger, step_results, final_result):

logger.info(':::')


def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, max_time=None,
output_results=True, **kwargs):
kwargs_extras = [{}]
Expand All @@ -348,7 +364,7 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m

github_checks_outputter = get_gh_checks_outputter(kwargs["github_checks_text_file"])

for desc, step_func in steps:
for desc, step_func, expected_iterations in steps:
if max_time and datetime.now() - start_time > max_time:
logger.info("::: Test verification is taking too long: Giving up!")
logger.info("::: So far, all checks passed, but not all checks were run.")
Expand All @@ -359,6 +375,14 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
logger.info('::: Running test verification step "%s"...' % desc)
logger.info(':::')
results, inconsistent, slow, iterations = step_func(**kwargs)

if iterations <= 1 and expected_iterations > 1:
step_results.append((desc, "FAIL"))
logger.info("::: Reached iteration timeout before finishing 2 or more repeat runs.")
logger.info("::: At least 2 successful repeat runs are required to validate stability.")
write_summary(logger, step_results, "TIMEOUT")
return 1

if output_results:
write_results(logger.info, results, iterations)

Expand All @@ -378,6 +402,12 @@ def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, m
write_summary(logger, step_results, "FAIL")
return 1

step_results.append((desc, "PASS"))
# If the tests passed but the number of iterations didn't match the number expected to run,
# it is likely that the runs were stopped early to avoid a timeout.
if iterations != expected_iterations:
result = f"PASS * {iterations}/{expected_iterations} repeats completed"
step_results.append((desc, result))
else:
step_results.append((desc, "PASS"))

write_summary(logger, step_results, "PASS")
4 changes: 4 additions & 0 deletions tools/wptrunner/wptrunner/wptcommandline.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ def create_parser(product_choices=None):
default=None,
help="The maximum number of minutes for the job to run",
type=lambda x: timedelta(minutes=float(x)))
mode_group.add_argument("--repeat-max-time", action="store",
default=100,
help="The maximum number of minutes for the test suite to attempt repeat runs",
type=int)
output_results_group = mode_group.add_mutually_exclusive_group()
output_results_group.add_argument("--verify-no-output-results", action="store_false",
dest="verify_output_results",
Expand Down
Loading

0 comments on commit 0a269ce

Please sign in to comment.