Skip to content

Commit

Permalink
[CI][Bisect][4] Add pre-sanity check to avoid infra or external chang…
Browse files Browse the repository at this point in the history
…e root causes (ray-project#34553)

Why are these changes needed?
Many times, tests can fail due to a non-code-change issue (external or infra issues). Before running a bisect, run a pre-sanity check to make sure that the provided passing and failing revisions are valid. Otherwise, terminate the bisect early and let the users know that the test is flaky.

Signed-off-by: elliottower <[email protected]>
  • Loading branch information
can-anyscale authored and elliottower committed Apr 22, 2023
1 parent fa30fff commit a6deb57
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 44 deletions.
134 changes: 102 additions & 32 deletions release/ray_release/scripts/ray_bisect.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
import json
import time
from typing import List
from typing import Dict, List, Optional, Set
from ray_release.logger import logger
from ray_release.buildkite.step import get_step
from ray_release.config import (
Expand All @@ -18,64 +18,134 @@
@click.argument("test_name", required=True, type=str)
@click.argument("passing_commit", required=True, type=str)
@click.argument("failing_commit", required=True, type=str)
def main(test_name: str, passing_commit: str, failing_commit: str) -> None:
@click.option(
"--concurrency",
default=3,
type=int,
help=(
"Maximum number of concurrent test jobs to run. Higher number uses more "
"capacity, but reduce the bisect duration"
),
)
def main(
test_name: str,
passing_commit: str,
failing_commit: str,
concurrency: Optional[int] = 1,
) -> None:
if concurrency <= 0:
raise ValueError(
f"Concurrency input need to be a positive number, received: {concurrency}"
)
test = _get_test(test_name)
pre_sanity_check = _sanity_check(test, passing_commit, failing_commit)
if not pre_sanity_check:
logger.info(
"Failed pre-saniy check, the test might be flaky or fail due to"
" an external (not a code change) factors"
)
return
commit_lists = _get_commit_lists(passing_commit, failing_commit)
blamed_commit = _bisect(test_name, commit_lists)
blamed_commit = _bisect(test, commit_lists, concurrency)
logger.info(f"Blamed commit found for test {test_name}: {blamed_commit}")


def _bisect(test_name: str, commit_list: List[str]) -> str:
test = _get_test(test_name)
def _bisect(test: Test, commit_list: List[str], concurrency: int) -> str:
while len(commit_list) > 2:
logger.info(
f"Bisecting between {len(commit_list)} commits: "
f"{commit_list[0]} to {commit_list[-1]}"
f"{commit_list[0]} to {commit_list[-1]} with concurrency {concurrency}"
)
middle_commit_idx = len(commit_list) // 2
middle_commit = commit_list[middle_commit_idx]
is_passing = _run_test(test, middle_commit)
if is_passing:
commit_list = commit_list[middle_commit_idx:]
else:
commit_list = commit_list[: middle_commit_idx + 1]
idx_to_commit = {}
for i in range(concurrency):
idx = len(commit_list) * (i + 1) // (concurrency + 1)
idx_to_commit[idx] = commit_list[idx]
outcomes = _run_test(test, set(idx_to_commit.values()))
passing_idx = 0
failing_idx = len(commit_list) - 1
for idx, commit in idx_to_commit.items():
is_passing = outcomes[commit] == "passed"
if is_passing and idx > passing_idx:
passing_idx = idx
if not is_passing and idx < failing_idx:
failing_idx = idx
commit_list = commit_list[passing_idx : failing_idx + 1]
return commit_list[-1]


def _run_test(test: Test, commit: str) -> bool:
logger.info(f'Running test {test["name"]} on commit {commit}')
_trigger_test_run(test, commit)
return _obtain_test_result(commit)
def _sanity_check(test: Test, passing_revision: str, failing_revision: str) -> bool:
    """
    Sanity check that the test indeed passes on the passing revision, and fails on the
    failing revision.

    Returns True only when the passing revision's outcome is "passed" AND the
    failing revision's outcome is anything other than "passed". Any other
    combination means the failure is likely flaky or externally caused, and a
    bisect would be pointless.
    """
    logger.info(
        f"Sanity check passing revision: {passing_revision}"
        f" and failing revision: {failing_revision}"
    )
    # _run_test declares `commits: Set[str]`; pass a set to match the signature
    # (this also deduplicates the degenerate case where both revisions are the
    # same commit, avoiding a redundant triggered run).
    outcomes = _run_test(test, {passing_revision, failing_revision})
    return (
        outcomes[passing_revision] == "passed"
        and outcomes[failing_revision] != "passed"
    )


def _run_test(test: Test, commits: Set[str]) -> Dict[str, str]:
    """
    Trigger one Buildkite test run per commit, then wait for all of them.

    Returns a mapping from commit hash to its Buildkite outcome string
    (e.g. "passed").
    """
    logger.info(f'Running test {test["name"]} on commits {commits}')
    # Fan out first: start every commit's run up front so the jobs execute
    # concurrently on Buildkite...
    for each_commit in commits:
        _trigger_test_run(test, each_commit)
    # ...then block, polling until every triggered run reports an outcome.
    return _obtain_test_result(commits)


def _trigger_test_run(test: Test, commit: str) -> None:
ray_wheels_url = find_and_wait_for_ray_wheels_url(
commit,
timeout=DEFAULT_WHEEL_WAIT_TIMEOUT,
)
step = get_step(test, ray_wheels=ray_wheels_url)
step["label"] = f'{test["name"]}:{commit[:6]}'
step = get_step(
test,
ray_wheels=ray_wheels_url,
env={
"RAY_COMMIT_OF_WHEEL": commit,
},
)
step["label"] = f'{test["name"]}:{commit[:7]}'
step["key"] = commit
pipeline = json.dumps({"steps": [step]})
pipeline = subprocess.Popen(
["echo", json.dumps({"steps": [step]})], stdout=subprocess.PIPE
)
subprocess.check_output(
f'echo "{pipeline}" | buildkite-agent pipeline upload',
shell=True,
["buildkite-agent", "pipeline", "upload"], stdin=pipeline.stdout
)
pipeline.stdout.close()


def _obtain_test_result(buildkite_step_key: str) -> bool:
outcome = None
wait = 30
def _obtain_test_result(buildkite_step_keys: List[str]) -> Dict[str, str]:
outcomes = {}
wait = 5
total_wait = 0
while outcome not in ["passed", "hard_failed", "soft_failed"]:
while True:
logger.info(f"... waiting for test result ...({total_wait} seconds)")
outcome = subprocess.check_output(
f'buildkite-agent step get "outcome" --step "{buildkite_step_key}"',
shell=True,
).decode("utf-8")
for key in buildkite_step_keys:
if key in outcomes:
continue
outcome = subprocess.check_output(
[
"buildkite-agent",
"step",
"get",
"outcome",
"--step",
key,
]
).decode("utf-8")
if outcome:
outcomes[key] = outcome
if len(outcomes) == len(buildkite_step_keys):
break
time.sleep(wait)
total_wait = total_wait + wait
logger.info(f"Final test outcome: {outcome}")
return outcome == "passed"
logger.info(f"Final test outcomes: {outcomes}")
return outcomes


def _get_test(test_name: str) -> Test:
Expand Down
29 changes: 17 additions & 12 deletions release/ray_release/tests/test_bisect.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,36 @@
from unittest import mock
from typing import List, Dict
from ray_release.scripts.ray_bisect import _bisect
from ray_release.config import Test


def test_bisect():
test_cases = {
"c3": {
"c0": True,
"c1": True,
"c3": False,
"c4": False,
"c0": "passed",
"c1": "passed",
"c3": "hard_failed",
"c4": "soft_failed",
},
"c1": {
"c0": True,
"c1": False,
"c0": "passed",
"c1": "hard_failed",
"c2": "hard_failed",
},
"cc1": {
"cc0": "passed",
"cc1": "hard_failed",
},
}

for output, input in test_cases.items():

def _mock_run_test(test_name: str, commit: str) -> bool:
return input[commit]
def _mock_run_test(test: Test, commit: List[str]) -> Dict[str, str]:
return input

with mock.patch(
"ray_release.scripts.ray_bisect._run_test",
side_effect=_mock_run_test,
), mock.patch(
"ray_release.scripts.ray_bisect._get_test",
return_value={},
):
assert _bisect("test", list(input.keys())) == output
for concurreny in range(1, 4):
assert _bisect({}, list(input.keys()), concurreny) == output

0 comments on commit a6deb57

Please sign in to comment.