Skip to content

Commit

Permalink
[CI][Bisect][4] Add pre-sanity check to avoid infra or external chang…
Browse files Browse the repository at this point in the history
…e root causes (ray-project#34553)

Why are these changes needed?
Many times, tests can fail due to a non-code-change issue (external or infra issues). Before running a bisect, run a pre-sanity check to make sure that the provided passing and failing revisions are valid. Otherwise, terminate the bisect early and let the users know that the test is flaky.

Signed-off-by: elliottower <[email protected]>
  • Loading branch information
can-anyscale authored and elliottower committed Apr 22, 2023
1 parent fa30fff commit a6deb57
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 44 deletions.
134 changes: 102 additions & 32 deletions release/ray_release/scripts/ray_bisect.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
import json
import time
from typing import List
from typing import Dict, List, Optional, Set
from ray_release.logger import logger
from ray_release.buildkite.step import get_step
from ray_release.config import (
Expand All @@ -18,64 +18,134 @@
@click.argument("test_name", required=True, type=str)
@click.argument("passing_commit", required=True, type=str)
@click.argument("failing_commit", required=True, type=str)
def main(test_name: str, passing_commit: str, failing_commit: str) -> None:
@click.option(
"--concurrency",
default=3,
type=int,
help=(
"Maximum number of concurrent test jobs to run. Higher number uses more "
"capacity, but reduce the bisect duration"
),
)
def main(
test_name: str,
passing_commit: str,
failing_commit: str,
concurrency: Optional[int] = 1,
) -> None:
if concurrency <= 0:
raise ValueError(
f"Concurrency input need to be a positive number, received: {concurrency}"
)
test = _get_test(test_name)
pre_sanity_check = _sanity_check(test, passing_commit, failing_commit)
if not pre_sanity_check:
logger.info(
"Failed pre-saniy check, the test might be flaky or fail due to"
" an external (not a code change) factors"
)
return
commit_lists = _get_commit_lists(passing_commit, failing_commit)
blamed_commit = _bisect(test_name, commit_lists)
blamed_commit = _bisect(test, commit_lists, concurrency)
logger.info(f"Blamed commit found for test {test_name}: {blamed_commit}")


def _bisect(test_name: str, commit_list: List[str]) -> str:
test = _get_test(test_name)
def _bisect(test: Test, commit_list: List[str], concurrency: int) -> str:
while len(commit_list) > 2:
logger.info(
f"Bisecting between {len(commit_list)} commits: "
f"{commit_list[0]} to {commit_list[-1]}"
f"{commit_list[0]} to {commit_list[-1]} with concurrency {concurrency}"
)
middle_commit_idx = len(commit_list) // 2
middle_commit = commit_list[middle_commit_idx]
is_passing = _run_test(test, middle_commit)
if is_passing:
commit_list = commit_list[middle_commit_idx:]
else:
commit_list = commit_list[: middle_commit_idx + 1]
idx_to_commit = {}
for i in range(concurrency):
idx = len(commit_list) * (i + 1) // (concurrency + 1)
idx_to_commit[idx] = commit_list[idx]
outcomes = _run_test(test, set(idx_to_commit.values()))
passing_idx = 0
failing_idx = len(commit_list) - 1
for idx, commit in idx_to_commit.items():
is_passing = outcomes[commit] == "passed"
if is_passing and idx > passing_idx:
passing_idx = idx
if not is_passing and idx < failing_idx:
failing_idx = idx
commit_list = commit_list[passing_idx : failing_idx + 1]
return commit_list[-1]


def _run_test(test: Test, commit: str) -> bool:
logger.info(f'Running test {test["name"]} on commit {commit}')
_trigger_test_run(test, commit)
return _obtain_test_result(commit)
def _sanity_check(test: Test, passing_revision: str, failing_revision: str) -> bool:
    """
    Sanity check that the test indeed passes on the passing revision, and fails on the
    failing revision.

    Returns True only when the passing revision's outcome is "passed" AND the
    failing revision's outcome is anything other than "passed". Any other
    combination means the failure is likely flaky or externally caused, and a
    bisect would be pointless.
    """
    logger.info(
        f"Sanity check passing revision: {passing_revision}"
        f" and failing revision: {failing_revision}"
    )
    # _run_test declares `commits: Set[str]`; pass a set to match the signature
    # (this also deduplicates the degenerate case where both revisions are the
    # same commit, avoiding a redundant triggered run).
    outcomes = _run_test(test, {passing_revision, failing_revision})
    return (
        outcomes[passing_revision] == "passed"
        and outcomes[failing_revision] != "passed"
    )


def _run_test(test: Test, commits: Set[str]) -> Dict[str, str]:
    """
    Trigger one Buildkite test run per commit, then wait for all of them.

    Returns a mapping from commit hash to its Buildkite outcome string
    (e.g. "passed").
    """
    logger.info(f'Running test {test["name"]} on commits {commits}')
    # Fan out first: start every commit's run up front so the jobs execute
    # concurrently on Buildkite...
    for each_commit in commits:
        _trigger_test_run(test, each_commit)
    # ...then block, polling until every triggered run reports an outcome.
    return _obtain_test_result(commits)


def _trigger_test_run(test: Test, commit: str) -> None:
ray_wheels_url = find_and_wait_for_ray_wheels_url(
commit,
timeout=DEFAULT_WHEEL_WAIT_TIMEOUT,
)
step = get_step(test, ray_wheels=ray_wheels_url)
step["label"] = f'{test["name"]}:{commit[:6]}'
step = get_step(
test,
ray_wheels=ray_wheels_url,
env={
"RAY_COMMIT_OF_WHEEL": commit,
},
)
step["label"] = f'{test["name"]}:{commit[:7]}'
step["key"] = commit
pipeline = json.dumps({"steps": [step]})
pipeline = subprocess.Popen(
["echo", json.dumps({"steps": [step]})], stdout=subprocess.PIPE
)
subprocess.check_output(
f'echo "{pipeline}" | buildkite-agent pipeline upload',
shell=True,
["buildkite-agent", "pipeline", "upload"], stdin=pipeline.stdout
)
pipeline.stdout.close()


def _obtain_test_result(buildkite_step_key: str) -> bool:
outcome = None
wait = 30
def _obtain_test_result(buildkite_step_keys: List[str]) -> Dict[str, str]:
outcomes = {}
wait = 5
total_wait = 0
while outcome not in ["passed", "hard_failed", "soft_failed"]:
while True:
logger.info(f"... waiting for test result ...({total_wait} seconds)")
outcome = subprocess.check_output(
f'buildkite-agent step get "outcome" --step "{buildkite_step_key}"',
shell=True,
).decode("utf-8")
for key in buildkite_step_keys:
if key in outcomes:
continue
outcome = subprocess.check_output(
[
"buildkite-agent",
"step",
"get",
"outcome",
"--step",
key,
]
).decode("utf-8")
if outcome:
outcomes[key] = outcome
if len(outcomes) == len(buildkite_step_keys):
break
time.sleep(wait)
total_wait = total_wait + wait
logger.info(f"Final test outcome: {outcome}")
return outcome == "passed"
logger.info(f"Final test outcomes: {outcomes}")
return outcomes


def _get_test(test_name: str) -> Test:
Expand Down
29 changes: 17 additions & 12 deletions release/ray_release/tests/test_bisect.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,36 @@
from unittest import mock
from typing import List, Dict
from ray_release.scripts.ray_bisect import _bisect
from ray_release.config import Test


def test_bisect():
test_cases = {
"c3": {
"c0": True,
"c1": True,
"c3": False,
"c4": False,
"c0": "passed",
"c1": "passed",
"c3": "hard_failed",
"c4": "soft_failed",
},
"c1": {
"c0": True,
"c1": False,
"c0": "passed",
"c1": "hard_failed",
"c2": "hard_failed",
},
"cc1": {
"cc0": "passed",
"cc1": "hard_failed",
},
}

for output, input in test_cases.items():

def _mock_run_test(test_name: str, commit: str) -> bool:
return input[commit]
def _mock_run_test(test: Test, commit: List[str]) -> Dict[str, str]:
return input

with mock.patch(
"ray_release.scripts.ray_bisect._run_test",
side_effect=_mock_run_test,
), mock.patch(
"ray_release.scripts.ray_bisect._get_test",
return_value={},
):
assert _bisect("test", list(input.keys())) == output
for concurreny in range(1, 4):
assert _bisect({}, list(input.keys()), concurreny) == output

0 comments on commit a6deb57

Please sign in to comment.