[ci][microcheck/13] reuse the logic to determine microcheck tests (#4…

…5504) Currently the logic to determine the list of microcheck tests and their rayci step ids are diverging. This PR move more function into Test class, so we converge more these two logic. This make sure that we will trigger the right buildkite steps to cover most if not all microcheck tests. Test: - CI --------- Signed-off-by: can <[email protected]>
ray-project · May 26, 2024 · 15c294e · 15c294e
1 parent a777269
commit 15c294e
Show file tree

Hide file tree

Showing 6 changed files with 146 additions and 127 deletions.
diff --git a/.buildkite/data.rayci.yml b/.buildkite/data.rayci.yml
@@ -57,10 +57,10 @@ steps:
     depends_on: data16build
 
   - label: ":database: data: arrow nightly tests"
+    if: pipeline.id == "0189e759-8c96-4302-b6b5-b4274406bf89" || pipeline.id == "018f4f1e-1b73-4906-9802-92422e3badaa"
     tags: 
       - python
       - data
-      - skip-on-premerge
     instance_type: medium
     parallelism: 2
     commands:

diff --git a/ci/ray_ci/automation/determine_microcheck_step_ids.py b/ci/ray_ci/automation/determine_microcheck_step_ids.py
@@ -1,4 +1,5 @@
 import click
+import os
 
 from ci.ray_ci.utils import ci_init
 from ray_release.test import (
@@ -8,6 +9,8 @@
     MACOS_TEST_PREFIX,
 )
 
+BAZEL_WORKSPACE_DIR = os.environ.get("BUILD_WORKSPACE_DIRECTORY", "")
+
 
 @click.command()
 def main() -> None:
@@ -16,9 +19,9 @@ def main() -> None:
     """
     ci_init()
     steps = (
-        list(Test.gen_high_impact_tests(LINUX_TEST_PREFIX).keys())
-        + list(Test.gen_high_impact_tests(WINDOWS_TEST_PREFIX).keys())
-        + list(Test.gen_high_impact_tests(MACOS_TEST_PREFIX).keys())
+        Test.gen_microcheck_step_ids(LINUX_TEST_PREFIX, BAZEL_WORKSPACE_DIR)
+        .union(Test.gen_microcheck_step_ids(WINDOWS_TEST_PREFIX, BAZEL_WORKSPACE_DIR))
+        .union(Test.gen_microcheck_step_ids(MACOS_TEST_PREFIX, BAZEL_WORKSPACE_DIR))
     )
 
     print(",".join(steps))

diff --git a/ci/ray_ci/test_tester.py b/ci/ray_ci/test_tester.py
@@ -13,10 +13,9 @@
     _get_container,
     _get_all_test_query,
     _get_test_targets,
-    _get_high_impact_test_targets,
+    _get_new_tests,
     _get_flaky_test_targets,
     _get_tag_matcher,
-    _get_new_tests,
 )
 from ray_release.test import Test, TestState
 
@@ -121,15 +120,12 @@ def test_get_test_targets() -> None:
         ), mock.patch(
             "ray_release.test.Test.gen_from_s3",
             return_value=test_objects,
-        ), mock.patch(
-            "ray_release.test.Test.gen_high_impact_tests",
-            return_value={"step": test_objects},
-        ), mock.patch(
-            "ray_release.test.Test.get_changed_tests",
-            return_value=set(),
         ), mock.patch(
             "ci.ray_ci.tester._get_new_tests",
             return_value=set(),
+        ), mock.patch(
+            "ray_release.test.Test.gen_microcheck_tests",
+            return_value={test.get_target() for test in test_objects},
         ):
             assert set(
                 _get_test_targets(
@@ -211,64 +207,6 @@ def test_get_all_test_query() -> None:
     )
 
 
-def test_get_high_impact_test_targets() -> None:
-    test_harness = [
-        {
-            "input": [],
-            "new_tests": set(),
-            "changed_tests": set(),
-            "human_tests": set(),
-            "output": set(),
-        },
-        {
-            "input": [
-                _stub_test(
-                    {
-                        "name": "linux://core_good",
-                        "team": "core",
-                    }
-                ),
-                _stub_test(
-                    {
-                        "name": "linux://serve_good",
-                        "team": "serve",
-                    }
-                ),
-            ],
-            "new_tests": {"//core_new"},
-            "changed_tests": {"//core_new"},
-            "human_tests": {"//human_test"},
-            "output": {
-                "//core_good",
-                "//core_new",
-                "//human_test",
-            },
-        },
-    ]
-    for test in test_harness:
-        with mock.patch(
-            "ray_release.test.Test.gen_high_impact_tests",
-            return_value={"step": test["input"]},
-        ), mock.patch(
-            "ci.ray_ci.tester._get_new_tests",
-            return_value=test["new_tests"],
-        ), mock.patch(
-            "ray_release.test.Test.get_changed_tests",
-            return_value=test["changed_tests"],
-        ), mock.patch(
-            "ray_release.test.Test.get_human_specified_tests",
-            return_value=test["human_tests"],
-        ):
-            assert (
-                _get_high_impact_test_targets(
-                    "core",
-                    "linux",
-                    LinuxTesterContainer("test", skip_ray_installation=True),
-                )
-                == test["output"]
-            )
-
-
 @mock.patch("ci.ray_ci.tester_container.TesterContainer.run_script_with_output")
 @mock.patch("ray_release.test.Test.gen_from_s3")
 def test_get_new_tests(mock_gen_from_s3, mock_run_script_with_output) -> None:

diff --git a/ci/ray_ci/tester.py b/ci/ray_ci/tester.py
@@ -1,4 +1,3 @@
-import itertools
 import os
 import sys
 from typing import List, Set, Tuple, Optional
@@ -37,7 +36,6 @@
 """  # noqa: E501
 
 DEFAULT_EXCEPT_TAGS = {"manual"}
-MICROCHECK_COMMAND = "@microcheck"
 
 # Gets the path of product/tools/docker (i.e. the parent of 'common')
 bazel_workspace_dir = os.environ.get("BUILD_WORKSPACE_DIRECTORY", "")
@@ -394,38 +392,20 @@ def _get_test_targets(
     if get_high_impact_tests:
         # run high impact test cases, so we include only high impact tests in the list
         # of targets provided by users
-        high_impact_tests = _get_high_impact_test_targets(
-            team, operating_system, container
-        )
+        prefix = f"{operating_system}:"
+        # TODO(can): we should also move the logic of _get_new_tests into the
+        # gen_microcheck_tests function; this is currently blocked by the fact that
+        # we need a container to run _get_new_tests
+        high_impact_tests = Test.gen_microcheck_tests(
+            prefix=prefix,
+            bazel_workspace_dir=bazel_workspace_dir,
+            team=team,
+        ).union(_get_new_tests(prefix, container))
         final_targets = high_impact_tests.intersection(final_targets)
 
     return list(final_targets)
 
 
-def _get_high_impact_test_targets(
-    team: str, operating_system: str, container: TesterContainer
-) -> Set[str]:
-    """
-    Get all test targets that are high impact
-    """
-    os_prefix = f"{operating_system}:"
-    step_id_to_tests = Test.gen_high_impact_tests(prefix=os_prefix)
-    high_impact_tests = {
-        test.get_name().lstrip(os_prefix)
-        for test in itertools.chain.from_iterable(step_id_to_tests.values())
-        if test.get_oncall() == team
-    }
-    new_tests = _get_new_tests(os_prefix, container)
-    changed_tests = Test.get_changed_tests(bazel_workspace_dir)
-    human_specified_tests = Test.get_human_specified_tests(bazel_workspace_dir)
-
-    return (
-        high_impact_tests.union(new_tests)
-        .union(changed_tests)
-        .union(human_specified_tests)
-    )
-
-
 def _get_new_tests(prefix: str, container: TesterContainer) -> Set[str]:
     """
     Get all local test targets that are not in database

diff --git a/release/ray_release/test.py b/release/ray_release/test.py
@@ -198,24 +198,63 @@ def gen_from_s3(cls, prefix: str):
         ]
 
     @classmethod
-    def gen_high_impact_tests(cls, prefix: str) -> Dict[str, List]:
+    def gen_microcheck_step_ids(cls, prefix: str, bazel_workspace_dir: str) -> Set[str]:
         """
-        Obtain the mapping from rayci step id to high impact tests with the given prefix
+        This function is used to get the buildkite step ids of the microcheck tests
+        with the given test prefix. This is used to determine the buildkite steps in
+        the microcheck pipeline.
+        """
+        step_ids = set()
+        test_targets = cls.gen_microcheck_tests(prefix, bazel_workspace_dir)
+        for test_target in test_targets:
+            test = cls.gen_from_name(f"{prefix}{test_target}")
+            if not test:
+                continue
+            recent_results = test.get_test_results()
+            if not recent_results:
+                continue
+            test_step_ids = {
+                result.rayci_step_id
+                for result in recent_results
+                if result.commit == recent_results[0].commit and result.rayci_step_id
+            }
+            if test_step_ids and not step_ids.intersection(test_step_ids):
+                step_ids.add(sorted(test_step_ids)[0])
+
+        return step_ids
+
+    @classmethod
+    def gen_microcheck_tests(
+        cls, prefix: str, bazel_workspace_dir: str, team: Optional[str] = None
+    ) -> Set[str]:
+        """
+        Obtain all microcheck tests with the given prefix
+        """
+        high_impact_tests = Test._gen_high_impact_tests(prefix, team)
+        changed_tests = Test._get_changed_tests(bazel_workspace_dir)
+        human_specified_tests = Test._get_human_specified_tests(bazel_workspace_dir)
+
+        return high_impact_tests.union(changed_tests, human_specified_tests)
+
+    @classmethod
+    def _gen_high_impact_tests(
+        cls, prefix: str, team: Optional[str] = None
+    ) -> Set[str]:
+        """
+        Obtain all high impact tests with the given prefix
         """
         high_impact_tests = [
             test for test in cls.gen_from_s3(prefix) if test.is_high_impact()
         ]
-        step_id_to_tests = {}
-        for test in high_impact_tests:
-            step_id = test.get_test_results(limit=1)[0].rayci_step_id
-            if not step_id:
-                continue
-            step_id_to_tests[step_id] = step_id_to_tests.get(step_id, []) + [test]
+        if team:
+            high_impact_tests = [
+                test for test in high_impact_tests if test.get_oncall() == team
+            ]
 
-        return step_id_to_tests
+        return {test.get_target() for test in high_impact_tests}
 
     @classmethod
-    def get_human_specified_tests(cls, bazel_workspace_dir: str) -> Set[str]:
+    def _get_human_specified_tests(cls, bazel_workspace_dir: str) -> Set[str]:
         """
         Get all test targets that are specified by humans
         """
@@ -238,7 +277,7 @@ def get_human_specified_tests(cls, bazel_workspace_dir: str) -> Set[str]:
         return tests
 
     @classmethod
-    def get_changed_tests(cls, bazel_workspace_dir: str) -> Set[str]:
+    def _get_changed_tests(cls, bazel_workspace_dir: str) -> Set[str]:
         """
         Get all changed tests in the current PR
         """