From e75efb24b7d831ea30f409d034f8b768e228bbb6 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 09:45:57 +0100
Subject: [PATCH 01/61] Add base docker files

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.base  | 0
 ci/docker/Dockerfile.build | 0
 ci/docker/Dockerfile.gpu   | 0
 ci/docker/Dockerfile.ml    | 0
 ci/docker/Dockerfile.test  | 0
 5 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 ci/docker/Dockerfile.base
 create mode 100644 ci/docker/Dockerfile.build
 create mode 100644 ci/docker/Dockerfile.gpu
 create mode 100644 ci/docker/Dockerfile.ml
 create mode 100644 ci/docker/Dockerfile.test

diff --git a/ci/docker/Dockerfile.base b/ci/docker/Dockerfile.base
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/ci/docker/Dockerfile.build b/ci/docker/Dockerfile.build
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/ci/docker/Dockerfile.gpu b/ci/docker/Dockerfile.gpu
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/ci/docker/Dockerfile.ml b/ci/docker/Dockerfile.ml
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/ci/docker/Dockerfile.test b/ci/docker/Dockerfile.test
new file mode 100644
index 000000000000..e69de29bb2d1

From b787f03f1d42d36d90a099bdfb162f5ff4311457 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 09:53:46 +0100
Subject: [PATCH 02/61] Update pipelines

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 .buildkite/pipeline.build.yml | 512 ++++++++++++++++++++++++++++++
 .buildkite/pipeline.test.yml  |  61 ++++
 .buildkite/pipeline.yml       | 566 ----------------------------------
 3 files changed, 573 insertions(+), 566 deletions(-)
 create mode 100644 .buildkite/pipeline.build.yml
 create mode 100644 .buildkite/pipeline.test.yml

diff --git a/.buildkite/pipeline.build.yml b/.buildkite/pipeline.build.yml
new file mode 100644
index 000000000000..0c388469fb5e
--- /dev/null
+++ b/.buildkite/pipeline.build.yml
@@ -0,0 +1,512 @@
+- label: ":ferris_wheel: Wheels and Jars"
+  conditions:
+    [
+        "RAY_CI_LINUX_WHEELS_AFFECTED",
+        "RAY_CI_JAVA_AFFECTED",
+    ]
+  commands:
+    # Build the wheels and jars
+    - UPLOAD_WHEELS_AS_ARTIFACTS=1 LINUX_WHEELS=1 LINUX_JARS=1 ./ci/ci.sh build
+    - bash ./java/build-jar-multiplatform.sh linux
+    # Upload the wheels and jars
+    # We don't want to push on PRs, in fact, the copy_files will fail because unauthenticated.
+    - if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then exit 0; fi
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    # Upload to branch directory.
+    - python .buildkite/copy_files.py --destination branch_wheels --path ./.whl
+    - python .buildkite/copy_files.py --destination branch_jars --path ./.jar/linux
+    # Upload to latest directory.
+    - if [ "$BUILDKITE_BRANCH" == "master" ]; then python .buildkite/copy_files.py --destination wheels --path ./.whl; fi
+    - if [ "$BUILDKITE_BRANCH" == "master" ]; then python .buildkite/copy_files.py --destination jars --path ./.jar/linux; fi
+
+- label: ":ferris_wheel: Post-wheel tests"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=post_wheel_build
+      --test_env=CONDA_EXE
+      --test_env=CONDA_PYTHON_EXE
+      --test_env=CONDA_SHLVL
+      --test_env=CONDA_PREFIX
+      --test_env=CONDA_DEFAULT_ENV
+      --test_env=CI
+      --test_env=RAY_CI_POST_WHEEL_TESTS=True
+      python/ray/tests/... python/ray/serve/... python/ray/tune/... rllib/... doc/...
+
+- label: ":ferris_wheel: Debug Wheels"
+  conditions:
+    [
+        "RAY_CI_LINUX_WHEELS_AFFECTED",
+        "RAY_CI_JAVA_AFFECTED",
+    ]
+  commands:
+    # Build the debug wheels
+    - RAY_DEBUG_BUILD=debug LINUX_WHEELS=1 ./ci/ci.sh build
+    # Upload the wheels.
+    # We don't want to push on PRs, in fact, the copy_files will fail because unauthenticated.
+    - if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then exit 0; fi
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    # Upload to branch directory.
+    - python .buildkite/copy_files.py --destination branch_wheels --path ./.whl
+    # Upload to latest directory.
+    - if [ "$BUILDKITE_BRANCH" == "master" ]; then python .buildkite/copy_files.py --destination wheels --path ./.whl; fi
+
+# Not working now.
+# - label: ":ferris_wheel: ASAN Wheels"
+#   conditions:
+#     [
+#         "RAY_CI_LINUX_WHEELS_AFFECTED",
+#         "RAY_CI_JAVA_AFFECTED",
+#     ]
+#   commands:
+#     # Build the asan wheels
+#     - RAY_DEBUG_BUILD=asan LINUX_WHEELS=1 ./ci/ci.sh build
+#     # Upload the wheels.
+#     # We don't want to push on PRs, in fact, the copy_files will fail because unauthenticated.
+#     - if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then exit 0; fi
+#     - pip install -q docker aws_requests_auth boto3
+#     # Upload to branch directory.
+#     - python .buildkite/copy_files.py --destination branch_wheels --path ./.whl
+#     # Upload to latest directory.
+#     - if [ "$BUILDKITE_BRANCH" == "master" ]; then python .buildkite/copy_files.py --destination wheels --path ./.whl; fi
+
+- label: ":docker: Build Images: py36 (1/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py36 --device-types cpu cu101 cu102 cu110 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py36 (2/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py36 --device-types cu111 cu112 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py37 (1/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py37 --device-types cpu cu101 cu102 cu110 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py37 (2/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py37 --device-types cu111 cu112 cu113 cu116 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py38 (1/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py38 --device-types cpu cu101 cu102 cu110 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py38 (2/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py38 --device-types cu111 cu112 cu113 cu116 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py39 (1/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py39 --device-types cpu cu101 cu102 cu110 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py39 (2/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py39 --device-types cu111 cu112 cu113 cu116 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py310 (1/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py310 --device-types cpu cu101 cu102 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py310 (2/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py310 --device-types cu110 cu111 cu112 --build-type BUILDKITE --build-base
+
+- label: ":java: Java"
+  conditions: ["RAY_CI_JAVA_AFFECTED"]
+  commands:
+    - ./java/test.sh
+
+- label: ":cpp: Ray CPP Worker"
+  conditions: [ "RAY_CI_CPP_AFFECTED" ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/ci.sh test_cpp
+
+- label: ":cpp: Tests"
+  conditions: [ "RAY_CI_CORE_CPP_AFFECTED" ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - bazel test --config=ci --config=llvm $(./ci/run/bazel_export_options)
+      --build_tests_only
+      -- //:all -rllib/... -core_worker_test
+
+- label: ":cpp: Tests (ASAN)"
+  conditions: [ "RAY_CI_CORE_CPP_AFFECTED" ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - bazel test --config=ci --config=asan-clang $(./ci/run/bazel_export_options)
+      --build_tests_only
+      --jobs=2
+      -- //:all -//:core_worker_test
+
+- label: ":cpp: Tests (UBSAN)"
+  conditions: [ "RAY_CI_CORE_CPP_AFFECTED" ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - bazel test --config=ci --config=ubsan $(./ci/run/bazel_export_options)
+      --build_tests_only
+      --jobs=2
+      -- //:all -//:core_worker_test -//:logging_test -//:ray_syncer_test
+
+- label: ":cpp: Tests (TSAN)"
+  conditions: [ "RAY_CI_CORE_CPP_AFFECTED" ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - bazel test --config=ci --config=tsan-clang $(./ci/run/bazel_export_options)
+      --build_tests_only
+      --jobs=2
+      -- //:all -//:core_worker_test -//:event_test -//:gcs_actor_manager_test
+      -//:gcs_placement_group_manager_test -//:gcs_placement_group_scheduler_test
+      -//:gcs_server_rpc_test -//:gcs_client_test -//:gcs_heartbeat_manager_test
+      -//:metric_exporter_client_test -//:stats_test -//:worker_pool_test
+      -//:ray_syncer_test
+
+- label: ":serverless: Dashboard Tests"
+  conditions:
+    [
+        "RAY_CI_DASHBOARD_AFFECTED",
+        "RAY_CI_PYTHON_AFFECTED",
+    ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/env_info.sh
+    - ./dashboard/tests/run_ui_tests.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options) python/ray/dashboard/...
+
+- label: ":serverless: Serve Release Tests"
+  conditions:
+    [
+        "RAY_CI_SERVE_AFFECTED",
+        "RAY_CI_PYTHON_AFFECTED",
+    ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - TORCH_VERSION=1.6 ./ci/env/install-dependencies.sh
+    - 'git clone https://github.com/wg/wrk.git /tmp/wrk && pushd /tmp/wrk && make -j && sudo cp wrk /usr/local/bin && popd'
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=team:serve
+      release/...
+
+- label: ":serverless: Serve Tests"
+  parallelism: 3
+  conditions:
+    [
+        "RAY_CI_SERVE_AFFECTED",
+        "RAY_CI_PYTHON_AFFECTED",
+        "RAY_CI_ML_AFFECTED",
+    ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - TORCH_VERSION=1.6 ./ci/env/install-dependencies.sh
+    - 'git clone https://github.com/wg/wrk.git /tmp/wrk && pushd /tmp/wrk && make -j && sudo cp wrk /usr/local/bin && popd'
+    - ./ci/env/env_info.sh
+    - >-
+      set -x;
+      python ./ci/run/bazel-sharding.py
+      --exclude_manual
+      --index "\${BUILDKITE_PARALLEL_JOB}" --count "\${BUILDKITE_PARALLEL_JOB_COUNT}"
+      python/ray/serve/...
+      > test_shard.txt
+    - cat test_shard.txt
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=-post_wheel_build,-py37,-gpu
+      $(cat test_shard.txt)
+
+
+- label: ":serverless: Serve Tests (Python 3.7)"
+  conditions:
+    [
+        "RAY_CI_SERVE_AFFECTED",
+        "RAY_CI_PYTHON_AFFECTED",
+    ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - echo "--- Setting up Python 3.7 environment."
+    - PYTHON=3.7 TORCH_VERSION=1.6 ./ci/env/install-dependencies.sh
+    # Specifying PYTHON=3.7 above somehow messes up the Ray install.
+    # Uninstall and re-install Ray so that we can use Ray Client.
+    # (Remove thirdparty_files to sidestep an issue with psutil.)
+    - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files
+    - ./ci/ci.sh build
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=team:serve
+      python/ray/serve/test_gradio
+      python/ray/serve/test_gradio_visualization
+
+
+- label: ":python: Minimal install 3.6"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/ci.sh test_minimal 3.6
+
+- label: ":python: Minimal install 3.7"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/ci.sh test_minimal 3.7
+
+- label: ":python: Minimal install 3.8"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/ci.sh test_minimal 3.8
+
+- label: ":python: Minimal install 3.9"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/ci.sh test_minimal 3.9
+
+- label: ":python: Minimal install 3.10"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/ci.sh test_minimal 3.10
+
+- label: ":python: Default install"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/install-default.sh
+    - ./ci/env/env_info.sh
+    - bazel test --test_output=streamed --config=ci --test_env=RAY_DEFAULT=1 $(./ci/run/bazel_export_options)
+      python/ray/dashboard/test_dashboard
+
+- label: ":python: Ray Serve default install"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/install-serve.sh
+    - ./ci/env/env_info.sh
+    - bazel test --test_output=streamed --config=ci --test_env=RAY_DEFAULT=1 $(./ci/run/bazel_export_options)
+      python/ray/serve/test_deployment_graph
+    - bazel test --test_output=streamed --config=ci --test_env=RAY_DEFAULT=1 $(./ci/run/bazel_export_options)
+      python/ray/serve/test_api
+
+- label: ":python: Release test package unit tests"
+  conditions: ["ALWAYS"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - pip install -e release/
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --build_tests_only
+      --test_tag_filters=release_unit
+      release/...
+
+- label: ":python: (Small & Client)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - bash ./ci/ci.sh prepare_docker
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=client_tests,small_size_python_tests
+      -- python/ray/tests/...
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=ray_ha
+      --test_env=DOCKER_HOST=tcp://docker:2376
+      --test_env=DOCKER_TLS_VERIFY=1
+      --test_env=DOCKER_CERT_PATH=/certs/client
+      --test_env=DOCKER_TLS_CERTDIR=/certs
+      -- python/ray/tests/...
+
+- label: ":python: (Large)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  parallelism: 3
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/env_info.sh
+    - ./ci/ci.sh test_large
+
+- label: ":python: (Medium A-J)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=-kubernetes,medium_size_python_tests_a_to_j
+      python/ray/tests/...
+
+- label: ":python: (Medium K-Z)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=-kubernetes,medium_size_python_tests_k_to_z
+      python/ray/tests/...
+
+- label: ":redis: (External Redis) (Small & Client)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./scripts/bazel_export_options)
+      --test_tag_filters=client_tests,small_size_python_tests
+      --test_env=TEST_EXTERNAL_REDIS=1
+      -- python/ray/tests/...
+
+- label: ":redis: (External Redis) (Large)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  parallelism: 3
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/env_info.sh
+    - TEST_EXTERNAL_REDIS=1 ./ci/ci.sh test_large
+
+- label: ":redis: (External Redis) (Medium A-J)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./scripts/bazel_export_options)
+      --test_tag_filters=-kubernetes,medium_size_python_tests_a_to_j
+      --test_env=TEST_EXTERNAL_REDIS=1
+      -- //python/ray/tests/...
+
+- label: ":redis: (External Redis) (Medium K-Z)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./scripts/bazel_export_options)
+      --test_tag_filters=-kubernetes,medium_size_python_tests_k_to_z
+      --test_env=TEST_EXTERNAL_REDIS=1
+      -- //python/ray/tests/...
+
+- label: ":python: Debug Test"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - pip uninstall -y ray
+    - RAY_DEBUG_BUILD=debug ./ci/ci.sh build
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci-debug $(./ci/run/bazel_export_options)
+      --test_tag_filters=-kubernetes,debug_tests
+      python/ray/tests/...
+
+- label: ":python: (ASAN tests)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
+    - pip install "grpcio >= 1.28.1, <= 1.43.0"
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci --config=asan $(./ci/run/bazel_export_options)
+      --config=asan-buildkite
+      --test_tag_filters=-kubernetes,asan_tests
+      --test_env=CONDA_EXE
+      --test_env=CONDA_PYTHON_EXE
+      --test_env=CONDA_SHLVL
+      --test_env=CONDA_PREFIX
+      --test_env=CONDA_DEFAULT_ENV
+      python/ray/tests/...
+
+# https://github.com/ray-project/ray/issues/22460
+#- label: ":python: (Privileged test)"
+  #conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  #commands:
+    #- LINUX_WHEELS=1 ./ci/ci.sh build
+    #- pip install docker
+     #We build image ray-worker-container:nightly-py36-cpu which have installed podman,but not push it.
+     #And we save this image to a tarball, so that we can load it to podman image storage in the
+     #nested-container which run tests. And in this nested-container, Raylet will start ray worker
+     #process in double-nested-container.
+    #- python ./ci/build/build-docker-images.py --py-versions py36 --device-types cpu --build-type BUILDKITE --only-build-worker-container
+    #- mkdir /ray-mount/containers
+    #- docker save -o /ray-mount/containers/images.tar rayproject/ray-worker-container:nightly-py36-cpu
+    #- docker run --rm --privileged -v /ray/containers:/var/lib/containers -v /ray:/ray --entrypoint /bin/bash
+      #rayproject/ray-worker-container:nightly-py36-cpu /ray/ci/build/test-worker-in-container.sh
+
+- label: ":kubernetes: operator"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - |
+      cleanup() {
+        if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi
+        python python/ray/tests/kuberay/setup/teardown_kuberay.py || true
+        kind delete cluster
+      }
+      trap cleanup EXIT
+    - echo "--- Setting up Python 3.7 environment."
+    - PYTHON=3.7 ./ci/env/install-dependencies.sh
+    # Specifying PYTHON=3.7 above somehow messes up the Ray install.
+    # Uninstall and re-install Ray so that we can use Ray Client.
+    # (Remove thirdparty_files to sidestep an issue with psutil.)
+    - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files
+    - pip install -e /ray/python
+    - echo "--- Setting up local kind cluster."
+    - ./ci/k8s/prep-k8s-environment.sh
+    - echo "--- Building py37-cpu Ray image for the test."
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker
+    - python ci/build/build-docker-images.py --py-versions py37 --device-types cpu --build-type LOCAL --build-base
+    # Tag the image built in the last step. We want to be sure to distinguish the image from the real Ray nightly.
+    - docker tag rayproject/ray:nightly-py37-cpu ray-ci:kuberay-test
+    # Load the image into the kind node.
+    - kind load docker-image ray-ci:kuberay-test
+    - echo "--- Setting up KubeRay operator."
+    - python python/ray/tests/kuberay/setup/setup_kuberay.py
+    - ./ci/env/env_info.sh
+    - echo "--- Running the test."
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=kuberay_operator
+      --test_env=RAY_IMAGE=docker.io/library/ray-ci:kuberay-test
+      --test_env=PULL_POLICY=IfNotPresent
+      --test_env=KUBECONFIG=/root/.kube/config
+      python/ray/tests/...
diff --git a/.buildkite/pipeline.test.yml b/.buildkite/pipeline.test.yml
new file mode 100644
index 000000000000..48274a7f9350
--- /dev/null
+++ b/.buildkite/pipeline.test.yml
@@ -0,0 +1,61 @@
+
+- label: ":book: Lint"
+  commands:
+    - export LINT=1
+    - ./ci/env/install-dependencies.sh
+    - ./ci/ci.sh lint
+
+- label: ":book: Documentation"
+  commands:
+    - export LINT=1
+    - echo "--- Setting up Python 3.7 environment."
+    - PYTHON=3.7 ./ci/env/install-dependencies.sh
+    # Specifying PYTHON=3.7 above somehow messes up the Ray install.
+    # Uninstall and re-install Ray so that we can use Ray Client
+    # (remove thirdparty_files to sidestep an issue with psutil).
+    - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files
+    - pushd /ray && git clean -f -f -x -d -e .whl -e python/ray/dashboard/client && popd
+    - bazel clean --expunge
+    - ./ci/ci.sh build
+
+- label: ":book: LinkCheck"
+  commands:
+    - export LINT=1
+    - ./ci/env/install-dependencies.sh
+    - ./ci/ci.sh check_sphinx_links
+  soft_fail: True
+
+
+- label: ":octopus: Tune soft imports test"
+  conditions: ["RAY_CI_TUNE_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    # no TUNE_TESTING=1 on purpose
+    - ./ci/env/install-dependencies.sh
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=soft_imports python/ray/tune/...
+
+# Test to see if Train can be used without torch, tf, etc. installed
+- label: ":steam_locomotive: Train minimal install"
+  conditions: ["RAY_CI_TRAIN_AFFECTED"]
+  commands:
+      - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+      - TRAIN_MINIMAL_INSTALL=1 ./ci/env/install-minimal.sh
+      - ./ci/env/env_info.sh
+      - python ./ci/env/check_minimal_install.py
+      - bazel test --config=ci $(./ci/run/bazel_export_options)  --build_tests_only --test_tag_filters=minimal python/ray/train/...
+
+
+- label: ":python: Ray DAG Tests"
+  conditions:
+    [
+        "RAY_CI_PYTHON_AFFECTED",
+    ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - pip install -U pydot
+    - sudo apt-get install -y graphviz
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./scripts/bazel_export_options)
+      --test_tag_filters=ray_dag_tests
+      python/ray/dag/...
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 6450aee27e08..a6ca78843880 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -1,571 +1,5 @@
-- label: ":ferris_wheel: Wheels and Jars"
-  conditions:
-    [
-        "RAY_CI_LINUX_WHEELS_AFFECTED",
-        "RAY_CI_JAVA_AFFECTED",
-    ]
-  commands:
-    # Build the wheels and jars
-    - UPLOAD_WHEELS_AS_ARTIFACTS=1 LINUX_WHEELS=1 LINUX_JARS=1 ./ci/ci.sh build
-    - bash ./java/build-jar-multiplatform.sh linux
-    # Upload the wheels and jars
-    # We don't want to push on PRs, in fact, the copy_files will fail because unauthenticated.
-    - if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then exit 0; fi
-    - pip install -q docker aws_requests_auth boto3
-    - ./ci/env/env_info.sh
-    # Upload to branch directory.
-    - python .buildkite/copy_files.py --destination branch_wheels --path ./.whl
-    - python .buildkite/copy_files.py --destination branch_jars --path ./.jar/linux
-    # Upload to latest directory.
-    - if [ "$BUILDKITE_BRANCH" == "master" ]; then python .buildkite/copy_files.py --destination wheels --path ./.whl; fi
-    - if [ "$BUILDKITE_BRANCH" == "master" ]; then python .buildkite/copy_files.py --destination jars --path ./.jar/linux; fi
-
-- label: ":ferris_wheel: Post-wheel tests"
-  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
-  commands:
-    - LINUX_WHEELS=1 ./ci/ci.sh build
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/env/env_info.sh
-    - bazel test --config=ci $(./ci/run/bazel_export_options)
-      --test_tag_filters=post_wheel_build
-      --test_env=CONDA_EXE
-      --test_env=CONDA_PYTHON_EXE
-      --test_env=CONDA_SHLVL
-      --test_env=CONDA_PREFIX
-      --test_env=CONDA_DEFAULT_ENV
-      --test_env=CI
-      --test_env=RAY_CI_POST_WHEEL_TESTS=True
-      python/ray/tests/... python/ray/serve/... python/ray/tune/... rllib/... doc/...
-
-- label: ":ferris_wheel: Debug Wheels"
-  conditions:
-    [
-        "RAY_CI_LINUX_WHEELS_AFFECTED",
-        "RAY_CI_JAVA_AFFECTED",
-    ]
-  commands:
-    # Build the debug wheels
-    - RAY_DEBUG_BUILD=debug LINUX_WHEELS=1 ./ci/ci.sh build
-    # Upload the wheels.
-    # We don't want to push on PRs, in fact, the copy_files will fail because unauthenticated.
-    - if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then exit 0; fi
-    - pip install -q docker aws_requests_auth boto3
-    - ./ci/env/env_info.sh
-    # Upload to branch directory.
-    - python .buildkite/copy_files.py --destination branch_wheels --path ./.whl
-    # Upload to latest directory.
-    - if [ "$BUILDKITE_BRANCH" == "master" ]; then python .buildkite/copy_files.py --destination wheels --path ./.whl; fi
-
-# Not working now.
-# - label: ":ferris_wheel: ASAN Wheels"
-#   conditions:
-#     [
-#         "RAY_CI_LINUX_WHEELS_AFFECTED",
-#         "RAY_CI_JAVA_AFFECTED",
-#     ]
-#   commands:
-#     # Build the asan wheels
-#     - RAY_DEBUG_BUILD=asan LINUX_WHEELS=1 ./ci/ci.sh build
-#     # Upload the wheels.
-#     # We don't want to push on PRs, in fact, the copy_files will fail because unauthenticated.
-#     - if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then exit 0; fi
-#     - pip install -q docker aws_requests_auth boto3
-#     # Upload to branch directory.
-#     - python .buildkite/copy_files.py --destination branch_wheels --path ./.whl
-#     # Upload to latest directory.
-#     - if [ "$BUILDKITE_BRANCH" == "master" ]; then python .buildkite/copy_files.py --destination wheels --path ./.whl; fi
-
-- label: ":docker: Build Images: py36 (1/2)"
-  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
-  commands:
-    - LINUX_WHEELS=1 ./ci/ci.sh build
-    - pip install -q docker aws_requests_auth boto3
-    - ./ci/env/env_info.sh
-    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
-    - python ./ci/build/build-docker-images.py --py-versions py36 --device-types cpu cu101 cu102 cu110 --build-type BUILDKITE --build-base
-
-- label: ":docker: Build Images: py36 (2/2)"
-  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
-  commands:
-    - LINUX_WHEELS=1 ./ci/ci.sh build
-    - pip install -q docker aws_requests_auth boto3
-    - ./ci/env/env_info.sh
-    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
-    - python ./ci/build/build-docker-images.py --py-versions py36 --device-types cu111 cu112 --build-type BUILDKITE --build-base
-
-- label: ":docker: Build Images: py37 (1/2)"
-  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
-  commands:
-    - LINUX_WHEELS=1 ./ci/ci.sh build
-    - pip install -q docker aws_requests_auth boto3
-    - ./ci/env/env_info.sh
-    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
-    - python ./ci/build/build-docker-images.py --py-versions py37 --device-types cpu cu101 cu102 cu110 --build-type BUILDKITE --build-base
-
-- label: ":docker: Build Images: py37 (2/2)"
-  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
-  commands:
-    - LINUX_WHEELS=1 ./ci/ci.sh build
-    - pip install -q docker aws_requests_auth boto3
-    - ./ci/env/env_info.sh
-    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
-    - python ./ci/build/build-docker-images.py --py-versions py37 --device-types cu111 cu112 cu113 cu116 --build-type BUILDKITE --build-base
-
-- label: ":docker: Build Images: py38 (1/2)"
-  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
-  commands:
-    - LINUX_WHEELS=1 ./ci/ci.sh build
-    - pip install -q docker aws_requests_auth boto3
-    - ./ci/env/env_info.sh
-    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
-    - python ./ci/build/build-docker-images.py --py-versions py38 --device-types cpu cu101 cu102 cu110 --build-type BUILDKITE --build-base
-
-- label: ":docker: Build Images: py38 (2/2)"
-  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
-  commands:
-    - LINUX_WHEELS=1 ./ci/ci.sh build
-    - pip install -q docker aws_requests_auth boto3
-    - ./ci/env/env_info.sh
-    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
-    - python ./ci/build/build-docker-images.py --py-versions py38 --device-types cu111 cu112 cu113 cu116 --build-type BUILDKITE --build-base
-
-- label: ":docker: Build Images: py39 (1/2)"
-  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
-  commands:
-    - LINUX_WHEELS=1 ./ci/ci.sh build
-    - pip install -q docker aws_requests_auth boto3
-    - ./ci/env/env_info.sh
-    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
-    - python ./ci/build/build-docker-images.py --py-versions py39 --device-types cpu cu101 cu102 cu110 --build-type BUILDKITE --build-base
-
-- label: ":docker: Build Images: py39 (2/2)"
-  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
-  commands:
-    - LINUX_WHEELS=1 ./ci/ci.sh build
-    - pip install -q docker aws_requests_auth boto3
-    - ./ci/env/env_info.sh
-    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
-    - python ./ci/build/build-docker-images.py --py-versions py39 --device-types cu111 cu112 cu113 cu116 --build-type BUILDKITE --build-base
-
-- label: ":docker: Build Images: py310 (1/2)"
-  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
-  commands:
-    - LINUX_WHEELS=1 ./ci/ci.sh build
-    - pip install -q docker aws_requests_auth boto3
-    - ./ci/env/env_info.sh
-    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
-    - python ./ci/build/build-docker-images.py --py-versions py310 --device-types cpu cu101 cu102 --build-type BUILDKITE --build-base
-
-- label: ":docker: Build Images: py310 (2/2)"
-  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
-  commands:
-    - LINUX_WHEELS=1 ./ci/ci.sh build
-    - pip install -q docker aws_requests_auth boto3
-    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
-    - python ./ci/build/build-docker-images.py --py-versions py310 --device-types cu110 cu111 cu112 --build-type BUILDKITE --build-base
-
 - label: ":book: Lint"
   commands:
     - export LINT=1
     - ./ci/env/install-dependencies.sh
     - ./ci/ci.sh lint
-
-- label: ":book: Documentation"
-  commands:
-    - export LINT=1
-    - echo "--- Setting up Python 3.7 environment."
-    - PYTHON=3.7 ./ci/env/install-dependencies.sh
-    # Specifying PYTHON=3.7 above somehow messes up the Ray install.
-    # Uninstall and re-install Ray so that we can use Ray Client
-    # (remove thirdparty_files to sidestep an issue with psutil).
-    - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files
-    - pushd /ray && git clean -f -f -x -d -e .whl -e python/ray/dashboard/client && popd
-    - bazel clean --expunge
-    - ./ci/ci.sh build
-
-- label: ":book: LinkCheck"
-  commands:
-    - export LINT=1
-    - ./ci/env/install-dependencies.sh
-    - ./ci/ci.sh check_sphinx_links
-  soft_fail: True
-
-- label: ":java: Java"
-  conditions: ["RAY_CI_JAVA_AFFECTED"]
-  commands:
-    - ./java/test.sh
-
-- label: ":cpp: Ray CPP Worker"
-  conditions: [ "RAY_CI_CPP_AFFECTED" ]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/ci.sh test_cpp
-
-- label: ":cpp: Tests"
-  conditions: [ "RAY_CI_CORE_CPP_AFFECTED" ]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - bazel test --config=ci --config=llvm $(./ci/run/bazel_export_options)
-      --build_tests_only
-      -- //:all -rllib/... -core_worker_test
-
-- label: ":cpp: Tests (ASAN)"
-  conditions: [ "RAY_CI_CORE_CPP_AFFECTED" ]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - bazel test --config=ci --config=asan-clang $(./ci/run/bazel_export_options)
-      --build_tests_only
-      --jobs=2
-      -- //:all -//:core_worker_test
-
-- label: ":cpp: Tests (UBSAN)"
-  conditions: [ "RAY_CI_CORE_CPP_AFFECTED" ]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - bazel test --config=ci --config=ubsan $(./ci/run/bazel_export_options)
-      --build_tests_only
-      --jobs=2
-      -- //:all -//:core_worker_test -//:logging_test -//:ray_syncer_test
-
-- label: ":cpp: Tests (TSAN)"
-  conditions: [ "RAY_CI_CORE_CPP_AFFECTED" ]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - bazel test --config=ci --config=tsan-clang $(./ci/run/bazel_export_options)
-      --build_tests_only
-      --jobs=2
-      -- //:all -//:core_worker_test -//:event_test -//:gcs_actor_manager_test
-      -//:gcs_placement_group_manager_test -//:gcs_placement_group_scheduler_test
-      -//:gcs_server_rpc_test -//:gcs_client_test -//:gcs_heartbeat_manager_test
-      -//:metric_exporter_client_test -//:stats_test -//:worker_pool_test
-      -//:ray_syncer_test
-
-- label: ":serverless: Dashboard Tests"
-  conditions:
-    [
-        "RAY_CI_DASHBOARD_AFFECTED",
-        "RAY_CI_PYTHON_AFFECTED",
-    ]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/env/env_info.sh
-    - ./dashboard/tests/run_ui_tests.sh
-    - bazel test --config=ci $(./ci/run/bazel_export_options) python/ray/dashboard/...
-
-- label: ":serverless: Serve Release Tests"
-  conditions:
-    [
-        "RAY_CI_SERVE_AFFECTED",
-        "RAY_CI_PYTHON_AFFECTED",
-    ]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - TORCH_VERSION=1.6 ./ci/env/install-dependencies.sh
-    - 'git clone https://github.com/wg/wrk.git /tmp/wrk && pushd /tmp/wrk && make -j && sudo cp wrk /usr/local/bin && popd'
-    - ./ci/env/env_info.sh
-    - bazel test --config=ci $(./ci/run/bazel_export_options)
-      --test_tag_filters=team:serve
-      release/...
-
-- label: ":serverless: Serve Tests"
-  parallelism: 3
-  conditions:
-    [
-        "RAY_CI_SERVE_AFFECTED",
-        "RAY_CI_PYTHON_AFFECTED",
-        "RAY_CI_ML_AFFECTED",
-    ]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - TORCH_VERSION=1.6 ./ci/env/install-dependencies.sh
-    - 'git clone https://github.com/wg/wrk.git /tmp/wrk && pushd /tmp/wrk && make -j && sudo cp wrk /usr/local/bin && popd'
-    - ./ci/env/env_info.sh
-    - >-
-      set -x;
-      python ./ci/run/bazel-sharding.py
-      --exclude_manual
-      --index "\${BUILDKITE_PARALLEL_JOB}" --count "\${BUILDKITE_PARALLEL_JOB_COUNT}"
-      python/ray/serve/...
-      > test_shard.txt
-    - cat test_shard.txt
-    - bazel test --config=ci $(./ci/run/bazel_export_options)
-      --test_tag_filters=-post_wheel_build,-py37,-gpu
-      $(cat test_shard.txt)
-
-
-- label: ":serverless: Serve Tests (Python 3.7)"
-  conditions:
-    [
-        "RAY_CI_SERVE_AFFECTED",
-        "RAY_CI_PYTHON_AFFECTED",
-    ]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - echo "--- Setting up Python 3.7 environment."
-    - PYTHON=3.7 TORCH_VERSION=1.6 ./ci/env/install-dependencies.sh
-    # Specifying PYTHON=3.7 above somehow messes up the Ray install.
-    # Uninstall and re-install Ray so that we can use Ray Client.
-    # (Remove thirdparty_files to sidestep an issue with psutil.)
-    - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files
-    - ./ci/ci.sh build
-    - bazel test --config=ci $(./ci/run/bazel_export_options)
-      --test_tag_filters=team:serve
-      python/ray/serve/test_gradio
-      python/ray/serve/test_gradio_visualization
-
-
-- label: ":python: Minimal install 3.6"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/ci.sh test_minimal 3.6
-
-- label: ":python: Minimal install 3.7"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/ci.sh test_minimal 3.7
-
-- label: ":python: Minimal install 3.8"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/ci.sh test_minimal 3.8
-
-- label: ":python: Minimal install 3.9"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/ci.sh test_minimal 3.9
-
-- label: ":python: Minimal install 3.10"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/ci.sh test_minimal 3.10
-
-- label: ":python: Default install"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/env/install-default.sh
-    - ./ci/env/env_info.sh
-    - bazel test --test_output=streamed --config=ci --test_env=RAY_DEFAULT=1 $(./ci/run/bazel_export_options)
-      python/ray/dashboard/test_dashboard
-
-- label: ":python: Ray Serve default install"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/env/install-serve.sh
-    - ./ci/env/env_info.sh
-    - bazel test --test_output=streamed --config=ci --test_env=RAY_DEFAULT=1 $(./ci/run/bazel_export_options)
-      python/ray/serve/test_deployment_graph
-    - bazel test --test_output=streamed --config=ci --test_env=RAY_DEFAULT=1 $(./ci/run/bazel_export_options)
-      python/ray/serve/test_api
-
-- label: ":python: Release test package unit tests"
-  conditions: ["ALWAYS"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - pip install -e release/
-    - ./ci/env/env_info.sh
-    - bazel test --config=ci $(./ci/run/bazel_export_options)
-      --build_tests_only
-      --test_tag_filters=release_unit
-      release/...
-
-- label: ":python: (Small & Client)"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - bash ./ci/ci.sh prepare_docker
-    - ./ci/env/env_info.sh
-    - bazel test --config=ci $(./ci/run/bazel_export_options)
-      --test_tag_filters=client_tests,small_size_python_tests
-      -- python/ray/tests/...
-    - bazel test --config=ci $(./ci/run/bazel_export_options)
-      --test_tag_filters=ray_ha
-      --test_env=DOCKER_HOST=tcp://docker:2376
-      --test_env=DOCKER_TLS_VERIFY=1
-      --test_env=DOCKER_CERT_PATH=/certs/client
-      --test_env=DOCKER_TLS_CERTDIR=/certs
-      -- python/ray/tests/...
-
-- label: ":python: (Large)"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  parallelism: 3
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/env/env_info.sh
-    - ./ci/ci.sh test_large
-
-- label: ":python: (Medium A-J)"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/env/env_info.sh
-    - bazel test --config=ci $(./ci/run/bazel_export_options)
-      --test_tag_filters=-kubernetes,medium_size_python_tests_a_to_j
-      python/ray/tests/...
-
-- label: ":python: (Medium K-Z)"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - bazel test --config=ci $(./ci/run/bazel_export_options)
-      --test_tag_filters=-kubernetes,medium_size_python_tests_k_to_z
-      python/ray/tests/...
-
-- label: ":redis: (External Redis) (Small & Client)"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/env/env_info.sh
-    - bazel test --config=ci $(./scripts/bazel_export_options)
-      --test_tag_filters=client_tests,small_size_python_tests
-      --test_env=TEST_EXTERNAL_REDIS=1
-      -- python/ray/tests/...
-
-- label: ":redis: (External Redis) (Large)"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  parallelism: 3
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/env/env_info.sh
-    - TEST_EXTERNAL_REDIS=1 ./ci/ci.sh test_large
-
-- label: ":redis: (External Redis) (Medium A-J)"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/env/env_info.sh
-    - bazel test --config=ci $(./scripts/bazel_export_options)
-      --test_tag_filters=-kubernetes,medium_size_python_tests_a_to_j
-      --test_env=TEST_EXTERNAL_REDIS=1
-      -- //python/ray/tests/...
-
-- label: ":redis: (External Redis) (Medium K-Z)"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - ./ci/env/env_info.sh
-    - bazel test --config=ci $(./scripts/bazel_export_options)
-      --test_tag_filters=-kubernetes,medium_size_python_tests_k_to_z
-      --test_env=TEST_EXTERNAL_REDIS=1
-      -- //python/ray/tests/...
-
-- label: ":python: Debug Test"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - pip uninstall -y ray
-    - RAY_DEBUG_BUILD=debug ./ci/ci.sh build
-    - ./ci/env/env_info.sh
-    - bazel test --config=ci-debug $(./ci/run/bazel_export_options)
-      --test_tag_filters=-kubernetes,debug_tests
-      python/ray/tests/...
-
-- label: ":python: (ASAN tests)"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
-    - pip install "grpcio >= 1.28.1, <= 1.43.0"
-    - ./ci/env/env_info.sh
-    - bazel test --config=ci --config=asan $(./ci/run/bazel_export_options)
-      --config=asan-buildkite
-      --test_tag_filters=-kubernetes,asan_tests
-      --test_env=CONDA_EXE
-      --test_env=CONDA_PYTHON_EXE
-      --test_env=CONDA_SHLVL
-      --test_env=CONDA_PREFIX
-      --test_env=CONDA_DEFAULT_ENV
-      python/ray/tests/...
-
-# https://github.com/ray-project/ray/issues/22460
-#- label: ":python: (Privileged test)"
-  #conditions: ["RAY_CI_PYTHON_AFFECTED"]
-  #commands:
-    #- LINUX_WHEELS=1 ./ci/ci.sh build
-    #- pip install docker
-     #We build image ray-worker-container:nightly-py36-cpu which have installed podman,but not push it.
-     #And we save this image to a tarball, so that we can load it to podman image storage in the
-     #nested-container which run tests. And in this nested-container, Raylet will start ray worker
-     #process in double-nested-container.
-    #- python ./ci/build/build-docker-images.py --py-versions py36 --device-types cpu --build-type BUILDKITE --only-build-worker-container
-    #- mkdir /ray-mount/containers
-    #- docker save -o /ray-mount/containers/images.tar rayproject/ray-worker-container:nightly-py36-cpu
-    #- docker run --rm --privileged -v /ray/containers:/var/lib/containers -v /ray:/ray --entrypoint /bin/bash
-      #rayproject/ray-worker-container:nightly-py36-cpu /ray/ci/build/test-worker-in-container.sh
-
-- label: ":octopus: Tune soft imports test"
-  conditions: ["RAY_CI_TUNE_AFFECTED"]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    # no TUNE_TESTING=1 on purpose
-    - ./ci/env/install-dependencies.sh
-    - ./ci/env/env_info.sh
-    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=soft_imports python/ray/tune/...
-
-# Test to see if Train can be used without torch, tf, etc. installed
-- label: ":steam_locomotive: Train minimal install"
-  conditions: ["RAY_CI_TRAIN_AFFECTED"]
-  commands:
-      - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-      - TRAIN_MINIMAL_INSTALL=1 ./ci/env/install-minimal.sh
-      - ./ci/env/env_info.sh
-      - python ./ci/env/check_minimal_install.py
-      - bazel test --config=ci $(./ci/run/bazel_export_options)  --build_tests_only --test_tag_filters=minimal python/ray/train/...
-
-- label: ":kubernetes: operator"
-  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
-  commands:
-    - |
-      cleanup() {
-        if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi
-        python python/ray/tests/kuberay/setup/teardown_kuberay.py || true
-        kind delete cluster
-      }
-      trap cleanup EXIT
-    - echo "--- Setting up Python 3.7 environment."
-    - PYTHON=3.7 ./ci/env/install-dependencies.sh
-    # Specifying PYTHON=3.7 above somehow messes up the Ray install.
-    # Uninstall and re-install Ray so that we can use Ray Client.
-    # (Remove thirdparty_files to sidestep an issue with psutil.)
-    - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files
-    - pip install -e /ray/python
-    - echo "--- Setting up local kind cluster."
-    - ./ci/k8s/prep-k8s-environment.sh
-    - echo "--- Building py37-cpu Ray image for the test."
-    - LINUX_WHEELS=1 ./ci/ci.sh build
-    - pip install -q docker
-    - python ci/build/build-docker-images.py --py-versions py37 --device-types cpu --build-type LOCAL --build-base
-    # Tag the image built in the last step. We want to be sure to distinguish the image from the real Ray nightly.
-    - docker tag rayproject/ray:nightly-py37-cpu ray-ci:kuberay-test
-    # Load the image into the kind node.
-    - kind load docker-image ray-ci:kuberay-test
-    - echo "--- Setting up KubeRay operator."
-    - python python/ray/tests/kuberay/setup/setup_kuberay.py
-    - ./ci/env/env_info.sh
-    - echo "--- Running the test."
-    - bazel test --config=ci $(./ci/run/bazel_export_options)
-      --test_tag_filters=kuberay_operator
-      --test_env=RAY_IMAGE=docker.io/library/ray-ci:kuberay-test
-      --test_env=PULL_POLICY=IfNotPresent
-      --test_env=KUBECONFIG=/root/.kube/config
-      python/ray/tests/...
-
-- label: ":python: Ray DAG Tests"
-  conditions:
-    [
-        "RAY_CI_PYTHON_AFFECTED",
-    ]
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - pip install -U pydot
-    - sudo apt-get install -y graphviz
-    - ./ci/env/env_info.sh
-    - bazel test --config=ci $(./scripts/bazel_export_options)
-      --test_tag_filters=ray_dag_tests
-      python/ray/dag/...

From 40932c5c5146df7f8ef38a843b13f576404aba6f Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 10:02:57 +0100
Subject: [PATCH 03/61] Install dependencies update

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/env/install-dependencies.sh | 42 ++++++++++++++++++++--------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/ci/env/install-dependencies.sh b/ci/env/install-dependencies.sh
index 3b24f6852e41..5303a1112732 100755
--- a/ci/env/install-dependencies.sh
+++ b/ci/env/install-dependencies.sh
@@ -285,23 +285,7 @@ download_mnist() {
   unzip "${HOME}/data/mnist.zip" -d "${HOME}/data"
 }
 
-install_dependencies() {
-
-  install_bazel
-  install_base
-  install_toolchains
-
-  install_upgrade_pip
-  if [ -n "${PYTHON-}" ] || [ "${LINT-}" = 1 ] || [ "${MINIMAL_INSTALL-}" = "1" ]; then
-    install_miniconda
-    # Upgrade the miniconda pip.
-    install_upgrade_pip
-  fi
-
-  install_nvm
-  if [ -n "${PYTHON-}" ] || [ -n "${LINT-}" ] || [ "${MAC_WHEELS-}" = 1 ]; then
-    install_node
-  fi
+install_pip_packages() {
 
   # Install modules needed in all jobs.
   alias pip="python -m pip"
@@ -431,6 +415,30 @@ install_dependencies() {
   CC=gcc pip install psutil setproctitle==1.2.2 colorama --target="${WORKSPACE_DIR}/python/ray/thirdparty_files"
 }
 
+install_dependencies() {
+  install_bazel
+
+  if [ "${NO_BUILD-}" != "1" ]; then
+    install_base
+    install_toolchains
+  fi
+
+  if [ -n "${PYTHON-}" ] || [ "${LINT-}" = 1 ] || [ "${MINIMAL_INSTALL-}" = "1" ]; then
+    install_miniconda
+  fi
+
+  install_upgrade_pip
+
+  if [ "${NO_BUILD-}" != "1" ]; then
+    install_nvm
+    if [ -n "${PYTHON-}" ] || [ -n "${LINT-}" ] || [ "${MAC_WHEELS-}" = 1 ]; then
+      install_node
+    fi
+  fi
+
+  install_pip_packages
+}
+
 install_dependencies "$@"
 
 # Pop caller's shell options (quietly)

From 06250b0f5bbdae4728b6ccc1c74ca9967e1c6259 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 15 Sep 2022 11:04:38 +0200
Subject: [PATCH 04/61] [CI] [Hackathon] Add dockerfiles for decoupled
 bootstrapping/Library tests (#28535)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* [core/ci] Disallow protobuf 3.19.5 (#28504)

This leads to hangs in Ray client (e.g. test_dataclient_disconnect)

Signed-off-by: Kai Fricke <kai@anyscale.com>

* [tune] Fix trial checkpoint syncing after recovery from other node (#28470)

On restore from a different IP, the SyncerCallback currently still tries to sync from a stale node IP, because `trial.last_result` has not been updated, yet. Instead, the syncer callback should keep its own map of trials to IPs, and only act on this.

Signed-off-by: Kai Fricke <kai@anyscale.com>

* [air] minor example fix. (#28379)

Signed-off-by: xwjiang2010 <xwjiang2010@gmail.com>

* [cleanup] Remove memory unit conversion (#28396)

The internal memory unit was switched back to bytes years ago, there's no point in keeping confusing conversion code around anymore.

Recommendation: Review #28394 first, since this is stacked on top of it.

Co-authored-by: Alex <alex@anyscale.com>

* [RLlib] Sync policy specs from local_worker_for_synching while recovering rollout/eval workers. (#28422)

* Cast rewards as tf.float32 to fix error in DQN in tf2 (#28384)

* Cast rewards as tf.float32 to fix error in DQN in tf2

Signed-off-by: mgerstgrasser <matthias@gerstgrasser.net>

* Add test case for DQN with integer rewards

Signed-off-by: mgerstgrasser <matthias@gerstgrasser.net>

Signed-off-by: mgerstgrasser <matthias@gerstgrasser.net>

* [doc] [Datasets] Improve docstring and doctest for read_parquet (#28488)

This addresses some of the issues brought up in https://github.com/ray-project/ray/issues/28484

* [ci] Increase timeout on test_metrics (#28508)

10 milliseconds is ambitious for the CI to do anything.

Co-authored-by: Alex <alex@anyscale.com>

* [air/tune] Catch empty hyperopt search space, raise better Tuner error message (#28503)

* Add imports to object-spilling.rst Python code (#28507)

* Add imports to object-spilling.rst Python code

Also adjust a couple descriptions, retaining the same general information

Signed-off-by: Jake <DevJake@users.noreply.github.com>

* fix doc build / keep note formatting

Signed-off-by: Philipp Moritz <pcmoritz@gmail.com>

* another tiny fix

Signed-off-by: Philipp Moritz <pcmoritz@gmail.com>

Signed-off-by: Jake <DevJake@users.noreply.github.com>
Signed-off-by: Philipp Moritz <pcmoritz@gmail.com>
Co-authored-by: Philipp Moritz <pcmoritz@gmail.com>

* [AIR] Make PathPartitionScheme a dataclass (#28390)

Signed-off-by: Balaji Veeramani <bveeramani@berkeley.edu>

* [Telemetry][Kuberentes] Distinguish Kubernetes deployment stacks (#28490)

Right now, Ray telemetry indicates the majority of Ray's CPU hour usage comes from Ray running within a Kubernetes cluster. However, we have no data on what method is used to deploy Ray on Kubernetes.

This PR enables Ray telemetry to distinguish between three methods of deploying Ray on Kubernetes:

KubeRay >= 0.4.0
Legacy Ray Operator with Ray >= 2.1.0
All other methods
The strategy is to have the operators inject an env variable into the Ray container's environment.
The variable identifies the deployment method.

This PR also modifies the legacy Ray operator to inject the relevant env variable.
A follow-up KubeRay PR changes the KubeRay operator to do the same thing: ray-project/kuberay#562

Signed-off-by: Dmitri Gekhtman <dmitri.m.gekhtman@gmail.com>

* [autoscaler][observability] Experimental verbose mode (#28392)

This PR introduces a super secret hidden verbose mode for ray status, which we can keep hidden while collecting feedback before going through the process of officially declaring it part of the public API.

Example output

======== Autoscaler status: 2020-12-28 01:02:03 ========
GCS request time: 3.141500s
Node Provider non_terminated_nodes time: 1.618000s

Node status
--------------------------------------------------------
Healthy:
 2 p3.2xlarge
 20 m4.4xlarge
Pending:
 m4.4xlarge, 2 launching
 1.2.3.4: m4.4xlarge, waiting-for-ssh
 1.2.3.5: m4.4xlarge, waiting-for-ssh
Recent failures:
 p3.2xlarge: RayletUnexpectedlyDied (ip: 1.2.3.6)

Resources
--------------------------------------------------------
Total Usage:
 1/2 AcceleratorType:V100
 530.0/544.0 CPU
 2/2 GPU
 2.00/8.000 GiB memory
 3.14/16.000 GiB object_store_memory

Total Demands:
 {'CPU': 1}: 150+ pending tasks/actors
 {'CPU': 4} * 5 (PACK): 420+ pending placement groups
 {'CPU': 16}: 100+ from request_resources()

Node: 192.168.1.1
 Usage:
  0.1/1 AcceleratorType:V100
  5.0/20.0 CPU
  0.7/1 GPU
  1.00/4.000 GiB memory
  3.14/4.000 GiB object_store_memory

Node: 192.168.1.2
 Usage:
  0.9/1 AcceleratorType:V100
  15.0/20.0 CPU
  0.3/1 GPU
  1.00/12.000 GiB memory
  0.00/4.000 GiB object_store_memory

Co-authored-by: Alex <alex@anyscale.com>

* [doc/tune] fix tune stopper attribute name (#28517)

* [doc] Fix tune stopper doctests (#28531)

* [air] Use self-hosted mirror for CIFAR10 dataset (#28480)

The CIFAR10 website host has been unreliable in the past. This PR injects our own mirror into our CI packages for testing.

Signed-off-by: Kai Fricke <kai@anyscale.com>

* draft

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>

Signed-off-by: Kai Fricke <kai@anyscale.com>
Signed-off-by: xwjiang2010 <xwjiang2010@gmail.com>
Signed-off-by: mgerstgrasser <matthias@gerstgrasser.net>
Signed-off-by: Jake <DevJake@users.noreply.github.com>
Signed-off-by: Philipp Moritz <pcmoritz@gmail.com>
Signed-off-by: Balaji Veeramani <bveeramani@berkeley.edu>
Signed-off-by: Dmitri Gekhtman <dmitri.m.gekhtman@gmail.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
Co-authored-by: Kai Fricke <krfricke@users.noreply.github.com>
Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com>
Co-authored-by: Alex Wu <alex@anyscale.io>
Co-authored-by: Alex <alex@anyscale.com>
Co-authored-by: Jun Gong <jungong@anyscale.com>
Co-authored-by: mgerstgrasser <matthias@gerstgrasser.net>
Co-authored-by: Philipp Moritz <pcmoritz@gmail.com>
Co-authored-by: Jake <DevJake@users.noreply.github.com>
Co-authored-by: Balaji Veeramani <bveeramani@berkeley.edu>
Co-authored-by: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com>
Co-authored-by: Árpád Rózsás <rozsasarpi@gmail.com>
---
 ci/docker/Dockerfile.base                     |  57 ++
 ci/docker/Dockerfile.build                    |  15 +
 ci/docker/Dockerfile.gpu                      |   2 +
 ci/docker/Dockerfile.ml                       |   3 +
 ci/docker/Dockerfile.test                     |   1 +
 ci/env/install-dependencies.sh                |  12 +
 .../ray-core/objects/object-spilling.rst      |  27 +-
 doc/source/tune/api_docs/stoppers.rst         |   5 +
 python/ray/_private/ray_constants.py          |  35 -
 python/ray/_private/resource_spec.py          |   9 +-
 python/ray/_private/usage/usage_constants.py  |   8 +
 python/ray/_private/usage/usage_lib.py        |  13 +-
 python/ray/_private/utils.py                  |   6 +-
 python/ray/autoscaler/_private/autoscaler.py  |  12 +-
 python/ray/autoscaler/_private/commands.py    |  11 +-
 python/ray/autoscaler/_private/constants.py   |   5 +-
 .../_private/fake_multi_node/example.yaml     |   1 +
 .../ray/autoscaler/_private/load_metrics.py   |  40 +-
 python/ray/autoscaler/_private/monitor.py     |   8 +
 .../_private/resource_demand_scheduler.py     |  23 +-
 python/ray/autoscaler/_private/util.py        |  77 ++-
 python/ray/data/datasource/partitioning.py    |  73 +-
 python/ray/data/read_api.py                   |  20 +-
 python/ray/ray_operator/operator_utils.py     |   9 +
 python/ray/scripts/scripts.py                 |  12 +-
 .../ray/tests/test_k8s_operator_unit_tests.py |  12 +
 python/ray/tests/test_metrics.py              |   4 +-
 .../tests/test_resource_demand_scheduler.py   | 220 ++++++
 python/ray/tests/test_usage_stats.py          |  13 +
 python/ray/tune/experiment/trial.py           |  11 +
 .../tune/search/hyperopt/hyperopt_search.py   |  16 +
 python/ray/tune/stopper/stopper.py            |  80 +--
 python/ray/tune/syncer.py                     |  34 +-
 python/ray/tune/tests/test_sample.py          |  11 +
 python/ray/tune/tests/test_syncer_callback.py |  29 +-
 python/ray/tune/trainable/trainable.py        |   9 +-
 python/ray/tune/tuner.py                      |  18 +-
 python/ray/tune/utils/resource_updater.py     |   7 +-
 python/ray/util/placement_group.py            |   6 -
 python/requirements.txt                       |   2 +-
 release/requirements.txt                      |   2 +-
 release/requirements_buildkite.txt            |   2 +-
 rllib/algorithms/dqn/dqn_tf_policy.py         |   2 +-
 rllib/algorithms/dqn/tests/test_dqn.py        |  39 ++
 .../algorithms/simple_q/simple_q_tf_policy.py |   2 +-
 rllib/evaluation/worker_set.py                |  45 +-
 rllib/examples/custom_train_fn.py             |   1 +
 rllib/tests/test_worker_failures.py           | 628 ++++++++++++------
 48 files changed, 1213 insertions(+), 464 deletions(-)

diff --git a/ci/docker/Dockerfile.base b/ci/docker/Dockerfile.base
index e69de29bb2d1..5a74288dc7e9 100644
--- a/ci/docker/Dockerfile.base
+++ b/ci/docker/Dockerfile.base
@@ -0,0 +1,57 @@
+FROM ubuntu:focal
+
+ARG REMOTE_CACHE_URL
+ARG BUILDKITE_PULL_REQUEST
+ARG BUILDKITE_COMMIT
+ARG BUILDKITE_PULL_REQUEST_BASE_BRANCH
+ARG PYTHON=3.6
+ARG INSTALL_DEPENDENCIES
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV TZ=America/Los_Angeles
+
+ENV BUILDKITE=true
+ENV CI=true
+ENV PYTHON=$PYTHON
+ENV RAY_USE_RANDOM_PORTS=1
+ENV RAY_DEFAULT_BUILD=1
+ENV RAY_INSTALL_JAVA=1
+ENV BUILDKITE_PULL_REQUEST=${BUILDKITE_PULL_REQUEST}
+ENV BUILDKITE_COMMIT=${BUILDKITE_COMMIT}
+ENV BUILDKITE_PULL_REQUEST_BASE_BRANCH=${BUILDKITE_PULL_REQUEST_BASE_BRANCH}
+# For wheel build
+# https://github.com/docker-library/docker/blob/master/20.10/docker-entrypoint.sh
+ENV DOCKER_TLS_CERTDIR=/certs
+ENV DOCKER_HOST=tcp://docker:2376
+ENV DOCKER_TLS_VERIFY=1
+ENV DOCKER_CERT_PATH=/certs/client
+ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
+ENV BUILDKITE_BAZEL_CACHE_URL=${REMOTE_CACHE_URL}
+
+RUN apt-get update -qq && apt-get upgrade -qq
+RUN apt-get install -y -qq \
+    curl python-is-python3 git build-essential \
+    sudo unzip unrar apt-utils dialog tzdata wget rsync \
+    language-pack-en tmux cmake gdb vim htop \
+    libgtk2.0-dev zlib1g-dev libgl1-mesa-dev maven \
+    openjdk-8-jre openjdk-8-jdk clang-format-12 jq \
+    clang-tidy-12 clang-12
+# Make using GCC 9 explicit.
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 90 --slave /usr/bin/g++ g++ /usr/bin/g++-9 \
+    --slave /usr/bin/gcov gcov /usr/bin/gcov-9
+RUN ln -s /usr/bin/clang-format-12 /usr/bin/clang-format && \
+    ln -s /usr/bin/clang-tidy-12 /usr/bin/clang-tidy && \
+    ln -s /usr/bin/clang-12 /usr/bin/clang
+
+RUN curl -o- https://get.docker.com | sh
+
+# System conf for tests
+RUN locale -a
+ENV LC_ALL=en_US.utf8
+ENV LANG=en_US.utf8
+RUN echo "ulimit -c 0" >> /root/.bashrc
+
+# Setup Bazel caches
+RUN (echo "build --remote_cache=${REMOTE_CACHE_URL}" >> /root/.bazelrc); \
+    (if [ "${BUILDKITE_PULL_REQUEST}" != "false" ]; then (echo "build --remote_upload_local_results=false" >> /root/.bazelrc); fi); \
+    cat /root/.bazelrc
diff --git a/ci/docker/Dockerfile.build b/ci/docker/Dockerfile.build
index e69de29bb2d1..b6eeb5fec850 100644
--- a/ci/docker/Dockerfile.build
+++ b/ci/docker/Dockerfile.build
@@ -0,0 +1,15 @@
+FROM [Dockerfile.base image]
+
+RUN mkdir /ray
+WORKDIR /ray
+
+# Below should be re-run each time
+COPY . .
+RUN ./ci/ci.sh init
+RUN bash --login -i ./ci/ci.sh build
+
+RUN (if [ "${INSTALL_DEPENDENCIES}" = "ML" ]; then RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh; fi)
+
+# Run determine test to run
+RUN bash --login -i -c "python ./ci/pipeline/determine_tests_to_run.py --output=json > affected_set.json"
+RUN cat affected_set.json
diff --git a/ci/docker/Dockerfile.gpu b/ci/docker/Dockerfile.gpu
index e69de29bb2d1..b9d4c20a51e6 100644
--- a/ci/docker/Dockerfile.gpu
+++ b/ci/docker/Dockerfile.gpu
@@ -0,0 +1,2 @@
+FROM nvidia/cuda:11.2.0-cudnn8-devel-ubuntu20.04
+
diff --git a/ci/docker/Dockerfile.ml b/ci/docker/Dockerfile.ml
index e69de29bb2d1..9be76a290a95 100644
--- a/ci/docker/Dockerfile.ml
+++ b/ci/docker/Dockerfile.ml
@@ -0,0 +1,3 @@
+FROM [Dockerfile.test image]
+
+RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.test b/ci/docker/Dockerfile.test
index e69de29bb2d1..9cc9f1a39401 100644
--- a/ci/docker/Dockerfile.test
+++ b/ci/docker/Dockerfile.test
@@ -0,0 +1 @@
+FROM ubuntu:focal
diff --git a/ci/env/install-dependencies.sh b/ci/env/install-dependencies.sh
index 3b24f6852e41..00d7f7879681 100755
--- a/ci/env/install-dependencies.sh
+++ b/ci/env/install-dependencies.sh
@@ -421,6 +421,18 @@ install_dependencies() {
     pip install --upgrade tensorflow-probability=="${TFP_VERSION}" tensorflow=="${TF_VERSION}"
   fi
 
+  # Inject our own mirror for the CIFAR10 dataset
+  if [ "${TRAIN_TESTING-}" = 1 ] || [ "${TUNE_TESTING-}" = 1 ] ||  [ "${DOC_TESTING-}" = 1 ]; then
+    SITE_PACKAGES=$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')
+    TF_CIFAR="${SITE_PACKAGES}/tensorflow/python/keras/datasets/cifar10.py"
+    TORCH_CIFAR="${SITE_PACKAGES}/torchvision/datasets/cifar.py"
+
+    [ -f "$TF_CIFAR" ] && sed -i 's https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz https://air-example-data.s3.us-west-2.amazonaws.com/cifar-10-python.tar.gz g' \
+      "$TF_CIFAR"
+    [ -f "$TORCH_CIFAR" ] &&sed -i 's https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz https://air-example-data.s3.us-west-2.amazonaws.com/cifar-10-python.tar.gz g' \
+      "$TORCH_CIFAR"
+  fi
+
   # Additional Tune dependency for Horovod.
   # This must be run last (i.e., torch cannot be re-installed after this)
   if [ "${INSTALL_HOROVOD-}" = 1 ]; then
diff --git a/doc/source/ray-core/objects/object-spilling.rst b/doc/source/ray-core/objects/object-spilling.rst
index f2c8fd26c290..c1297047c36d 100644
--- a/doc/source/ray-core/objects/object-spilling.rst
+++ b/doc/source/ray-core/objects/object-spilling.rst
@@ -7,12 +7,15 @@ Ray 1.3+ spills objects to external storage once the object store is full. By de
 Single node
 -----------
 
-Ray uses object spilling by default. Without any setting, objects are spilled to `[temp_folder]/spill`. `temp_folder` is `/tmp` for Linux and MacOS by default.
+Ray uses object spilling by default. Without any setting, objects are spilled to `[temp_folder]/spill`. On Linux and MacOS, the `temp_folder` is `/tmp` by default.
 
-To configure the directory where objects are placed, use:
+To configure the directory where objects are spilled to, use:
 
 .. code-block:: python
 
+    import json
+    import ray
+
     ray.init(
         _system_config={
             "object_spilling_config": json.dumps(
@@ -26,6 +29,9 @@ usage across multiple physical devices if needed (e.g., SSD devices):
 
 .. code-block:: python
 
+    import json
+    import ray
+
     ray.init(
         _system_config={
             "max_io_workers": 4,  # More IO workers for parallelism.
@@ -46,14 +52,18 @@ usage across multiple physical devices if needed (e.g., SSD devices):
         },
     )
 
+
 .. note::
   
-  To optimize the performance, it is recommended to use SSD instead of HDD when using object spilling for memory intensive workloads.
+    To optimize the performance, it is recommended to use an SSD instead of an HDD when using object spilling for memory-intensive workloads.
 
 If you are using an HDD, it is recommended that you specify a large buffer size (> 1MB) to reduce IO requests during spilling.
 
 .. code-block:: python
 
+    import json
+    import ray
+
     ray.init(
         _system_config={
             "object_spilling_config": json.dumps(
@@ -74,6 +84,9 @@ The default threshold is 0.95 (95%). You can adjust the threshold by setting ``l
 
 .. code-block:: python
 
+    import json
+    import ray
+
     ray.init(
         _system_config={
             # Allow spilling until the local disk is 99% utilized.
@@ -94,6 +107,9 @@ To enable object spilling to remote storage (any URI supported by `smart_open <h
 
 .. code-block:: python
 
+    import json
+    import ray
+
     ray.init(
         _system_config={
             "max_io_workers": 4,  # More IO workers for remote storage.
@@ -116,6 +132,9 @@ Spilling to multiple remote storages is also supported.
 
 .. code-block:: python
 
+    import json
+    import ray
+
     ray.init(
         _system_config={
             "max_io_workers": 4,  # More IO workers for remote storage.
@@ -124,7 +143,7 @@ Spilling to multiple remote storages is also supported.
                 {
                   "type": "smart_open", 
                   "params": {
-                    "uri": ["s3://bucket/path1", "s3://bucket/path2, "s3://bucket/path3"],
+                    "uri": ["s3://bucket/path1", "s3://bucket/path2", "s3://bucket/path3"],
                   },
                   "buffer_size": 100 * 1024 * 1024, # Use a 100MB buffer for writes
                 },
diff --git a/doc/source/tune/api_docs/stoppers.rst b/doc/source/tune/api_docs/stoppers.rst
index cf37b49ba1b8..61dc862f6a14 100644
--- a/doc/source/tune/api_docs/stoppers.rst
+++ b/doc/source/tune/api_docs/stoppers.rst
@@ -45,3 +45,8 @@ TimeoutStopper (tune.stopper.TimeoutStopper)
 --------------------------------------------
 
 .. autoclass:: ray.tune.stopper.TimeoutStopper
+
+CombinedStopper (tune.stopper.CombinedStopper)
+----------------------------------------------
+
+.. autoclass:: ray.tune.stopper.CombinedStopper
diff --git a/python/ray/_private/ray_constants.py b/python/ray/_private/ray_constants.py
index e9d44ce1ff10..158070a0d42f 100644
--- a/python/ray/_private/ray_constants.py
+++ b/python/ray/_private/ray_constants.py
@@ -1,7 +1,6 @@
 """Ray constants used in the Python code."""
 
 import logging
-import math
 import os
 
 logger = logging.getLogger(__name__)
@@ -118,9 +117,6 @@ def env_bool(key, default):
 # for large resource quantities due to bookkeeping of specific resource IDs.
 MAX_RESOURCE_QUANTITY = 100e12
 
-# Each memory "resource" counts as this many bytes of memory.
-MEMORY_RESOURCE_UNIT_BYTES = 1
-
 # Number of units 1 resource can be subdivided into.
 MIN_RESOURCE_GRANULARITY = 0.0001
 
@@ -132,37 +128,6 @@ def env_bool(key, default):
 RAY_OVERRIDE_DASHBOARD_URL = "RAY_OVERRIDE_DASHBOARD_URL"
 
 
-def round_to_memory_units(memory_bytes, round_up):
-    """Round bytes to the nearest memory unit."""
-    return from_memory_units(to_memory_units(memory_bytes, round_up))
-
-
-def from_memory_units(memory_units):
-    """Convert from memory units -> bytes."""
-    return memory_units * MEMORY_RESOURCE_UNIT_BYTES
-
-
-def to_memory_units(memory_bytes, round_up):
-    """Convert from bytes -> memory units."""
-    value = memory_bytes / MEMORY_RESOURCE_UNIT_BYTES
-    if value < 1:
-        raise ValueError(
-            "The minimum amount of memory that can be requested is {} bytes, "
-            "however {} bytes was asked.".format(
-                MEMORY_RESOURCE_UNIT_BYTES, memory_bytes
-            )
-        )
-    if isinstance(value, float) and not value.is_integer():
-        # TODO(ekl) Ray currently does not support fractional resources when
-        # the quantity is greater than one. We should fix memory resources to
-        # be allocated in units of bytes and not 100MB.
-        if round_up:
-            value = int(math.ceil(value))
-        else:
-            value = int(math.floor(value))
-    return int(value)
-
-
 # Different types of Ray errors that can be pushed to the driver.
 # TODO(rkn): These should be defined in flatbuffers and must be synced with
 # the existing C++ definitions.
diff --git a/python/ray/_private/resource_spec.py b/python/ray/_private/resource_spec.py
index aea104b1fbe0..44da27880fb7 100644
--- a/python/ray/_private/resource_spec.py
+++ b/python/ray/_private/resource_spec.py
@@ -90,17 +90,12 @@ def to_resource_dict(self):
         """
         assert self.resolved()
 
-        memory_units = ray_constants.to_memory_units(self.memory, round_up=False)
-        object_store_memory_units = ray_constants.to_memory_units(
-            self.object_store_memory, round_up=False
-        )
-
         resources = dict(
             self.resources,
             CPU=self.num_cpus,
             GPU=self.num_gpus,
-            memory=memory_units,
-            object_store_memory=object_store_memory_units,
+            memory=self.memory,
+            object_store_memory=self.object_store_memory,
         )
 
         resources = {
diff --git a/python/ray/_private/usage/usage_constants.py b/python/ray/_private/usage/usage_constants.py
index 7efb0bc01f51..85aee1fc97c0 100644
--- a/python/ray/_private/usage/usage_constants.py
+++ b/python/ray/_private/usage/usage_constants.py
@@ -49,3 +49,11 @@
 EXTRA_USAGE_TAG_PREFIX = "extra_usage_tag_"
 
 USAGE_STATS_NAMESPACE = "usage_stats"
+
+KUBERNETES_SERVICE_HOST_ENV = "KUBERNETES_SERVICE_HOST"
+KUBERAY_ENV = "RAY_USAGE_STATS_KUBERAY_IN_USE"
+LEGACY_RAY_OPERATOR_ENV = "RAY_USAGE_STATS_LEGACY_OPERATOR_IN_USE"
+
+PROVIDER_KUBERNETES_GENERIC = "kubernetes"
+PROVIDER_KUBERAY = "kuberay"
+PROVIDER_LEGACY_RAY_OPERATOR = "legacy_ray_operator"
diff --git a/python/ray/_private/usage/usage_lib.py b/python/ray/_private/usage/usage_lib.py
index 9b8be78d3093..d699f0431397 100644
--- a/python/ray/_private/usage/usage_lib.py
+++ b/python/ray/_private/usage/usage_lib.py
@@ -757,8 +757,17 @@ def get_instance_type(node_config):
     except FileNotFoundError:
         # It's a manually started cluster or k8s cluster
         result = ClusterConfigToReport()
-        if "KUBERNETES_SERVICE_HOST" in os.environ:
-            result.cloud_provider = "kubernetes"
+        # Check if we're on Kubernetes
+        if usage_constant.KUBERNETES_SERVICE_HOST_ENV in os.environ:
+            # Check if we're using KubeRay >= 0.4.0.
+            if usage_constant.KUBERAY_ENV in os.environ:
+                result.cloud_provider = usage_constant.PROVIDER_KUBERAY
+            # Check if we're using the legacy Ray Operator with Ray >= 2.1.0.
+            elif usage_constant.LEGACY_RAY_OPERATOR_ENV in os.environ:
+                result.cloud_provider = usage_constant.PROVIDER_LEGACY_RAY_OPERATOR
+            # Else, we're on Kubernetes but not in either of the above categories.
+            else:
+                result.cloud_provider = usage_constant.PROVIDER_KUBERNETES_GENERIC
         return result
     except Exception as e:
         logger.info(f"Failed to get cluster config to report {e}")
diff --git a/python/ray/_private/utils.py b/python/ray/_private/utils.py
index 4e2d66cfd1d0..fe748d893ef8 100644
--- a/python/ray/_private/utils.py
+++ b/python/ray/_private/utils.py
@@ -395,11 +395,9 @@ def resources_from_ray_options(options_dict: Dict[str, Any]) -> Dict[str, Any]:
     if num_gpus is not None:
         resources["GPU"] = num_gpus
     if memory is not None:
-        resources["memory"] = ray_constants.to_memory_units(memory, round_up=True)
+        resources["memory"] = memory
     if object_store_memory is not None:
-        resources["object_store_memory"] = ray_constants.to_memory_units(
-            object_store_memory, round_up=True
-        )
+        resources["object_store_memory"] = object_store_memory
     if accelerator_type is not None:
         resources[
             f"{ray_constants.RESOURCE_CONSTRAINT_PREFIX}{accelerator_type}"
diff --git a/python/ray/autoscaler/_private/autoscaler.py b/python/ray/autoscaler/_private/autoscaler.py
index 947835531cd6..0a7931953c3e 100644
--- a/python/ray/autoscaler/_private/autoscaler.py
+++ b/python/ray/autoscaler/_private/autoscaler.py
@@ -113,6 +113,7 @@ class NonTerminatedNodes:
     """Class to extract and organize information on non-terminated nodes."""
 
     def __init__(self, provider: NodeProvider):
+        start_time = time.time()
         # All non-terminated nodes
         self.all_node_ids = provider.non_terminated_nodes({})
 
@@ -128,8 +129,15 @@ def __init__(self, provider: NodeProvider):
             elif node_kind == NODE_KIND_HEAD:
                 self.head_id = node
 
-        # Note: For typical use-cases,
-        # self.all_node_ids == self.worker_ids + [self.head_id]
+        # Note: For typical use-cases, self.all_node_ids == self.worker_ids +
+        # [self.head_id]. The difference being in the case of unmanaged nodes.
+
+        # Record the time of the non_terminated nodes call. This typically
+        # translates to a "describe" or "list" call on most cluster managers
+        # which can be quite expensive. Note that we include the processing
+        # time because on some clients, there may be pagination and the
+        # underlying api calls may be done lazily.
+        self.non_terminated_nodes_time = time.time() - start_time
 
     def remove_terminating_nodes(self, terminating_nodes: List[NodeID]) -> None:
         """Remove nodes we're in the process of terminating from internal
diff --git a/python/ray/autoscaler/_private/commands.py b/python/ray/autoscaler/_private/commands.py
index 7035b4146c82..839c565d68c1 100644
--- a/python/ray/autoscaler/_private/commands.py
+++ b/python/ray/autoscaler/_private/commands.py
@@ -125,7 +125,7 @@ def try_reload_log_state(provider_config: Dict[str, Any], log_state: dict) -> No
         return reload_log_state(log_state)
 
 
-def debug_status(status, error) -> str:
+def debug_status(status, error, verbose: bool = False) -> str:
     """Return a debug string for the autoscaler."""
     if status:
         status = status.decode("utf-8")
@@ -133,6 +133,8 @@ def debug_status(status, error) -> str:
         lm_summary_dict = status_dict.get("load_metrics_report")
         autoscaler_summary_dict = status_dict.get("autoscaler_report")
         timestamp = status_dict.get("time")
+        gcs_request_time = status_dict.get("gcs_request_time")
+        non_terminated_nodes_time = status_dict.get("non_terminated_nodes_time")
         if lm_summary_dict and autoscaler_summary_dict and timestamp:
             lm_summary = LoadMetricsSummary(**lm_summary_dict)
             node_availability_summary_dict = autoscaler_summary_dict.pop(
@@ -147,7 +149,12 @@ def debug_status(status, error) -> str:
             )
             report_time = datetime.datetime.fromtimestamp(timestamp)
             status = format_info_string(
-                lm_summary, autoscaler_summary, time=report_time
+                lm_summary,
+                autoscaler_summary,
+                time=report_time,
+                gcs_request_time=gcs_request_time,
+                non_terminated_nodes_time=non_terminated_nodes_time,
+                verbose=verbose,
             )
         else:
             status = "No cluster status."
diff --git a/python/ray/autoscaler/_private/constants.py b/python/ray/autoscaler/_private/constants.py
index 8a2c1c2a9eae..8cd6091b9e22 100644
--- a/python/ray/autoscaler/_private/constants.py
+++ b/python/ray/autoscaler/_private/constants.py
@@ -6,7 +6,6 @@
     DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES,
     DEFAULT_OBJECT_STORE_MEMORY_PROPORTION,
     LOGGER_FORMAT,
-    MEMORY_RESOURCE_UNIT_BYTES,
     RESOURCES_ENVIRONMENT_VARIABLE,
 )
 
@@ -60,6 +59,10 @@ def env_integer(key, default):
     "AUTOSCALER_NODE_AVAILABILITY_MAX_STALENESS_S", 30 * 60
 )
 
+AUTOSCALER_REPORT_PER_NODE_STATUS = (
+    env_integer("AUTOSCALER_REPORT_PER_NODE_STATUS", 1) == 1
+)
+
 # The maximum allowed resource demand vector size to guarantee the resource
 # demand scheduler bin packing algorithm takes a reasonable amount of time
 # to run.
diff --git a/python/ray/autoscaler/_private/fake_multi_node/example.yaml b/python/ray/autoscaler/_private/fake_multi_node/example.yaml
index d251de3bc65d..21cedf2cd391 100644
--- a/python/ray/autoscaler/_private/fake_multi_node/example.yaml
+++ b/python/ray/autoscaler/_private/fake_multi_node/example.yaml
@@ -8,6 +8,7 @@ cluster_name: fake_multinode
 max_workers: 8
 provider:
     type: fake_multinode
+    # This must be true since the nodes share the same ip!
     use_node_id_as_ip: True
     disable_node_updaters: True
     disable_launch_config_check: True
diff --git a/python/ray/autoscaler/_private/load_metrics.py b/python/ray/autoscaler/_private/load_metrics.py
index 01f0c12ae3ac..5076942667dd 100644
--- a/python/ray/autoscaler/_private/load_metrics.py
+++ b/python/ray/autoscaler/_private/load_metrics.py
@@ -6,14 +6,17 @@
 
 import numpy as np
 
-import ray._private.ray_constants
 from ray._private.gcs_utils import PlacementGroupTableData
 from ray.autoscaler._private.constants import (
     AUTOSCALER_MAX_RESOURCE_DEMAND_VECTOR_SIZE,
-    MEMORY_RESOURCE_UNIT_BYTES,
+    AUTOSCALER_REPORT_PER_NODE_STATUS,
+)
+from ray.autoscaler._private.util import (
+    DictCount,
+    LoadMetricsSummary,
+    NodeIP,
+    ResourceDict,
 )
-from ray.autoscaler._private.resource_demand_scheduler import NodeIP, ResourceDict
-from ray.autoscaler._private.util import DictCount, LoadMetricsSummary
 from ray.core.generated.common_pb2 import PlacementStrategy
 
 logger = logging.getLogger(__name__)
@@ -52,7 +55,7 @@ def freq_of_dicts(
             is a tuple containing a unique entry from `dicts` and its
             corresponding frequency count.
     """
-    freqs = Counter(map(lambda d: serializer(d), dicts))
+    freqs = Counter(serializer(d) for d in dicts)
     as_list = []
     for as_set, count in freqs.items():
         as_list.append((deserializer(as_set), count))
@@ -281,14 +284,8 @@ def summary(self):
         usage_dict = {}
         for key in total_resources:
             if key in ["memory", "object_store_memory"]:
-                total = (
-                    total_resources[key]
-                    * ray._private.ray_constants.MEMORY_RESOURCE_UNIT_BYTES
-                )
-                available = (
-                    available_resources[key]
-                    * ray._private.ray_constants.MEMORY_RESOURCE_UNIT_BYTES
-                )
+                total = total_resources[key]
+                available = available_resources[key]
                 usage_dict[key] = (total - available, total)
             else:
                 total = total_resources[key]
@@ -323,12 +320,25 @@ def placement_group_deserializer(pg_tuple):
         )
         nodes_summary = freq_of_dicts(self.static_resources_by_ip.values())
 
+        usage_by_node = None
+        if AUTOSCALER_REPORT_PER_NODE_STATUS:
+            usage_by_node = {}
+            for ip, totals in self.static_resources_by_ip.items():
+                available = self.dynamic_resources_by_ip.get(ip, {})
+                usage_by_node[ip] = {}
+                for resource, total in totals.items():
+                    usage_by_node[ip][resource] = (
+                        total - available.get(resource, 0),
+                        total,
+                    )
+
         return LoadMetricsSummary(
             usage=usage_dict,
             resource_demand=summarized_demand_vector,
             pg_demand=summarized_placement_groups,
             request_demand=summarized_resource_requests,
             node_types=nodes_summary,
+            usage_by_node=usage_by_node,
         )
 
     def set_resource_requests(self, requested_resources):
@@ -356,9 +366,7 @@ def _info(self):
 
         def format_resource(key, value):
             if key in ["object_store_memory", "memory"]:
-                return "{} GiB".format(
-                    round(value * MEMORY_RESOURCE_UNIT_BYTES / (1024 * 1024 * 1024), 2)
-                )
+                return "{} GiB".format(round(value / (1024 * 1024 * 1024), 2))
             else:
                 return round(value, 2)
 
diff --git a/python/ray/autoscaler/_private/monitor.py b/python/ray/autoscaler/_private/monitor.py
index d2905945488d..a5b9663edde1 100644
--- a/python/ray/autoscaler/_private/monitor.py
+++ b/python/ray/autoscaler/_private/monitor.py
@@ -336,10 +336,13 @@ def _run(self):
             try:
                 if self.stop_event and self.stop_event.is_set():
                     break
+                gcs_request_start_time = time.time()
                 self.update_load_metrics()
+                gcs_request_time = time.time() - gcs_request_start_time
                 self.update_resource_requests()
                 self.update_event_summary()
                 status = {
+                    "gcs_request_time": gcs_request_time,
                     "load_metrics_report": asdict(self.load_metrics.summary()),
                     "time": time.time(),
                     "monitor_pid": os.getpid(),
@@ -360,6 +363,11 @@ def _run(self):
                     autoscaler_summary = self.autoscaler.summary()
                     if autoscaler_summary:
                         status["autoscaler_report"] = asdict(autoscaler_summary)
+                        status[
+                            "non_terminated_nodes_time"
+                        ] = (
+                            self.autoscaler.non_terminated_nodes.non_terminated_nodes_time  # noqa: E501
+                        )
 
                     for msg in self.event_summarizer.summary():
                         # Need to prefix each line of the message for the lines to
diff --git a/python/ray/autoscaler/_private/resource_demand_scheduler.py b/python/ray/autoscaler/_private/resource_demand_scheduler.py
index 082a9f3e9661..ecbc538b0ee6 100644
--- a/python/ray/autoscaler/_private/resource_demand_scheduler.py
+++ b/python/ray/autoscaler/_private/resource_demand_scheduler.py
@@ -14,7 +14,6 @@
 
 import numpy as np
 
-import ray._private.ray_constants as ray_constants
 from ray._private.gcs_utils import PlacementGroupTableData
 from ray.autoscaler._private.constants import AUTOSCALER_CONSERVE_GPU_NODES
 from ray.autoscaler._private.util import (
@@ -51,7 +50,7 @@ def __init__(
         upscaling_speed: float = 1,
     ) -> None:
         self.provider = provider
-        self.node_types = _convert_memory_unit(node_types)
+        self.node_types = copy.deepcopy(node_types)
         self.node_resource_updated = set()
         self.max_workers = max_workers
         self.head_node_type = head_node_type
@@ -84,7 +83,7 @@ def reset_config(
         inferered resources are not lost.
         """
         self.provider = provider
-        self.node_types = _convert_memory_unit(node_types)
+        self.node_types = copy.deepcopy(node_types)
         self.node_resource_updated = set()
         self.max_workers = max_workers
         self.head_node_type = head_node_type
@@ -530,24 +529,6 @@ def debug_string(
         return out
 
 
-def _convert_memory_unit(
-    node_types: Dict[NodeType, NodeTypeConfigDict]
-) -> Dict[NodeType, NodeTypeConfigDict]:
-    """Convert memory and object_store_memory to memory unit"""
-    node_types = copy.deepcopy(node_types)
-    for node_type in node_types:
-        res = node_types[node_type].get("resources", {})
-        if "memory" in res:
-            size = float(res["memory"])
-            res["memory"] = ray_constants.to_memory_units(size, False)
-        if "object_store_memory" in res:
-            size = float(res["object_store_memory"])
-            res["object_store_memory"] = ray_constants.to_memory_units(size, False)
-        if res:
-            node_types[node_type]["resources"] = res
-    return node_types
-
-
 def _node_type_counts_to_node_resources(
     node_types: Dict[NodeType, NodeTypeConfigDict],
     node_type_counts: Dict[NodeType, int],
diff --git a/python/ray/autoscaler/_private/util.py b/python/ray/autoscaler/_private/util.py
index f9e5dbf2a17a..9011f8c34094 100644
--- a/python/ray/autoscaler/_private/util.py
+++ b/python/ray/autoscaler/_private/util.py
@@ -8,6 +8,7 @@
 import threading
 from dataclasses import dataclass
 from datetime import datetime
+from io import StringIO
 from numbers import Number, Real
 from typing import Any, Dict, List, Optional, Tuple, Union
 
@@ -65,6 +66,8 @@
 # Number of nodes to launch
 NodeCount = int
 
+Usage = Dict[str, Tuple[Number, Number]]
+
 logger = logging.getLogger(__name__)
 
 
@@ -81,7 +84,7 @@ def is_placement_group_resource(resource_name: str) -> bool:
 @dataclass
 class LoadMetricsSummary:
     # Map of resource name (e.g. "memory") to pair of (Used, Available) numbers
-    usage: Dict[str, Tuple[Number, Number]]
+    usage: Usage
     # Counts of demand bundles from task/actor demand.
     # e.g. [({"CPU": 1}, 5), ({"GPU":1}, 2)]
     resource_demand: List[DictCount]
@@ -90,8 +93,12 @@ class LoadMetricsSummary:
     # Counts of demand bundles requested by autoscaler.sdk.request_resources
     request_demand: List[DictCount]
     node_types: List[DictCount]
-    # Optionally included for backwards compatibility: IP of the head node.
+    # Optionally included for backwards compatibility: IP of the head node. See
+    # https://github.com/ray-project/ray/pull/20623 for details.
     head_ip: Optional[NodeIP] = None
+    # Optionally included for backwards compatibility: Resource breakdown by
+    # node. Mapping from node id to resource usage.
+    usage_by_node: Optional[Dict[str, Usage]] = None
 
 
 class ConcurrentCounter:
@@ -522,11 +529,11 @@ def parse_placement_group_resource_str(
     return (placement_group_resource_str, None, True)
 
 
-def get_usage_report(lm_summary: LoadMetricsSummary) -> str:
+def parse_usage(usage: Usage) -> List[str]:
     # first collect resources used in placement groups
     placement_group_resource_usage = {}
     placement_group_resource_total = collections.defaultdict(float)
-    for resource, (used, total) in lm_summary.usage.items():
+    for resource, (used, total) in usage.items():
         (pg_resource_name, pg_name, is_countable) = parse_placement_group_resource_str(
             resource
         )
@@ -537,9 +544,8 @@ def get_usage_report(lm_summary: LoadMetricsSummary) -> str:
                 placement_group_resource_usage[pg_resource_name] += used
                 placement_group_resource_total[pg_resource_name] += total
             continue
-
     usage_lines = []
-    for resource, (used, total) in sorted(lm_summary.usage.items()):
+    for resource, (used, total) in sorted(usage.items()):
         if "node:" in resource:
             continue  # Skip the auto-added per-node "node:<ip>" resource.
 
@@ -561,7 +567,7 @@ def get_usage_report(lm_summary: LoadMetricsSummary) -> str:
 
         if resource in ["memory", "object_store_memory"]:
             to_GiB = 1 / 2 ** 30
-            line = f" {(used * to_GiB):.2f}/" f"{(total * to_GiB):.3f} GiB {resource}"
+            line = f"{(used * to_GiB):.2f}/" f"{(total * to_GiB):.3f} GiB {resource}"
             if used_in_pg:
                 line = line + (
                     f" ({(pg_used * to_GiB):.2f} used of "
@@ -569,14 +575,22 @@ def get_usage_report(lm_summary: LoadMetricsSummary) -> str:
                 )
             usage_lines.append(line)
         else:
-            line = f" {used}/{total} {resource}"
+            line = f"{used}/{total} {resource}"
             if used_in_pg:
                 line += (
                     f" ({pg_used} used of " f"{pg_total} reserved in placement groups)"
                 )
             usage_lines.append(line)
-    usage_report = "\n".join(usage_lines)
-    return usage_report
+    return usage_lines
+
+
+def get_usage_report(lm_summary: LoadMetricsSummary) -> str:
+    usage_lines = parse_usage(lm_summary.usage)
+
+    sio = StringIO()
+    for line in usage_lines:
+        print(f" {line}", file=sio)
+    return sio.getvalue()
 
 
 def format_resource_demand_summary(
@@ -647,11 +661,42 @@ def get_demand_report(lm_summary: LoadMetricsSummary):
     return demand_report
 
 
-def format_info_string(lm_summary, autoscaler_summary, time=None):
+def get_per_node_breakdown(lm_summary: LoadMetricsSummary):
+    sio = StringIO()
+
+    print(file=sio)
+    for node_ip, usage in lm_summary.usage_by_node.items():
+        print(file=sio)  # Print a newline.
+        print(f"Node: {node_ip}", file=sio)
+        print(" Usage:", file=sio)
+        for line in parse_usage(usage):
+            print(f"  {line}", file=sio)
+
+    return sio.getvalue()
+
+
+def format_info_string(
+    lm_summary,
+    autoscaler_summary,
+    time=None,
+    gcs_request_time: Optional[float] = None,
+    non_terminated_nodes_time: Optional[float] = None,
+    verbose: bool = False,
+):
     if time is None:
         time = datetime.now()
     header = "=" * 8 + f" Autoscaler status: {time} " + "=" * 8
     separator = "-" * len(header)
+    if verbose:
+        header += "\n"
+        if gcs_request_time:
+            header += f"GCS request time: {gcs_request_time:3f}s\n"
+        if non_terminated_nodes_time:
+            header += (
+                "Node Provider non_terminated_nodes time: "
+                f"{non_terminated_nodes_time:3f}s\n"
+            )
+
     available_node_report_lines = []
     for node_type, count in autoscaler_summary.active_nodes.items():
         line = f" {count} {node_type}"
@@ -717,13 +762,15 @@ def format_info_string(lm_summary, autoscaler_summary, time=None):
 
 Resources
 {separator}
-Usage:
+{"Total " if verbose else ""}Usage:
 {usage_report}
-
-Demands:
+{"Total " if verbose else ""}Demands:
 {demand_report}"""
 
-    return formatted_output
+    if verbose and lm_summary.usage_by_node:
+        formatted_output += get_per_node_breakdown(lm_summary)
+
+    return formatted_output.strip()
 
 
 def format_readonly_node_type(node_id: str):
diff --git a/python/ray/data/datasource/partitioning.py b/python/ray/data/datasource/partitioning.py
index ac9fffe1f3a3..08e98fea0b14 100644
--- a/python/ray/data/datasource/partitioning.py
+++ b/python/ray/data/datasource/partitioning.py
@@ -1,3 +1,4 @@
+from dataclasses import dataclass
 import posixpath
 from enum import Enum
 from typing import (
@@ -35,67 +36,43 @@ class PartitionStyle(str, Enum):
 
 
 @DeveloperAPI
+@dataclass
 class Partitioning:
     """Partition scheme used to describe path-based partitions.
 
     Path-based partition formats embed all partition keys and values directly in
     their dataset file paths.
+
+    Attributes:
+        style: The partition style - may be either HIVE or DIRECTORY.
+        base_dir: "/"-delimited base directory that all partitioned paths should
+            exist under (exclusive). File paths either outside of, or at the first
+            level of, this directory will be considered unpartitioned. Specify
+            `None` or an empty string to search for partitions in all file path
+            directories.
+        field_names: The partition key field names (i.e. column names for tabular
+            datasets). When non-empty, the order and length of partition key
+            field names must match the order and length of partition values.
+            Required when parsing DIRECTORY partitioned paths or generating
+            HIVE partitioned paths.
+        filesystem: Filesystem that will be used for partition path file I/O.
     """
 
-    def __init__(
-        self,
-        style: PartitionStyle,
-        base_dir: Optional[str] = None,
-        field_names: Optional[List[str]] = None,
-        filesystem: Optional["pyarrow.fs.FileSystem"] = None,
-    ):
-        """Creates a new path-based dataset partition scheme.
+    style: PartitionStyle
+    base_dir: Optional[str] = None
+    field_names: Optional[List[str]] = None
+    filesystem: Optional["pyarrow.fs.FileSystem"] = None
 
-        Args:
-            style: The partition style - may be either HIVE or DIRECTORY.
-            base_dir: "/"-delimited base directory that all partitioned paths should
-                exist under (exclusive). File paths either outside of, or at the first
-                level of, this directory will be considered unpartitioned. Specify
-                `None` or an empty string to search for partitions in all file path
-                directories.
-            field_names: The partition key field names (i.e. column names for tabular
-                datasets). When non-empty, the order and length of partition key
-                field names must match the order and length of partition values.
-                Required when parsing DIRECTORY partitioned paths or generating
-                HIVE partitioned paths.
-            filesystem: Filesystem that will be used for partition path file I/O.
-        """
-        self._style = style
-        self._base_dir = base_dir or ""
-        self._field_names = field_names
-        self._filesystem = filesystem
+    def __post_init__(self):
+        if self.base_dir is None:
+            self.base_dir = ""
         self._normalize_base_dir()
 
-    @property
-    def style(self) -> PartitionStyle:
-        """Gets the path partitioning style."""
-        return self._style
-
-    @property
-    def base_dir(self) -> str:
-        """Gets the original base directory supplied during object construction."""
-        return self._base_dir
-
     @property
     def normalized_base_dir(self) -> str:
         """Returns the base directory normalized for compatibility with a filesystem."""
         return self._normalized_base_dir
 
-    @property
-    def field_names(self) -> Optional[List[str]]:
-        """Gets the partition key field names."""
-        return self._field_names
-
-    @property
-    def filesystem(self) -> Optional["pyarrow.fs.FileSystem"]:
-        """Gets the original filesystem supplied during object construction."""
-        return self._filesystem
-
     @property
     def resolved_filesystem(self) -> "pyarrow.fs.FileSystem":
         """Returns the filesystem resolved for compatibility with a base directory."""
@@ -114,8 +91,8 @@ def _normalize_base_dir(self):
         )
 
         paths, self._resolved_filesystem = _resolve_paths_and_filesystem(
-            self._base_dir,
-            self._filesystem,
+            self.base_dir,
+            self.filesystem,
         )
         assert (
             len(paths) == 1
diff --git a/python/ray/data/read_api.py b/python/ray/data/read_api.py
index 694f89a47c0b..e830ea6d3573 100644
--- a/python/ray/data/read_api.py
+++ b/python/ray/data/read_api.py
@@ -340,10 +340,25 @@ def read_parquet(
         >>> # Read multiple local files.
         >>> ray.data.read_parquet(["/path/to/file1", "/path/to/file2"]) # doctest: +SKIP
 
+        >>> # Specify a schema for the parquet file.
+        >>> import pyarrow as pa
+        >>> fields = [("sepal.length", pa.float64()),
+        ...           ("sepal.width", pa.float64()),
+        ...           ("petal.length", pa.float64()),
+        ...           ("petal.width", pa.float64()),
+        ...           ("variety", pa.string())]
+        >>> ray.data.read_parquet("example://iris.parquet",
+        ...     schema=pa.schema(fields))
+        Dataset(num_blocks=..., num_rows=150, schema={sepal.length: double, ...})
+
+        For further arguments you can pass to pyarrow as a keyword argument, see
+        https://arrow.apache.org/docs/python/generated/pyarrow.parquet.read_table.html
+
     Args:
         paths: A single file path or directory, or a list of file paths. Multiple
             directories are not supported.
-        filesystem: The filesystem implementation to read from.
+        filesystem: The filesystem implementation to read from. These are specified in
+            https://arrow.apache.org/docs/python/api/filesystems.html#filesystem-implementations.
         columns: A list of column names to read.
         parallelism: The requested parallelism of the read. Parallelism may be
             limited by the number of files of the dataset.
@@ -356,7 +371,8 @@ def read_parquet(
             `arr.tobytes()`).
         meta_provider: File metadata provider. Custom metadata providers may
             be able to resolve file metadata more quickly and/or accurately.
-        arrow_parquet_args: Other parquet read options to pass to pyarrow.
+        arrow_parquet_args: Other parquet read options to pass to pyarrow, see
+            https://arrow.apache.org/docs/python/generated/pyarrow.parquet.read_table.html
 
     Returns:
         Dataset holding Arrow records read from the specified paths.
diff --git a/python/ray/ray_operator/operator_utils.py b/python/ray/ray_operator/operator_utils.py
index 1ec79c607182..a5b73f5e3169 100644
--- a/python/ray/ray_operator/operator_utils.py
+++ b/python/ray/ray_operator/operator_utils.py
@@ -9,6 +9,7 @@
 from kubernetes.watch import Watch
 
 from ray._private import ray_constants
+from ray._private.usage import usage_constants
 from ray.autoscaler._private._kubernetes import custom_objects_api
 from ray.autoscaler._private._kubernetes.node_provider import head_service_selector
 from ray.autoscaler._private.providers import _get_default_config
@@ -137,6 +138,14 @@ def get_node_types(
         if name == cluster_resource["spec"]["headPodType"]:
             if "labels" not in metadata:
                 metadata["labels"] = {}
+        # Insert env identifying legacy operator for telemetry.
+        env = node_type["node_config"]["spec"]["containers"][0].setdefault("env", [])
+        env.append(
+            {
+                "name": usage_constants.LEGACY_RAY_OPERATOR_ENV,
+                "value": "1",
+            }
+        )
         node_types[name] = node_type
     return node_types
 
diff --git a/python/ray/scripts/scripts.py b/python/ray/scripts/scripts.py
index 840c7171640f..12069235e15f 100644
--- a/python/ray/scripts/scripts.py
+++ b/python/ray/scripts/scripts.py
@@ -1861,8 +1861,16 @@ def memory(
     default=ray_constants.REDIS_DEFAULT_PASSWORD,
     help="Connect to ray with redis_password.",
 )
+@click.option(
+    "-v",
+    "--verbose",
+    required=False,
+    is_flag=True,
+    hidden=True,
+    help="Experimental: Display additional debuggging information.",
+)
 @PublicAPI
-def status(address, redis_password):
+def status(address: str, redis_password: str, verbose: bool):
     """Print cluster status, including autoscaling info."""
     address = services.canonicalize_bootstrap_address_or_die(address)
     if not ray._private.gcs_utils.check_health(address):
@@ -1876,7 +1884,7 @@ def status(address, redis_password):
     error = ray.experimental.internal_kv._internal_kv_get(
         ray_constants.DEBUG_AUTOSCALING_ERROR
     )
-    print(debug_status(status, error))
+    print(debug_status(status, error, verbose=verbose))
 
 
 @cli.command(hidden=True)
diff --git a/python/ray/tests/test_k8s_operator_unit_tests.py b/python/ray/tests/test_k8s_operator_unit_tests.py
index b21f83dd1ec7..c27e49a06fd9 100644
--- a/python/ray/tests/test_k8s_operator_unit_tests.py
+++ b/python/ray/tests/test_k8s_operator_unit_tests.py
@@ -134,6 +134,18 @@ def custom_resources():
 
 
 class OperatorTest(unittest.TestCase):
+    def test_env_var_configured(self):
+        cr, _ = custom_resources()
+        config = cr_to_config(cr)
+        for node_type in config["available_node_types"].values():
+            pod_config = node_type["node_config"]
+            expected_env = {
+                "name": "RAY_USAGE_STATS_LEGACY_OPERATOR_IN_USE",
+                "value": "1",
+            }
+            envs = pod_config["spec"]["containers"][0]["env"]
+            assert expected_env in envs
+
     def test_no_file_mounts_k8s_operator_cluster_launch(self):
         with patch.object(NodeUpdaterThread, START, mock_start), patch.object(
             NodeUpdaterThread, JOIN, mock_join
diff --git a/python/ray/tests/test_metrics.py b/python/ray/tests/test_metrics.py
index 52b3915785e0..56dc2abb8b3f 100644
--- a/python/ray/tests/test_metrics.py
+++ b/python/ray/tests/test_metrics.py
@@ -121,7 +121,7 @@ def test_prometheus_endpoint():
             response = requests.get(
                 "http://localhost:{}".format(metrics_export_port),
                 # Fail the request early on if connection timeout
-                timeout=0.01,
+                timeout=1.0,
             )
             return response.status_code == 200
 
@@ -129,7 +129,7 @@ def test_prometheus_endpoint():
             test_prometheus_endpoint,
             (requests.exceptions.ConnectionError,),
             # The dashboard takes more than 2s to startup.
-            timeout_ms=5000,
+            timeout_ms=10 * 1000,
         )
 
 
diff --git a/python/ray/tests/test_resource_demand_scheduler.py b/python/ray/tests/test_resource_demand_scheduler.py
index fef519847c33..114c2a35a374 100644
--- a/python/ray/tests/test_resource_demand_scheduler.py
+++ b/python/ray/tests/test_resource_demand_scheduler.py
@@ -8,6 +8,7 @@
 from datetime import datetime
 from time import sleep
 from unittest import mock
+import subprocess
 
 import pytest
 import yaml
@@ -59,6 +60,7 @@
     fill_in_raylet_ids,
     mock_raylet_id,
 )
+from ray.cluster_utils import AutoscalingCluster
 
 GET_DEFAULT_METHOD = "ray.autoscaler._private.util._get_default_config"
 
@@ -2657,6 +2659,176 @@ def test_info_string():
     assert expected == actual
 
 
+def test_info_string_verbose():
+    lm_summary = LoadMetricsSummary(
+        usage={
+            "CPU": (530.0, 544.0),
+            "GPU": (2, 2),
+            "AcceleratorType:V100": (1, 2),
+            "memory": (2 * 2 ** 30, 2 ** 33),
+            "object_store_memory": (3.14 * 2 ** 30, 2 ** 34),
+        },
+        resource_demand=[({"CPU": 1}, 150)],
+        pg_demand=[({"bundles": [({"CPU": 4}, 5)], "strategy": "PACK"}, 420)],
+        request_demand=[({"CPU": 16}, 100)],
+        node_types=[],
+        usage_by_node={
+            "192.168.1.1": {
+                "CPU": (5.0, 20.0),
+                "GPU": (0.7, 1),
+                "AcceleratorType:V100": (0.1, 1),
+                "memory": (2 ** 30, 2 ** 32),
+                "object_store_memory": (3.14 * 2 ** 30, 2 ** 32),
+            },
+            "192.168.1.2": {
+                "CPU": (15.0, 20.0),
+                "GPU": (0.3, 1),
+                "AcceleratorType:V100": (0.9, 1),
+                "memory": (2 ** 30, 1.5 * 2 ** 33),
+                "object_store_memory": (0, 2 ** 32),
+            },
+        },
+    )
+    autoscaler_summary = AutoscalerSummary(
+        active_nodes={"p3.2xlarge": 2, "m4.4xlarge": 20},
+        pending_nodes=[
+            ("1.2.3.4", "m4.4xlarge", STATUS_WAITING_FOR_SSH),
+            ("1.2.3.5", "m4.4xlarge", STATUS_WAITING_FOR_SSH),
+        ],
+        pending_launches={"m4.4xlarge": 2},
+        failed_nodes=[("1.2.3.6", "p3.2xlarge")],
+    )
+
+    expected = """
+======== Autoscaler status: 2020-12-28 01:02:03 ========
+GCS request time: 3.141500s
+Node Provider non_terminated_nodes time: 1.618000s
+
+Node status
+--------------------------------------------------------
+Healthy:
+ 2 p3.2xlarge
+ 20 m4.4xlarge
+Pending:
+ m4.4xlarge, 2 launching
+ 1.2.3.4: m4.4xlarge, waiting-for-ssh
+ 1.2.3.5: m4.4xlarge, waiting-for-ssh
+Recent failures:
+ p3.2xlarge: RayletUnexpectedlyDied (ip: 1.2.3.6)
+
+Resources
+--------------------------------------------------------
+Total Usage:
+ 1/2 AcceleratorType:V100
+ 530.0/544.0 CPU
+ 2/2 GPU
+ 2.00/8.000 GiB memory
+ 3.14/16.000 GiB object_store_memory
+
+Total Demands:
+ {'CPU': 1}: 150+ pending tasks/actors
+ {'CPU': 4} * 5 (PACK): 420+ pending placement groups
+ {'CPU': 16}: 100+ from request_resources()
+
+Node: 192.168.1.1
+ Usage:
+  0.1/1 AcceleratorType:V100
+  5.0/20.0 CPU
+  0.7/1 GPU
+  1.00/4.000 GiB memory
+  3.14/4.000 GiB object_store_memory
+
+Node: 192.168.1.2
+ Usage:
+  0.9/1 AcceleratorType:V100
+  15.0/20.0 CPU
+  0.3/1 GPU
+  1.00/12.000 GiB memory
+  0.00/4.000 GiB object_store_memory
+""".strip()
+    actual = format_info_string(
+        lm_summary,
+        autoscaler_summary,
+        time=datetime(year=2020, month=12, day=28, hour=1, minute=2, second=3),
+        gcs_request_time=3.1415,
+        non_terminated_nodes_time=1.618,
+        verbose=True,
+    )
+    print(actual)
+    assert expected == actual
+
+
+def test_info_string_verbose_no_breakdown():
+    """
+    Test the verbose string but with node reporting feature flagged off.
+    """
+    lm_summary = LoadMetricsSummary(
+        usage={
+            "CPU": (530.0, 544.0),
+            "GPU": (2, 2),
+            "AcceleratorType:V100": (1, 2),
+            "memory": (2 * 2 ** 30, 2 ** 33),
+            "object_store_memory": (3.14 * 2 ** 30, 2 ** 34),
+        },
+        resource_demand=[({"CPU": 1}, 150)],
+        pg_demand=[({"bundles": [({"CPU": 4}, 5)], "strategy": "PACK"}, 420)],
+        request_demand=[({"CPU": 16}, 100)],
+        node_types=[],
+        usage_by_node=None,
+    )
+    autoscaler_summary = AutoscalerSummary(
+        active_nodes={"p3.2xlarge": 2, "m4.4xlarge": 20},
+        pending_nodes=[
+            ("1.2.3.4", "m4.4xlarge", STATUS_WAITING_FOR_SSH),
+            ("1.2.3.5", "m4.4xlarge", STATUS_WAITING_FOR_SSH),
+        ],
+        pending_launches={"m4.4xlarge": 2},
+        failed_nodes=[("1.2.3.6", "p3.2xlarge")],
+    )
+
+    expected = """
+======== Autoscaler status: 2020-12-28 01:02:03 ========
+GCS request time: 3.141500s
+Node Provider non_terminated_nodes time: 1.618000s
+
+Node status
+--------------------------------------------------------
+Healthy:
+ 2 p3.2xlarge
+ 20 m4.4xlarge
+Pending:
+ m4.4xlarge, 2 launching
+ 1.2.3.4: m4.4xlarge, waiting-for-ssh
+ 1.2.3.5: m4.4xlarge, waiting-for-ssh
+Recent failures:
+ p3.2xlarge: RayletUnexpectedlyDied (ip: 1.2.3.6)
+
+Resources
+--------------------------------------------------------
+Total Usage:
+ 1/2 AcceleratorType:V100
+ 530.0/544.0 CPU
+ 2/2 GPU
+ 2.00/8.000 GiB memory
+ 3.14/16.000 GiB object_store_memory
+
+Total Demands:
+ {'CPU': 1}: 150+ pending tasks/actors
+ {'CPU': 4} * 5 (PACK): 420+ pending placement groups
+ {'CPU': 16}: 100+ from request_resources()
+""".strip()
+    actual = format_info_string(
+        lm_summary,
+        autoscaler_summary,
+        time=datetime(year=2020, month=12, day=28, hour=1, minute=2, second=3),
+        gcs_request_time=3.1415,
+        non_terminated_nodes_time=1.618,
+        verbose=True,
+    )
+    print(actual)
+    assert expected == actual
+
+
 def test_info_string_with_launch_failures():
     lm_summary = LoadMetricsSummary(
         usage={
@@ -2831,6 +3003,54 @@ def test_info_string_failed_node_cap():
     assert expected.strip() == actual
 
 
+def test_ray_status_e2e(shutdown_only):
+    cluster = AutoscalingCluster(
+        head_resources={"CPU": 0},
+        worker_node_types={
+            "type-i": {
+                "resources": {"CPU": 1, "fun": 1},
+                "node_config": {},
+                "min_workers": 1,
+                "max_workers": 1,
+            },
+            "type-ii": {
+                "resources": {"CPU": 1, "fun": 100},
+                "node_config": {},
+                "min_workers": 1,
+                "max_workers": 1,
+            },
+        },
+    )
+
+    try:
+        cluster.start()
+        ray.init(address="auto")
+
+        @ray.remote(num_cpus=0, resources={"fun": 2})
+        class Actor:
+            def ping(self):
+                return None
+
+        actor = Actor.remote()
+        ray.get(actor.ping.remote())
+
+        assert "Demands" in subprocess.check_output("ray status", shell=True).decode()
+        assert (
+            "Total Demands"
+            not in subprocess.check_output("ray status", shell=True).decode()
+        )
+        assert (
+            "Total Demands"
+            in subprocess.check_output("ray status -v", shell=True).decode()
+        )
+        assert (
+            "Total Demands"
+            in subprocess.check_output("ray status --verbose", shell=True).decode()
+        )
+    finally:
+        cluster.shutdown()
+
+
 def test_placement_group_match_string():
     assert (
         is_placement_group_resource("bundle_group_ffe7d420752c6e8658638d19ecf2b68c")
diff --git a/python/ray/tests/test_usage_stats.py b/python/ray/tests/test_usage_stats.py
index 58353a16f13b..827cfac1c580 100644
--- a/python/ray/tests/test_usage_stats.py
+++ b/python/ray/tests/test_usage_stats.py
@@ -768,6 +768,19 @@ def test_usage_lib_get_cluster_config_to_report(
     assert cluster_config_to_report.head_node_instance_type is None
     assert cluster_config_to_report.worker_node_instance_types is None
 
+    monkeypatch.setenv("RAY_USAGE_STATS_KUBERAY_IN_USE", "1")
+    cluster_config_to_report = ray_usage_lib.get_cluster_config_to_report(
+        tmp_path / "does_not_exist.yaml"
+    )
+    assert cluster_config_to_report.cloud_provider == "kuberay"
+
+    monkeypatch.delenv("RAY_USAGE_STATS_KUBERAY_IN_USE")
+    monkeypatch.setenv("RAY_USAGE_STATS_LEGACY_OPERATOR_IN_USE", "1")
+    cluster_config_to_report = ray_usage_lib.get_cluster_config_to_report(
+        tmp_path / "does_not_exist.yaml"
+    )
+    assert cluster_config_to_report.cloud_provider == "legacy_ray_operator"
+
 
 @pytest.mark.skipif(
     sys.platform == "win32",
diff --git a/python/ray/tune/experiment/trial.py b/python/ray/tune/experiment/trial.py
index 468d41672824..2787989527df 100644
--- a/python/ray/tune/experiment/trial.py
+++ b/python/ray/tune/experiment/trial.py
@@ -451,6 +451,17 @@ def last_result(self) -> dict:
     def last_result(self, val: dict):
         self._last_result = val
 
+    def get_runner_ip(self) -> Optional[str]:
+        if self.location.hostname:
+            return self.location.hostname
+
+        if not self.runner:
+            return None
+
+        hostname, pid = ray.get(self.runner.get_current_ip_pid.remote())
+        self.location = _Location(hostname, pid)
+        return self.location.hostname
+
     @property
     def logdir(self):
         if not self.relative_logdir:
diff --git a/python/ray/tune/search/hyperopt/hyperopt_search.py b/python/ray/tune/search/hyperopt/hyperopt_search.py
index 5924f1909698..802ead66add1 100644
--- a/python/ray/tune/search/hyperopt/hyperopt_search.py
+++ b/python/ray/tune/search/hyperopt/hyperopt_search.py
@@ -40,6 +40,13 @@
 logger = logging.getLogger(__name__)
 
 
+HYPEROPT_UNDEFINED_DETAILS = (
+    " This issue can also come up with HyperOpt if your search space only "
+    "contains constant variables, which is not supported by HyperOpt. In that case, "
+    "don't pass any searcher or add sample variables to the search space."
+)
+
+
 class HyperOptSearch(Searcher):
     """A wrapper around HyperOpt to provide trial suggestions.
 
@@ -192,6 +199,14 @@ def __init__(
     def _setup_hyperopt(self) -> None:
         from hyperopt.fmin import generate_trials_to_calculate
 
+        if not self._space:
+            raise RuntimeError(
+                UNDEFINED_SEARCH_SPACE.format(
+                    cls=self.__class__.__name__, space="space"
+                )
+                + HYPEROPT_UNDEFINED_DETAILS
+            )
+
         if self._metric is None and self._mode:
             # If only a mode was passed, use anonymous metric
             self._metric = DEFAULT_METRIC
@@ -283,6 +298,7 @@ def suggest(self, trial_id: str) -> Optional[Dict]:
                 UNDEFINED_SEARCH_SPACE.format(
                     cls=self.__class__.__name__, space="space"
                 )
+                + HYPEROPT_UNDEFINED_DETAILS
             )
         if not self._metric or not self._mode:
             raise RuntimeError(
diff --git a/python/ray/tune/stopper/stopper.py b/python/ray/tune/stopper/stopper.py
index e1b0b594b998..0a29b1fed35d 100644
--- a/python/ray/tune/stopper/stopper.py
+++ b/python/ray/tune/stopper/stopper.py
@@ -11,29 +11,30 @@ class Stopper(abc.ABC):
     default, this class does not stop any trials. Subclasses need to
     implement ``__call__`` and ``stop_all``.
 
-    .. code-block:: python
-
-        import time
-        from ray import air, tune
-        from ray.tune import Stopper
-
-        class TimeStopper(Stopper):
-            def __init__(self):
-                self._start = time.time()
-                self._deadline = 300
-
-            def __call__(self, trial_id, result):
-                return False
-
-            def stop_all(self):
-                return time.time() - self._start > self.deadline
-
-        tuner = Tuner(
-            Trainable,
-            tune_config=tune.TuneConfig(num_samples=200),
-            run_config=air.RunConfig(stop=TimeStopper())
-        )
-        tuner.fit()
+    Examples:
+
+        >>> import time
+        >>> from ray import air, tune
+        >>> from ray.tune import Stopper
+        >>>
+        >>> class TimeStopper(Stopper):
+        ...     def __init__(self):
+        ...         self._start = time.time()
+        ...         self._deadline = 5
+        ...
+        ...     def __call__(self, trial_id, result):
+        ...         return False
+        ...
+        ...     def stop_all(self):
+        ...         return time.time() - self._start > self._deadline
+        >>>
+        >>> tuner = tune.Tuner(
+        ...     tune.Trainable,
+        ...     tune_config=tune.TuneConfig(num_samples=200),
+        ...     run_config=air.RunConfig(stop=TimeStopper())
+        ... )
+        >>> tuner.fit()
+        == Status ==...
 
     """
 
@@ -53,23 +54,22 @@ class CombinedStopper(Stopper):
     Args:
         *stoppers: Stoppers to be combined.
 
-    Example:
-
-    .. code-block:: python
-
-        from ray.tune.stopper import CombinedStopper, \
-            MaximumIterationStopper, TrialPlateauStopper
-
-        stopper = CombinedStopper(
-            MaximumIterationStopper(max_iter=20),
-            TrialPlateauStopper(metric="my_metric")
-        )
-
-        tuner = Tuner(
-            Trainable,
-            run_config=air.RunConfig(stop=stopper)
-        )
-        tuner.fit()
+    Examples:
+
+        >>> from ray.tune.stopper import (CombinedStopper,
+        ...     MaximumIterationStopper, TrialPlateauStopper)
+        >>>
+        >>> stopper = CombinedStopper(
+        ...     MaximumIterationStopper(max_iter=20),
+        ...     TrialPlateauStopper(metric="my_metric")
+        ... )
+        >>>
+        >>> tuner = tune.Tuner(
+        ...     tune.Trainable,
+        ...     run_config=air.RunConfig(stop=stopper)
+        ... )
+        >>> tuner.fit()
+        == Status ==...
 
     """
 
diff --git a/python/ray/tune/syncer.py b/python/ray/tune/syncer.py
index 3a18bdd34b3e..a8fb1026999e 100644
--- a/python/ray/tune/syncer.py
+++ b/python/ray/tune/syncer.py
@@ -27,7 +27,6 @@
 )
 from ray.tune import TuneError
 from ray.tune.callback import Callback
-from ray.tune.result import NODE_IP
 from ray.tune.utils.file_transfer import sync_dir_between_nodes
 from ray.util.annotations import PublicAPI, DeveloperAPI
 from ray.widgets import Template
@@ -500,6 +499,7 @@ def __init__(self, enabled: bool = True, sync_period: float = DEFAULT_SYNC_PERIO
         self._sync_processes: Dict[str, _BackgroundProcess] = {}
         self._sync_times: Dict[str, float] = {}
         self._sync_period = sync_period
+        self._trial_ips = {}
 
     def _get_trial_sync_process(self, trial: "Trial"):
         return self._sync_processes.setdefault(
@@ -537,10 +537,16 @@ def _sync_trial_dir(
         if not force and (not self._should_sync(trial) or sync_process.is_running):
             return False
 
-        if NODE_IP in trial.last_result:
-            source_ip = trial.last_result[NODE_IP]
-        else:
-            source_ip = ray.get(trial.runner.get_current_ip.remote())
+        source_ip = self._trial_ips.get(trial.trial_id, None)
+
+        if not source_ip:
+            source_ip = trial.get_runner_ip()
+
+            # If it still does not exist, the runner is terminated.
+            if not source_ip:
+                return False
+
+        self._trial_ips[trial.trial_id] = source_ip
 
         try:
             sync_process.wait()
@@ -571,6 +577,11 @@ def _sync_trial_dir(
                 )
         return True
 
+    def on_trial_start(
+        self, iteration: int, trials: List["Trial"], trial: "Trial", **info
+    ):
+        self._trial_ips.pop(trial.trial_id, None)
+
     def on_trial_result(
         self,
         iteration: int,
@@ -586,6 +597,13 @@ def on_trial_complete(
     ):
         self._sync_trial_dir(trial, force=True, wait=True)
         self._remove_trial_sync_process(trial)
+        self._trial_ips.pop(trial.trial_id, None)
+
+    def on_trial_error(
+        self, iteration: int, trials: List["Trial"], trial: "Trial", **info
+    ):
+        self._remove_trial_sync_process(trial)
+        self._trial_ips.pop(trial.trial_id, None)
 
     def on_checkpoint(
         self,
@@ -622,3 +640,9 @@ def wait_for_all(self):
                 f"At least one trial failed to sync down when waiting for all "
                 f"trials to sync: \n{sync_str}"
             )
+
+    def __getstate__(self):
+        state = self.__dict__.copy()
+        for remove in ["_sync_times", "_sync_processes", "_trial_ips"]:
+            state.pop(remove, None)
+        return state
diff --git a/python/ray/tune/tests/test_sample.py b/python/ray/tune/tests/test_sample.py
index 7fbeffd846b6..f40ec7339975 100644
--- a/python/ray/tune/tests/test_sample.py
+++ b/python/ray/tune/tests/test_sample.py
@@ -1071,6 +1071,17 @@ def testConvertHyperOptNested(self):
 
             self.assertIn(config["domain_nested"], ["M", "N", "O", "P"])
 
+    def testConvertHyperOptConstant(self):
+        from ray.tune.search.hyperopt import HyperOptSearch
+
+        config = {"a": 4}
+
+        searcher = HyperOptSearch()
+        with self.assertRaisesRegex(
+            RuntimeError, "This issue can also come up with HyperOpt"
+        ):
+            searcher.set_search_properties(metric="a", mode="max", config=config)
+
     def testSampleBoundsHyperopt(self):
         from ray.tune.search.hyperopt import HyperOptSearch
 
diff --git a/python/ray/tune/tests/test_syncer_callback.py b/python/ray/tune/tests/test_syncer_callback.py
index ffeb2c637f72..582171ef44f5 100644
--- a/python/ray/tune/tests/test_syncer_callback.py
+++ b/python/ray/tune/tests/test_syncer_callback.py
@@ -11,7 +11,6 @@
 from ray.air._internal.checkpoint_manager import CheckpointStorage, _TrackedCheckpoint
 from ray.tune import TuneError
 from ray.tune.logger import NoopLogger
-from ray.tune.result import NODE_IP
 from ray.tune.syncer import (
     DEFAULT_SYNC_PERIOD,
     SyncConfig,
@@ -72,11 +71,14 @@ def assert_file(exists: bool, root: str, path: str):
 class MockTrial:
     def __init__(self, trial_id: str, logdir: str):
         self.trial_id = trial_id
-        self.last_result = {NODE_IP: ray.util.get_node_ip_address()}
         self.uses_cloud_checkpointing = False
         self.sync_on_checkpoint = True
 
         self.logdir = logdir
+        self._local_ip = ray.util.get_node_ip_address()
+
+    def get_runner_ip(self):
+        return self._local_ip
 
 
 class TestSyncerCallback(SyncerCallback):
@@ -211,6 +213,29 @@ def test_syncer_callback_sync(ray_start_2_cpus, temp_data_dirs):
     assert_file(True, tmp_target, "subdir_exclude/something/somewhere.txt")
 
 
+def test_syncer_callback_sync_with_invalid_ip(ray_start_2_cpus, temp_data_dirs):
+    """Check that the sync client updates the IP correctly"""
+    tmp_source, tmp_target = temp_data_dirs
+
+    syncer_callback = TestSyncerCallback(local_logdir_override=tmp_target)
+
+    trial1 = MockTrial(trial_id="a", logdir=tmp_source)
+
+    syncer_callback._trial_ips[trial1.trial_id] = "invalid"
+    syncer_callback.on_trial_start(iteration=0, trials=[], trial=trial1)
+
+    syncer_callback.on_trial_result(iteration=1, trials=[], trial=trial1, result={})
+    syncer_callback.wait_for_all()
+
+    assert_file(True, tmp_target, "level0.txt")
+    assert_file(True, tmp_target, "level0_exclude.txt")
+    assert_file(True, tmp_target, "subdir/level1.txt")
+    assert_file(True, tmp_target, "subdir/level1_exclude.txt")
+    assert_file(True, tmp_target, "subdir/nested/level2.txt")
+    assert_file(True, tmp_target, "subdir_nested_level2_exclude.txt")
+    assert_file(True, tmp_target, "subdir_exclude/something/somewhere.txt")
+
+
 def test_syncer_callback_no_size_limit(temp_data_dirs):
     """Check if max_size_bytes is set to None for sync function"""
     tmp_source, _ = temp_data_dirs
diff --git a/python/ray/tune/trainable/trainable.py b/python/ray/tune/trainable/trainable.py
index d114c27f08ea..5d68a99b1de6 100644
--- a/python/ray/tune/trainable/trainable.py
+++ b/python/ray/tune/trainable/trainable.py
@@ -156,7 +156,7 @@ def __init__(
         self._stderr_file = stderr_file
 
         start_time = time.time()
-        self._local_ip = self.get_current_ip()
+        self._local_ip = ray.util.get_node_ip_address()
         self.setup(copy.deepcopy(self.config))
         setup_time = time.time() - start_time
         if setup_time > SETUP_TIME_THRESHOLD:
@@ -219,9 +219,8 @@ def resource_help(cls, config: Dict):
         """
         return ""
 
-    def get_current_ip(self):
-        self._local_ip = ray.util.get_node_ip_address()
-        return self._local_ip
+    def get_current_ip_pid(self):
+        return self._local_ip, os.getpid()
 
     def get_auto_filled_metrics(
         self,
@@ -689,7 +688,7 @@ def restore(
         self._restored = True
 
         logger.info(
-            "Restored on %s from checkpoint: %s", self.get_current_ip(), checkpoint_dir
+            "Restored on %s from checkpoint: %s", self._local_ip, checkpoint_dir
         )
         state = {
             "_iteration": self._iteration,
diff --git a/python/ray/tune/tuner.py b/python/ray/tune/tuner.py
index f2aac736b201..2bd3e8636134 100644
--- a/python/ray/tune/tuner.py
+++ b/python/ray/tune/tuner.py
@@ -28,6 +28,14 @@
 _SELF = "self"
 
 
+_TUNER_FAILED_MSG = (
+    "The Ray Tune run failed. Please inspect the previous error messages for a "
+    "cause. After fixing the issue, you can restart the run from scratch or "
+    "continue this run. To continue this run, you can use "
+    '`tuner = Tuner.restore("{path}")`.'
+)
+
+
 @PublicAPI(stability="beta")
 class Tuner:
     """Tuner is the recommended way of launching hyperparameter tuning jobs with Ray Tune.
@@ -235,9 +243,9 @@ def fit(self) -> ResultGrid:
                 return self._local_tuner.fit()
             except Exception as e:
                 raise TuneError(
-                    f"Tune run failed. "
-                    f'Please use tuner = Tuner.restore("'
-                    f'{self._local_tuner.get_experiment_checkpoint_dir()}") to resume.'
+                    _TUNER_FAILED_MSG.format(
+                        path=self._local_tuner.get_experiment_checkpoint_dir()
+                    )
                 ) from e
         else:
             experiment_checkpoint_dir = ray.get(
@@ -247,7 +255,5 @@ def fit(self) -> ResultGrid:
                 return ray.get(self._remote_tuner.fit.remote())
             except Exception as e:
                 raise TuneError(
-                    f"Tune run failed. "
-                    f'Please use tuner = Tuner.restore("'
-                    f'{experiment_checkpoint_dir}") to resume.'
+                    _TUNER_FAILED_MSG.format(path=experiment_checkpoint_dir)
                 ) from e
diff --git a/python/ray/tune/utils/resource_updater.py b/python/ray/tune/utils/resource_updater.py
index 0b416b83cb36..fb9a795b799b 100644
--- a/python/ray/tune/utils/resource_updater.py
+++ b/python/ray/tune/utils/resource_updater.py
@@ -4,7 +4,6 @@
 from typing import Any, Dict, Optional
 
 import ray
-from ray._private import ray_constants
 from ray._private.resource_spec import NODE_ID_PREFIX
 from ray.tune.resources import Resources
 
@@ -69,10 +68,8 @@ def update_avail_resources(self, num_retries=5):
         resources = resources.copy()
         num_cpus = resources.pop("CPU", 0)
         num_gpus = resources.pop("GPU", 0)
-        memory = ray_constants.from_memory_units(resources.pop("memory", 0))
-        object_store_memory = ray_constants.from_memory_units(
-            resources.pop("object_store_memory", 0)
-        )
+        memory = resources.pop("memory", 0)
+        object_store_memory = resources.pop("object_store_memory", 0)
         custom_resources = resources
 
         self._avail_resources = Resources(
diff --git a/python/ray/util/placement_group.py b/python/ray/util/placement_group.py
index 95ec8a669da8..1527b45f5193 100644
--- a/python/ray/util/placement_group.py
+++ b/python/ray/util/placement_group.py
@@ -3,7 +3,6 @@
 
 import ray
 from ray._private.client_mode_hook import client_mode_should_convert, client_mode_wrap
-from ray._private.ray_constants import to_memory_units
 from ray._private.utils import hex_to_binary, get_ray_doc_version
 from ray._raylet import PlacementGroupID
 from ray.util.annotations import DeveloperAPI, PublicAPI
@@ -198,11 +197,6 @@ def placement_group(
                 f"resources with only 0 values. Bundles: {bundles}"
             )
 
-        if "memory" in bundle.keys() and bundle["memory"] > 0:
-            # Make sure the memory resource can be
-            # transformed to memory unit.
-            to_memory_units(bundle["memory"], True)
-
         if "object_store_memory" in bundle.keys():
             warnings.warn(
                 "Setting 'object_store_memory' for"
diff --git a/python/requirements.txt b/python/requirements.txt
index 5f2d46f8a8d2..4d7e8c1c5ff2 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -20,7 +20,7 @@ numpy >= 1.16
 opencensus
 packaging; python_version >= '3.10'
 prometheus_client >= 0.7.1, < 0.14.0
-protobuf >= 3.15.3, < 4.0.0
+protobuf >= 3.15.3, != 3.19.5, < 4.0.0
 py-spy >= 0.2.0
 pydantic >= 1.8, < 1.10.0
 pyyaml
diff --git a/release/requirements.txt b/release/requirements.txt
index 246bb0900652..e94e14a96eee 100644
--- a/release/requirements.txt
+++ b/release/requirements.txt
@@ -11,7 +11,7 @@ toml
 python-dotenv
 expiringdict
 requests
-protobuf >= 3.15.3, < 4.0.0
+protobuf >= 3.15.3, != 3.19.5, < 4.0.0
 pytz
 git+https://github.com/ray-project/xgboost_ray.git#egg=xgboost_ray
 git+https://github.com/ray-project/lightgbm_ray.git#lightgbm_ray
\ No newline at end of file
diff --git a/release/requirements_buildkite.txt b/release/requirements_buildkite.txt
index da7382738b22..b33b56eae6c3 100644
--- a/release/requirements_buildkite.txt
+++ b/release/requirements_buildkite.txt
@@ -2,7 +2,7 @@ anyscale
 click
 boto3
 jinja2
-protobuf >= 3.15.3, < 4.0.0
+protobuf >= 3.15.3, != 3.19.5, < 4.0.0
 pydantic < 1.10.0
 pyyaml
 requests
diff --git a/rllib/algorithms/dqn/dqn_tf_policy.py b/rllib/algorithms/dqn/dqn_tf_policy.py
index b2dacd40bcf2..d479d4a51f85 100644
--- a/rllib/algorithms/dqn/dqn_tf_policy.py
+++ b/rllib/algorithms/dqn/dqn_tf_policy.py
@@ -311,7 +311,7 @@ def build_q_losses(policy: Policy, model, _, train_batch: SampleBatch) -> Tensor
         q_tp1_best,
         q_dist_tp1_best,
         train_batch[PRIO_WEIGHTS],
-        train_batch[SampleBatch.REWARDS],
+        tf.cast(train_batch[SampleBatch.REWARDS], tf.float32),
         tf.cast(train_batch[SampleBatch.DONES], tf.float32),
         config["gamma"],
         config["n_step"],
diff --git a/rllib/algorithms/dqn/tests/test_dqn.py b/rllib/algorithms/dqn/tests/test_dqn.py
index 215995d9aa67..2d8b1218aea7 100644
--- a/rllib/algorithms/dqn/tests/test_dqn.py
+++ b/rllib/algorithms/dqn/tests/test_dqn.py
@@ -58,6 +58,45 @@ def test_dqn_compilation(self):
 
             trainer.stop()
 
+    def test_dqn_compilation_integer_rewards(self):
+        """Test whether DQN can be built on all frameworks.
+        Unlike the previous test, this uses an environment with integer rewards
+        in order to test that type conversions are working correctly."""
+        num_iterations = 1
+        config = (
+            dqn.dqn.DQNConfig()
+            .rollouts(num_rollout_workers=2)
+            .training(num_steps_sampled_before_learning_starts=0)
+        )
+
+        for _ in framework_iterator(config, with_eager_tracing=True):
+            # Double-dueling DQN.
+            print("Double-dueling")
+            plain_config = deepcopy(config)
+            trainer = dqn.DQN(config=plain_config, env="Taxi-v3")
+            for i in range(num_iterations):
+                results = trainer.train()
+                check_train_results(results)
+                print(results)
+
+            check_compute_single_action(trainer)
+            trainer.stop()
+
+            # Rainbow.
+            print("Rainbow")
+            rainbow_config = deepcopy(config).training(
+                num_atoms=10, noisy=True, double_q=True, dueling=True, n_step=5
+            )
+            trainer = dqn.DQN(config=rainbow_config, env="Taxi-v3")
+            for i in range(num_iterations):
+                results = trainer.train()
+                check_train_results(results)
+                print(results)
+
+            check_compute_single_action(trainer)
+
+            trainer.stop()
+
     def test_dqn_exploration_and_soft_q_config(self):
         """Tests, whether a DQN Agent outputs exploration/softmaxed actions."""
         config = (
diff --git a/rllib/algorithms/simple_q/simple_q_tf_policy.py b/rllib/algorithms/simple_q/simple_q_tf_policy.py
index 54b95f1a39be..6567606ebc63 100644
--- a/rllib/algorithms/simple_q/simple_q_tf_policy.py
+++ b/rllib/algorithms/simple_q/simple_q_tf_policy.py
@@ -163,7 +163,7 @@ def loss(
 
             # compute RHS of bellman equation
             q_t_selected_target = (
-                train_batch[SampleBatch.REWARDS]
+                tf.cast(train_batch[SampleBatch.REWARDS], tf.float32)
                 + self.config["gamma"] * q_tp1_best_masked
             )
 
diff --git a/rllib/evaluation/worker_set.py b/rllib/evaluation/worker_set.py
index a37d411aafaa..e597ab15ee15 100644
--- a/rllib/evaluation/worker_set.py
+++ b/rllib/evaluation/worker_set.py
@@ -173,6 +173,8 @@ def __init__(
                     env_creator=env_creator,
                     validate_env=validate_env,
                     policy_cls=self._policy_class,
+                    # Initially, policy_specs will be inferred from config dict.
+                    policy_specs=None,
                     worker_index=0,
                     num_workers=num_workers,
                     config=self._local_config,
@@ -253,6 +255,9 @@ def add_workers(self, num_workers: int, validate: bool = False) -> None:
                     env_creator=self._env_creator,
                     validate_env=None,
                     policy_cls=self._policy_class,
+                    # Setup remote workers with policy_specs inferred from config dict.
+                    # Simply provide None here.
+                    policy_specs=None,
                     worker_index=old_num_workers + i + 1,
                     num_workers=old_num_workers + num_workers,
                     config=self._remote_config,
@@ -333,6 +338,13 @@ def recreate_failed_workers(
                 env_creator=self._env_creator,
                 validate_env=None,
                 policy_cls=self._policy_class,
+                # For recreated remote workers, we need to sync the entire
+                # policy specs dict from local_worker_for_synching.
+                # We can not let self._make_worker() infer policy specs
+                # from self._remote_config dict because custom policies
+                # may be added to both rollout and evaluation workers
+                # while the training job progresses.
+                policy_specs=local_worker_for_synching.policy_dict,
                 worker_index=worker_index,
                 num_workers=len(self._remote_workers),
                 recreated_worker=True,
@@ -340,6 +352,7 @@ def recreate_failed_workers(
             )
 
             # Sync new worker from provided one (or local one).
+            # Restore weights and global variables.
             new_worker.set_weights.remote(
                 weights=local_worker_for_synching.get_weights(),
                 global_vars=local_worker_for_synching.get_global_vars(),
@@ -546,6 +559,7 @@ def _make_worker(
         env_creator: EnvCreator,
         validate_env: Optional[Callable[[EnvType], None]],
         policy_cls: Type[Policy],
+        policy_specs: Optional[Dict[str, PolicySpec]] = None,
         worker_index: int,
         num_workers: int,
         recreated_worker: bool = False,
@@ -638,20 +652,21 @@ def valid_module(class_path):
                 compress_columns=config["output_compress_columns"],
             )
 
-        # Assert everything is correct in "multiagent" config dict (if given).
-        ma_policies = config["multiagent"]["policies"]
-        if ma_policies:
-            for pid, policy_spec in ma_policies.copy().items():
-                assert isinstance(policy_spec, PolicySpec)
-                # Class is None -> Use `policy_cls`.
-                if policy_spec.policy_class is None:
-                    ma_policies[pid].policy_class = policy_cls
-            policies = ma_policies
-
-        # Create a policy_spec (MultiAgentPolicyConfigDict),
-        # even if no "multiagent" setup given by user.
-        else:
-            policies = policy_cls
+        if not policy_specs:
+            # Infer policy specs from multiagent.policies dict.
+            if config["multiagent"]["policies"]:
+                # Make a copy so we don't modify the original multiagent config dict
+                # by accident.
+                policy_specs = config["multiagent"]["policies"].copy()
+                # Assert everything is correct in "multiagent" config dict (if given).
+                for policy_spec in policy_specs.values():
+                    assert isinstance(policy_spec, PolicySpec)
+                    # Class is None -> Use `policy_cls`.
+                    if policy_spec.policy_class is None:
+                        policy_spec.policy_class = policy_cls
+            # Use the only policy class as policy specs.
+            else:
+                policy_specs = policy_cls
 
         if worker_index == 0:
             extra_python_environs = config.get("extra_python_environs_for_driver", None)
@@ -661,7 +676,7 @@ def valid_module(class_path):
         worker = cls(
             env_creator=env_creator,
             validate_env=validate_env,
-            policy_spec=policies,
+            policy_spec=policy_specs,
             policy_mapping_fn=config["multiagent"]["policy_mapping_fn"],
             policies_to_train=config["multiagent"]["policies_to_train"],
             tf_session_creator=(session_creator if config["tf_session_args"] else None),
diff --git a/rllib/examples/custom_train_fn.py b/rllib/examples/custom_train_fn.py
index 149eb5b83ea4..3ac0498fb98a 100644
--- a/rllib/examples/custom_train_fn.py
+++ b/rllib/examples/custom_train_fn.py
@@ -62,3 +62,4 @@ def my_train_fn(config, reporter):
     tuner = tune.Tuner(
         tune.with_resources(my_train_fn, resources=resources), param_space=config
     )
+    tuner.fit()
diff --git a/rllib/tests/test_worker_failures.py b/rllib/tests/test_worker_failures.py
index 71077afd8576..13335e4a73e6 100644
--- a/rllib/tests/test_worker_failures.py
+++ b/rllib/tests/test_worker_failures.py
@@ -6,8 +6,15 @@
 import numpy as np
 
 import ray
+from ray.rllib.algorithms.algorithm_config import AlgorithmConfig
+from ray.rllib.algorithms.a3c import A3CConfig
+from ray.rllib.algorithms.apex_dqn import ApexDQNConfig
+from ray.rllib.algorithms.callbacks import DefaultCallbacks
+from ray.rllib.algorithms.dqn.dqn import DQNConfig
+from ray.rllib.algorithms.impala import ImpalaConfig
 from ray.rllib.algorithms.pg import PG, PGConfig
-from ray.rllib.algorithms.registry import get_algorithm_class
+from ray.rllib.algorithms.pg.pg_torch_policy import PGTorchPolicy
+from ray.rllib.algorithms.ppo.ppo import PPOConfig
 from ray.rllib.env.multi_agent_env import make_multi_agent
 from ray.rllib.examples.env.random_env import RandomEnv
 from ray.rllib.policy.policy import PolicySpec
@@ -160,18 +167,17 @@ def setUpClass(cls) -> None:
     def tearDownClass(cls) -> None:
         ray.shutdown()
 
-    def _do_test_fault_ignore(self, algo: str, config: dict, fail_eval: bool = False):
-        algo_cls = get_algorithm_class(algo)
-
+    def _do_test_fault_ignore(self, config: AlgorithmConfig, fail_eval: bool = False):
         # Test fault handling
-        config["num_workers"] = 2
-        config["ignore_worker_failures"] = True
+        config.num_workers = 2
+        config.ignore_worker_failures = True
+        config.env = "fault_env"
         # Make worker idx=1 fail. Other workers will be ok.
-        config["env_config"] = {"bad_indices": [1]}
+        config.env_config = {"bad_indices": [1]}
         if fail_eval:
-            config["evaluation_num_workers"] = 2
-            config["evaluation_interval"] = 1
-            config["evaluation_config"] = {
+            config.evaluation_num_workers = 2
+            config.evaluation_interval = 1
+            config.evaluation_config = {
                 "ignore_worker_failures": True,
                 "env_config": {
                     # Make worker idx=1 fail. Other workers will be ok.
@@ -181,7 +187,7 @@ def _do_test_fault_ignore(self, algo: str, config: dict, fail_eval: bool = False
             }
 
         for _ in framework_iterator(config, frameworks=("tf2", "torch")):
-            algo = algo_cls(config=config, env="fault_env")
+            algo = config.build()
             result = algo.train()
 
             # Both rollout workers are healthy.
@@ -192,18 +198,17 @@ def _do_test_fault_ignore(self, algo: str, config: dict, fail_eval: bool = False
 
             algo.stop()
 
-    def _do_test_fault_fatal(self, alg, config, fail_eval=False):
-        agent_cls = get_algorithm_class(alg)
-
+    def _do_test_fault_fatal(self, config, fail_eval=False):
         # Test raises real error when out of workers.
-        config["num_workers"] = 2
-        config["ignore_worker_failures"] = False
+        config.num_workers = 2
+        config.ignore_worker_failures = False
+        config.env = "fault_env"
         # Make both worker idx=1 and 2 fail.
-        config["env_config"] = {"bad_indices": [1, 2]}
+        config.env_config = {"bad_indices": [1, 2]}
         if fail_eval:
-            config["evaluation_num_workers"] = 2
-            config["evaluation_interval"] = 1
-            config["evaluation_config"] = {
+            config.evaluation_num_workers = 2
+            config.evaluation_interval = 1
+            config.evaluation_config = {
                 "ignore_worker_failures": False,
                 # Make eval worker (index 1) fail.
                 "env_config": {
@@ -213,19 +218,17 @@ def _do_test_fault_fatal(self, alg, config, fail_eval=False):
             }
 
         for _ in framework_iterator(config, frameworks=("torch", "tf")):
-            a = agent_cls(config=config, env="fault_env")
+            a = config.build()
             self.assertRaises(Exception, lambda: a.train())
             a.stop()
 
-    def _do_test_fault_fatal_but_recreate(self, alg, config):
-        register_env("fault_env", lambda c: FaultInjectEnv(c))
-        agent_cls = get_algorithm_class(alg)
-
+    def _do_test_fault_fatal_but_recreate(self, config):
         # Test raises real error when out of workers.
-        config["num_workers"] = 1
-        config["evaluation_num_workers"] = 1
-        config["evaluation_interval"] = 1
-        config["evaluation_config"] = {
+        config.num_workers = 1
+        config.evaluation_num_workers = 1
+        config.evaluation_interval = 1
+        config.env = "fault_env"
+        config.evaluation_config = {
             "recreate_failed_workers": True,
             # Make eval worker (index 1) fail.
             "env_config": {
@@ -234,7 +237,7 @@ def _do_test_fault_fatal_but_recreate(self, alg, config):
         }
 
         for _ in framework_iterator(config, frameworks=("tf", "tf2", "torch")):
-            a = agent_cls(config=config, env="fault_env")
+            a = config.build()
             # Expect this to go well and all faulty workers are recovered.
             self.assertTrue(
                 not any(
@@ -258,55 +261,65 @@ def _do_test_fault_fatal_but_recreate(self, alg, config):
 
     def test_fatal(self):
         # Test the case where all workers fail (w/o recovery).
-        self._do_test_fault_fatal("PG", {"optimizer": {}})
+        self._do_test_fault_fatal(PGConfig().training(optimizer={}))
 
     def test_async_grads(self):
-        self._do_test_fault_ignore("A3C", {"optimizer": {"grads_per_step": 1}})
+        self._do_test_fault_ignore(
+            A3CConfig().training(optimizer={"grads_per_step": 1})
+        )
 
     def test_async_replay(self):
-        self._do_test_fault_ignore(
-            "APEX",
-            {
-                "num_gpus": 0,
-                "min_sample_timesteps_per_iteration": 1000,
-                "min_time_s_per_iteration": 1,
-                "explore": False,
-                "num_steps_sampled_before_learning_starts": 1000,
-                "target_network_update_freq": 100,
-                "optimizer": {
+        config = (
+            ApexDQNConfig()
+            .training(
+                optimizer={
                     "num_replay_buffer_shards": 1,
                 },
-            },
+            )
+            .rollouts(
+                num_rollout_workers=2,
+            )
+            .reporting(
+                min_sample_timesteps_per_iteration=1000,
+                min_time_s_per_iteration=1,
+            )
+            .resources(num_gpus=0)
+            .exploration(explore=False)
         )
+        config.target_network_update_freq = 100
+        self._do_test_fault_ignore(config=config)
 
     def test_async_samples(self):
-        self._do_test_fault_ignore("IMPALA", {"num_gpus": 0})
+        self._do_test_fault_ignore(ImpalaConfig().resources(num_gpus=0))
 
     def test_sync_replay(self):
-        self._do_test_fault_ignore("DQN", {"min_sample_timesteps_per_iteration": 1})
+        self._do_test_fault_ignore(
+            DQNConfig().reporting(min_sample_timesteps_per_iteration=1)
+        )
 
     def test_multi_g_p_u(self):
         self._do_test_fault_ignore(
-            "PPO",
-            {
-                "num_sgd_iter": 1,
-                "train_batch_size": 10,
-                "rollout_fragment_length": 10,
-                "sgd_minibatch_size": 1,
-            },
+            PPOConfig()
+            .rollouts(rollout_fragment_length=10)
+            .training(
+                train_batch_size=10,
+                sgd_minibatch_size=1,
+                num_sgd_iter=1,
+            )
         )
 
     def test_sync_samples(self):
-        self._do_test_fault_ignore("PG", {"optimizer": {}})
+        self._do_test_fault_ignore(PGConfig().training(optimizer={}))
 
     def test_async_sampling_option(self):
-        self._do_test_fault_ignore("PG", {"optimizer": {}, "sample_async": True})
+        self._do_test_fault_ignore(
+            PGConfig().rollouts(sample_async=True).training(optimizer={})
+        )
 
     def test_eval_workers_failing_ignore(self):
         # Test the case where one eval worker fails, but we chose to ignore.
         self._do_test_fault_ignore(
-            "PG",
-            config={"model": {"fcnet_hiddens": [4]}},
+            PGConfig().training(model={"fcnet_hiddens": [4]}),
             fail_eval=True,
         )
 
@@ -322,13 +335,12 @@ def test_recreate_eval_workers_parallel_to_training_w_async_req_manager(self):
             .training(model={"fcnet_hiddens": [4]})
         )
 
-        self._do_test_fault_fatal_but_recreate("PG", config=config.to_dict())
+        self._do_test_fault_fatal_but_recreate(config)
 
     def test_eval_workers_failing_fatal(self):
         # Test the case where all eval workers fail (w/o recovery).
         self._do_test_fault_fatal(
-            "PG",
-            config={"model": {"fcnet_hiddens": [4]}},
+            PGConfig().training(model={"fcnet_hiddens": [4]}),
             fail_eval=True,
         )
 
@@ -337,27 +349,34 @@ def test_workers_fatal_but_recover(self):
         COUNTER_NAME = "test_workers_fatal_but_recover"
         counter = Counter.options(name=COUNTER_NAME).remote()
 
-        config = {
-            "num_workers": 2,
-            # Worker fault tolerance.
-            "ignore_worker_failures": False,  # Do not ignore
-            "recreate_failed_workers": True,  # But recover.
-            "model": {"fcnet_hiddens": [4]},
-            "env_config": {
-                # Make both worker idx=1 and 2 fail.
-                "bad_indices": [1, 2],
-                # Env throws error between steps 100 and 102.
-                "failure_start_count": 100,
-                "failure_stop_count": 102,
-                "counter": COUNTER_NAME,
-            },
-        }
+        config = (
+            PGConfig()
+            .rollouts(
+                num_rollout_workers=2,
+                ignore_worker_failures=False,  # Do not ignore
+                recreate_failed_workers=True,  # But recover.
+            )
+            .training(
+                model={"fcnet_hiddens": [4]},
+            )
+            .environment(
+                env="fault_env",
+                env_config={
+                    # Make both worker idx=1 and 2 fail.
+                    "bad_indices": [1, 2],
+                    # Env throws error between steps 100 and 102.
+                    "failure_start_count": 100,
+                    "failure_stop_count": 102,
+                    "counter": COUNTER_NAME,
+                },
+            )
+        )
 
         for _ in framework_iterator(config, frameworks=("tf2", "torch")):
             # Reset interaciton counter.
             ray.wait([counter.reset.remote()])
 
-            a = PG(config=config, env="fault_env")
+            a = config.build()
 
             # Before train loop, workers are fresh and not recreated.
             self.assertTrue(
@@ -381,40 +400,188 @@ def test_workers_fatal_but_recover(self):
                 )
             )
 
-    def test_eval_workers_fault_but_recover(self):
+    def test_policies_are_restored_on_recovered_worker(self):
+        class AddPolicyCallback(DefaultCallbacks):
+            def __init__(self):
+                super().__init__()
+
+            def on_algorithm_init(self, *, algorithm, **kwargs):
+                # Add a custom policy to algorithm
+                algorithm.add_policy(
+                    policy_id="test_policy",
+                    policy_cls=PGTorchPolicy,
+                    observation_space=gym.spaces.Box(low=0, high=1, shape=(8,)),
+                    action_space=gym.spaces.Discrete(2),
+                    config={},
+                    policy_state=None,
+                    evaluation_workers=True,
+                )
+
         # Counter that will survive restarts.
-        COUNTER_NAME = "test_eval_workers_fault_but_recover"
+        COUNTER_NAME = "test_policies_are_restored_on_recovered_worker"
         counter = Counter.options(name=COUNTER_NAME).remote()
 
-        config = {
-            "num_workers": 2,
-            # Worker fault tolerance.
-            "ignore_worker_failures": True,  # Ignore failure.
-            "recreate_failed_workers": True,  # And recover.
-            "model": {"fcnet_hiddens": [4]},
-            # 2 eval workers.
-            "evaluation_num_workers": 2,
-            "evaluation_interval": 1,
-            "evaluation_config": {
-                "env_config": {
-                    "evaluation": True,
-                    "p_done": 0.0,
-                    "max_episode_len": 20,
-                    # Make both eval workers fail.
+        config = (
+            PGConfig()
+            .rollouts(
+                num_rollout_workers=2,
+                ignore_worker_failures=False,  # Do not ignore
+                recreate_failed_workers=True,  # But recover.
+            )
+            .training(
+                model={"fcnet_hiddens": [4]},
+            )
+            .environment(
+                env="multi-agent-fault_env",
+                env_config={
+                    # Make both worker idx=1 and 2 fail.
                     "bad_indices": [1, 2],
-                    # Env throws error between steps 10 and 12.
-                    "failure_start_count": 10,
-                    "failure_stop_count": 12,
+                    # Env throws error between steps 100 and 102.
+                    "failure_start_count": 100,
+                    "failure_stop_count": 102,
                     "counter": COUNTER_NAME,
-                }
-            },
-        }
+                },
+            )
+            .evaluation(
+                evaluation_num_workers=1,
+                evaluation_interval=1,
+                evaluation_config={
+                    "ignore_worker_failures": False,
+                    "recreate_failed_workers": True,
+                    # Restart the entire eval worker.
+                    "restart_failed_sub_environments": False,
+                    "env_config": {
+                        "evaluation": True,
+                        # Make eval worker (index 1) fail.
+                        "bad_indices": [1],
+                        "failure_start_count": 10,
+                        "failure_stop_count": 12,
+                        "counter": COUNTER_NAME,
+                    },
+                },
+            )
+            .callbacks(callbacks_class=AddPolicyCallback)
+        )
 
         for _ in framework_iterator(config, frameworks=("tf2", "torch")):
             # Reset interaciton counter.
             ray.wait([counter.reset.remote()])
 
-            a = PG(config=config, env="fault_env")
+            a = config.build()
+
+            # Should have the custom policy.
+            self.assertIsNotNone(a.get_policy("test_policy"))
+
+            # Before train loop, workers are fresh and not recreated.
+            self.assertTrue(
+                not any(
+                    ray.get(
+                        [is_recreated(worker) for worker in a.workers.remote_workers()]
+                    )
+                )
+            )
+            self.assertTrue(
+                not any(
+                    ray.get(
+                        [
+                            is_recreated(worker)
+                            for worker in a.evaluation_workers.remote_workers()
+                        ]
+                    )
+                )
+            )
+
+            result = a.train()
+
+            self.assertEqual(result["num_healthy_workers"], 2)
+            # Both workers are re-created.
+            self.assertEqual(result["num_recreated_workers"], 2)
+            self.assertTrue(
+                all(
+                    ray.get(
+                        [is_recreated(worker) for worker in a.workers.remote_workers()]
+                    )
+                )
+            )
+            # Eval worker is re-created.
+            self.assertTrue(
+                all(
+                    ray.get(
+                        [
+                            is_recreated(worker)
+                            for worker in a.evaluation_workers.remote_workers()
+                        ]
+                    )
+                )
+            )
+
+            # Let's verify that our custom policy exists on both recovered workers.
+            def has_test_policy(w):
+                return "test_policy" in w.policy_map
+
+            # Rollout worker has test policy.
+            self.assertTrue(
+                all(
+                    ray.get(
+                        [
+                            w.apply.remote(has_test_policy)
+                            for w in a.workers.remote_workers()
+                        ]
+                    )
+                )
+            )
+            # Eval worker has test policy.
+            self.assertTrue(
+                all(
+                    ray.get(
+                        [
+                            w.apply.remote(has_test_policy)
+                            for w in a.evaluation_workers.remote_workers()
+                        ]
+                    )
+                )
+            )
+
+    def test_eval_workers_fault_but_recover(self):
+        # Counter that will survive restarts.
+        COUNTER_NAME = "test_eval_workers_fault_but_recover"
+        counter = Counter.options(name=COUNTER_NAME).remote()
+
+        config = (
+            PGConfig()
+            .rollouts(
+                num_rollout_workers=2,
+                ignore_worker_failures=True,  # Ignore failure.
+                recreate_failed_workers=True,  # And recover
+            )
+            .training(
+                model={"fcnet_hiddens": [4]},
+            )
+            .environment(env="fault_env")
+            .evaluation(
+                evaluation_num_workers=2,
+                evaluation_interval=1,
+                evaluation_config={
+                    "env_config": {
+                        "evaluation": True,
+                        "p_done": 0.0,
+                        "max_episode_len": 20,
+                        # Make both eval workers fail.
+                        "bad_indices": [1, 2],
+                        # Env throws error between steps 10 and 12.
+                        "failure_start_count": 10,
+                        "failure_stop_count": 12,
+                        "counter": COUNTER_NAME,
+                    },
+                },
+            )
+        )
+
+        for _ in framework_iterator(config, frameworks=("tf2", "torch")):
+            # Reset interaciton counter.
+            ray.wait([counter.reset.remote()])
+
+            a = config.build()
 
             # Before train loop, workers are fresh and not recreated.
             self.assertTrue(
@@ -468,35 +635,43 @@ def test_eval_workers_fault_but_restore_env(self):
         COUNTER_NAME = "test_eval_workers_fault_but_restore_env"
         counter = Counter.options(name=COUNTER_NAME).remote()
 
-        config = {
-            "num_workers": 2,
-            # Worker fault tolerance.
-            "ignore_worker_failures": True,
-            "recreate_failed_workers": True,
-            "model": {"fcnet_hiddens": [4]},
-            "env_config": {
-                # Make both worker idx=1 and 2 fail.
-                "bad_indices": [1, 2],
-                # Env throws error before step 2.
-                "failure_stop_count": 2,
-                "counter": COUNTER_NAME,
-            },
-            # 2 eval workers.
-            "evaluation_num_workers": 2,
-            "evaluation_interval": 1,
-            "evaluation_config": {
-                "ignore_worker_failures": True,
-                "recreate_failed_workers": True,
-                # Now instead of recreating failed workers,
-                # we want to recreate the failed sub env instead.
-                "restart_failed_sub_environments": True,
-                "env_config": {
-                    "evaluation": True,
-                    # Make eval worker (index 1) fail.
-                    "bad_indices": [1],
+        config = (
+            PGConfig()
+            .rollouts(
+                num_rollout_workers=2,
+                ignore_worker_failures=True,  # Ignore failure.
+                recreate_failed_workers=True,  # And recover
+            )
+            .training(
+                model={"fcnet_hiddens": [4]},
+            )
+            .environment(
+                env="fault_env",
+                env_config={
+                    # Make both worker idx=1 and 2 fail.
+                    "bad_indices": [1, 2],
+                    # Env throws error before step 2.
+                    "failure_stop_count": 2,
+                    "counter": COUNTER_NAME,
                 },
-            },
-        }
+            )
+            .evaluation(
+                evaluation_num_workers=2,
+                evaluation_interval=1,
+                evaluation_config={
+                    "ignore_worker_failures": True,
+                    "recreate_failed_workers": True,
+                    # Now instead of recreating failed workers,
+                    # we want to recreate the failed sub env instead.
+                    "restart_failed_sub_environments": True,
+                    "env_config": {
+                        "evaluation": True,
+                        # Make eval worker (index 1) fail.
+                        "bad_indices": [1],
+                    },
+                },
+            )
+        )
 
         for _ in framework_iterator(config, frameworks=("tf2", "torch")):
             # Reset interaciton counter.
@@ -554,44 +729,53 @@ def test_multi_agent_env_eval_workers_fault_but_restore_env(self):
         COUNTER_NAME = "test_multi_agent_env_eval_workers_fault_but_restore_env"
         counter = Counter.options(name=COUNTER_NAME).remote()
 
-        config = {
-            "num_workers": 2,
-            "model": {"fcnet_hiddens": [4]},
-            # Workers do not fault and no fault tolerance.
-            "env_config": {},
-            "multiagent": {
-                "policies": {
+        config = (
+            PGConfig()
+            .rollouts(
+                num_rollout_workers=2,
+            )
+            .training(
+                model={"fcnet_hiddens": [4]},
+            )
+            .environment(
+                env="multi-agent-fault_env",
+                # Workers do not fault and no fault tolerance.
+                env_config={},
+                disable_env_checking=True,
+            )
+            .multi_agent(
+                policies={
                     "main_agent": PolicySpec(),
                 },
-                "policies_to_train": ["main_agent"],
-                "policy_mapping_fn": lambda _: "main_agent",
-            },
-            # 2 eval workers.
-            "evaluation_num_workers": 2,
-            "evaluation_interval": 1,
-            "evaluation_config": {
-                # Now instead of recreating failed workers,
-                # we want to recreate the failed sub env instead.
-                "restart_failed_sub_environments": True,
-                "env_config": {
-                    "evaluation": True,
-                    "p_done": 0.0,
-                    "max_episode_len": 20,
-                    # Make eval worker (index 1) fail.
-                    "bad_indices": [1],
-                    "counter": COUNTER_NAME,
-                    "failure_start_count": 10,
-                    "failure_stop_count": 12,
+                policies_to_train=["main_agent"],
+                policy_mapping_fn=lambda _: "main_agent",
+            )
+            .evaluation(
+                evaluation_num_workers=2,
+                evaluation_interval=1,
+                evaluation_config={
+                    # Now instead of recreating failed workers,
+                    # we want to recreate the failed sub env instead.
+                    "restart_failed_sub_environments": True,
+                    "env_config": {
+                        "evaluation": True,
+                        "p_done": 0.0,
+                        "max_episode_len": 20,
+                        # Make eval worker (index 1) fail.
+                        "bad_indices": [1],
+                        "counter": COUNTER_NAME,
+                        "failure_start_count": 10,
+                        "failure_stop_count": 12,
+                    },
                 },
-            },
-            "disable_env_checking": True,
-        }
+            )
+        )
 
         for _ in framework_iterator(config, frameworks=("tf2", "torch")):
             # Reset interaciton counter.
             ray.wait([counter.reset.remote()])
 
-            a = PG(config=config, env="multi-agent-fault_env")
+            a = config.build()
 
             result = a.train()
 
@@ -619,37 +803,47 @@ def test_long_failure_period_restore_env(self):
         COUNTER_NAME = "test_long_failure_period_restore_env"
         counter = Counter.options(name=COUNTER_NAME).remote()
 
-        config = {
-            "num_workers": 1,
-            "create_env_on_driver": False,
-            # Worker fault tolerance.
-            "recreate_failed_workers": True,  # Restore failed workers.
-            "restart_failed_sub_environments": True,  # And create failed envs.
-            "model": {"fcnet_hiddens": [4]},
-            "env_config": {
-                "p_done": 0.0,
-                "max_episode_len": 100,
-                "bad_indices": [1],
-                # Env throws error between steps 50 and 150.
-                "failure_start_count": 30,
-                "failure_stop_count": 80,
-                "counter": COUNTER_NAME,
-            },
-            # 2 eval workers.
-            "evaluation_num_workers": 1,
-            "evaluation_interval": 1,
-            "evaluation_config": {
-                "env_config": {
-                    "evaluation": True,
-                }
-            },
-        }
+        config = (
+            PGConfig()
+            .rollouts(
+                num_rollout_workers=1,
+                create_env_on_local_worker=False,
+                # Worker fault tolerance.
+                recreate_failed_workers=True,  # Restore failed workers.
+                restart_failed_sub_environments=True,  # And create failed envs.
+            )
+            .training(
+                model={"fcnet_hiddens": [4]},
+            )
+            .environment(
+                env="fault_env",
+                # Workers do not fault and no fault tolerance.
+                env_config={
+                    "p_done": 0.0,
+                    "max_episode_len": 100,
+                    "bad_indices": [1],
+                    # Env throws error between steps 50 and 150.
+                    "failure_start_count": 30,
+                    "failure_stop_count": 80,
+                    "counter": COUNTER_NAME,
+                },
+            )
+            .evaluation(
+                evaluation_num_workers=1,
+                evaluation_interval=1,
+                evaluation_config={
+                    "env_config": {
+                        "evaluation": True,
+                    }
+                },
+            )
+        )
 
         for _ in framework_iterator(config, frameworks=("tf2", "torch")):
             # Reset interaciton counter.
             ray.wait([counter.reset.remote()])
 
-            a = PG(config=config, env="fault_env")
+            a = config.build()
 
             # Before train loop, workers are fresh and not recreated.
             self.assertTrue(
@@ -705,40 +899,50 @@ def test_env_wait_time_workers_restore_env(self):
         COUNTER_NAME = "test_env_wait_time_workers_restore_env"
         counter = Counter.options(name=COUNTER_NAME).remote()
 
-        config = {
-            "num_workers": 1,
-            # Worker fault tolerance.
-            "ignore_worker_failures": False,  # Do not ignore
-            "recreate_failed_workers": True,  # But recover.
-            "restart_failed_sub_environments": True,
-            "model": {"fcnet_hiddens": [4]},
-            "rollout_fragment_length": 10,
-            "train_batch_size": 10,
-            "env_config": {
-                "p_done": 0.0,
-                "max_episode_len": 10,
-                "init_delay": 10,  # 10 sec init delay.
-                # Make both worker idx=1 and 2 fail.
-                "bad_indices": [1],
-                # Env throws error between steps 100 and 102.
-                "failure_start_count": 7,
-                "failure_stop_count": 8,
-                "counter": COUNTER_NAME,
-            },
-            # Use EMA PerfStat.
-            # Really large coeff to show the difference in env_wait_time_ms.
-            # Pretty much consider the last 2 data points.
-            "sampler_perf_stats_ema_coef": 0.5,
-            # Important, don't smooth over all the episodes,
-            # otherwise we don't see latency spike.
-            "metrics_num_episodes_for_smoothing": 1,
-        }
+        config = (
+            PGConfig()
+            .rollouts(
+                num_rollout_workers=1,
+                # Worker fault tolerance.
+                recreate_failed_workers=False,  # Do not ignore.
+                restart_failed_sub_environments=True,  # But recover.
+                rollout_fragment_length=10,
+                # Use EMA PerfStat.
+                # Really large coeff to show the difference in env_wait_time_ms.
+                # Pretty much consider the last 2 data points.
+                sampler_perf_stats_ema_coef=0.5,
+            )
+            .training(
+                model={"fcnet_hiddens": [4]},
+                train_batch_size=10,
+            )
+            .environment(
+                env="fault_env",
+                # Workers do not fault and no fault tolerance.
+                env_config={
+                    "p_done": 0.0,
+                    "max_episode_len": 10,
+                    "init_delay": 10,  # 10 sec init delay.
+                    # Make both worker idx=1 and 2 fail.
+                    "bad_indices": [1],
+                    # Env throws error between steps 100 and 102.
+                    "failure_start_count": 7,
+                    "failure_stop_count": 8,
+                    "counter": COUNTER_NAME,
+                },
+            )
+            .reporting(
+                # Important, don't smooth over all the episodes,
+                # otherwise we don't see latency spike.
+                metrics_num_episodes_for_smoothing=1
+            )
+        )
 
         for _ in framework_iterator(config, frameworks=("tf2", "torch")):
             # Reset interaciton counter.
             ray.wait([counter.reset.remote()])
 
-            a = PG(config=config, env="fault_env")
+            a = config.build()
 
             # Had to restore env during this iteration.
             result = a.train()

From 51de6e7f7f332ee4cc0330624ab261d431decfb4 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 10:22:45 +0100
Subject: [PATCH 05/61] Update images

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.base  | 9 +++++++++
 ci/docker/Dockerfile.build | 7 +++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/ci/docker/Dockerfile.base b/ci/docker/Dockerfile.base
index 5a74288dc7e9..d79ba374a30f 100644
--- a/ci/docker/Dockerfile.base
+++ b/ci/docker/Dockerfile.base
@@ -55,3 +55,12 @@ RUN echo "ulimit -c 0" >> /root/.bashrc
 RUN (echo "build --remote_cache=${REMOTE_CACHE_URL}" >> /root/.bazelrc); \
     (if [ "${BUILDKITE_PULL_REQUEST}" != "false" ]; then (echo "build --remote_upload_local_results=false" >> /root/.bazelrc); fi); \
     cat /root/.bazelrc
+
+# Install some dependencies (miniconda, pip dependencies, etc)
+RUN mkdir /ray
+WORKDIR /ray
+
+# Below should be re-run each time
+COPY . .
+# init also calls install-dependencies.sh
+RUN ./ci/ci.sh init
diff --git a/ci/docker/Dockerfile.build b/ci/docker/Dockerfile.build
index b6eeb5fec850..96085faa14e8 100644
--- a/ci/docker/Dockerfile.build
+++ b/ci/docker/Dockerfile.build
@@ -1,15 +1,18 @@
 FROM [Dockerfile.base image]
 
+# Delete stale data
+RUN rm -rf /ray
+
 RUN mkdir /ray
 WORKDIR /ray
 
 # Below should be re-run each time
 COPY . .
+
+# init also calls install-dependencies.sh
 RUN ./ci/ci.sh init
 RUN bash --login -i ./ci/ci.sh build
 
-RUN (if [ "${INSTALL_DEPENDENCIES}" = "ML" ]; then RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh; fi)
-
 # Run determine test to run
 RUN bash --login -i -c "python ./ci/pipeline/determine_tests_to_run.py --output=json > affected_set.json"
 RUN cat affected_set.json

From 17346d7f74e4178bcbbf7f198258704fc8ef3d75 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 10:37:57 +0100
Subject: [PATCH 06/61] Py 3.7, no dl dependencies

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.base      | 4 ++--
 ci/docker/Dockerfile.build     | 5 +++--
 ci/env/install-dependencies.sh | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/ci/docker/Dockerfile.base b/ci/docker/Dockerfile.base
index d79ba374a30f..d0069c616f92 100644
--- a/ci/docker/Dockerfile.base
+++ b/ci/docker/Dockerfile.base
@@ -4,7 +4,7 @@ ARG REMOTE_CACHE_URL
 ARG BUILDKITE_PULL_REQUEST
 ARG BUILDKITE_COMMIT
 ARG BUILDKITE_PULL_REQUEST_BASE_BRANCH
-ARG PYTHON=3.6
+ARG PYTHON=3.7
 ARG INSTALL_DEPENDENCIES
 
 ENV DEBIAN_FRONTEND=noninteractive
@@ -63,4 +63,4 @@ WORKDIR /ray
 # Below should be re-run each time
 COPY . .
 # init also calls install-dependencies.sh
-RUN ./ci/ci.sh init
+RUN NO_DL=1 ./ci/ci.sh init
diff --git a/ci/docker/Dockerfile.build b/ci/docker/Dockerfile.build
index 96085faa14e8..58b07844dc0c 100644
--- a/ci/docker/Dockerfile.build
+++ b/ci/docker/Dockerfile.build
@@ -1,4 +1,5 @@
-FROM [Dockerfile.base image]
+ARG DOCKER_IMAGE_BASE
+FROM DOCKER_IMAGE_BASE
 
 # Delete stale data
 RUN rm -rf /ray
@@ -10,7 +11,7 @@ WORKDIR /ray
 COPY . .
 
 # init also calls install-dependencies.sh
-RUN ./ci/ci.sh init
+RUN NO_DL=1 ./ci/ci.sh init
 RUN bash --login -i ./ci/ci.sh build
 
 # Run determine test to run
diff --git a/ci/env/install-dependencies.sh b/ci/env/install-dependencies.sh
index 10a9d7fffb1a..d23e2c0ca25f 100755
--- a/ci/env/install-dependencies.sh
+++ b/ci/env/install-dependencies.sh
@@ -296,7 +296,7 @@ install_pip_packages() {
 
   if [ -n "${PYTHON-}" ] && [ "${MINIMAL_INSTALL-}" != 1 ]; then
     # Remove this entire section once Serve dependencies are fixed.
-    if [ "${DOC_TESTING-}" != 1 ] && [ "${TRAIN_TESTING-}" != 1 ] && [ "${TUNE_TESTING-}" != 1 ] && [ "${RLLIB_TESTING-}" != 1 ]; then
+    if [ "${NO_DL-}" != 1 ] && [ "${DOC_TESTING-}" != 1 ] && [ "${TRAIN_TESTING-}" != 1 ] && [ "${TUNE_TESTING-}" != 1 ] && [ "${RLLIB_TESTING-}" != 1 ]; then
       # We want to install the CPU version only.
       pip install -r "${WORKSPACE_DIR}"/python/requirements/ml/requirements_dl.txt
     fi

From c282eb8a5f3ce5bab5910673a665e03c360c09c2 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 10:42:50 +0100
Subject: [PATCH 07/61] init bash

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.base  | 2 +-
 ci/docker/Dockerfile.build | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/docker/Dockerfile.base b/ci/docker/Dockerfile.base
index d0069c616f92..09005d75cec7 100644
--- a/ci/docker/Dockerfile.base
+++ b/ci/docker/Dockerfile.base
@@ -63,4 +63,4 @@ WORKDIR /ray
 # Below should be re-run each time
 COPY . .
 # init also calls install-dependencies.sh
-RUN NO_DL=1 ./ci/ci.sh init
+RUN bash --login -i NO_DL=1 ./ci/ci.sh init
diff --git a/ci/docker/Dockerfile.build b/ci/docker/Dockerfile.build
index 58b07844dc0c..d8995a332cdc 100644
--- a/ci/docker/Dockerfile.build
+++ b/ci/docker/Dockerfile.build
@@ -11,7 +11,7 @@ WORKDIR /ray
 COPY . .
 
 # init also calls install-dependencies.sh
-RUN NO_DL=1 ./ci/ci.sh init
+RUN bash --login -i NO_DL=1 ./ci/ci.sh init
 RUN bash --login -i ./ci/ci.sh build
 
 # Run determine test to run

From 79db05ffc4455457774ef0516e76f45d3da903f2 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 10:46:55 +0100
Subject: [PATCH 08/61] var

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/docker/Dockerfile.build b/ci/docker/Dockerfile.build
index d8995a332cdc..519a077de3b9 100644
--- a/ci/docker/Dockerfile.build
+++ b/ci/docker/Dockerfile.build
@@ -1,5 +1,5 @@
 ARG DOCKER_IMAGE_BASE
-FROM DOCKER_IMAGE_BASE
+FROM $DOCKER_IMAGE_BASE
 
 # Delete stale data
 RUN rm -rf /ray

From 99787f1ff1498bd390b2922e093d32544f4443cf Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 10:50:22 +0100
Subject: [PATCH 09/61] test update

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.base  | 2 +-
 ci/docker/Dockerfile.build | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/docker/Dockerfile.base b/ci/docker/Dockerfile.base
index 09005d75cec7..d0069c616f92 100644
--- a/ci/docker/Dockerfile.base
+++ b/ci/docker/Dockerfile.base
@@ -63,4 +63,4 @@ WORKDIR /ray
 # Below should be re-run each time
 COPY . .
 # init also calls install-dependencies.sh
-RUN bash --login -i NO_DL=1 ./ci/ci.sh init
+RUN NO_DL=1 ./ci/ci.sh init
diff --git a/ci/docker/Dockerfile.build b/ci/docker/Dockerfile.build
index 519a077de3b9..f4d65fb3c5ae 100644
--- a/ci/docker/Dockerfile.build
+++ b/ci/docker/Dockerfile.build
@@ -11,7 +11,7 @@ WORKDIR /ray
 COPY . .
 
 # init also calls install-dependencies.sh
-RUN bash --login -i NO_DL=1 ./ci/ci.sh init
+RUN NO_DL=1 ./ci/ci.sh init
 RUN bash --login -i ./ci/ci.sh build
 
 # Run determine test to run

From 02607a5db3e0e04543f2475074e628b5813ec324 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 11:06:54 +0100
Subject: [PATCH 10/61] login

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/docker/Dockerfile.build b/ci/docker/Dockerfile.build
index f4d65fb3c5ae..351c2aed40e0 100644
--- a/ci/docker/Dockerfile.build
+++ b/ci/docker/Dockerfile.build
@@ -11,7 +11,7 @@ WORKDIR /ray
 COPY . .
 
 # init also calls install-dependencies.sh
-RUN NO_DL=1 ./ci/ci.sh init
+RUN NO_DL=1 bash --login -i ./ci/ci.sh init
 RUN bash --login -i ./ci/ci.sh build
 
 # Run determine test to run

From 881c76b0a8b8b4dd890f672ef8ce977bc25e8e72 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 15 Sep 2022 13:17:31 +0200
Subject: [PATCH 11/61] fix faulty changed files list

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 ci/pipeline/determine_tests_to_run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/pipeline/determine_tests_to_run.py b/ci/pipeline/determine_tests_to_run.py
index 32a3a1a40bd9..849c82347047 100644
--- a/ci/pipeline/determine_tests_to_run.py
+++ b/ci/pipeline/determine_tests_to_run.py
@@ -25,7 +25,7 @@ def list_changed_files(commit_range):
         list: List of changed files within the commit range
     """
 
-    command = ["git", "diff", "--name-only", commit_range, "--"]
+    command = ["git", "diff", "--name-only", "--", commit_range]
     out = subprocess.check_output(command)
     return [s.strip() for s in out.decode().splitlines() if s is not None]
 

From a532fb670841a31712f94a6761dea57123dc37d7 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 15 Sep 2022 13:25:08 +0200
Subject: [PATCH 12/61] move out of ray working dir to actually delete it

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 ci/docker/Dockerfile.build | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ci/docker/Dockerfile.build b/ci/docker/Dockerfile.build
index 351c2aed40e0..ea9566e25cc3 100644
--- a/ci/docker/Dockerfile.build
+++ b/ci/docker/Dockerfile.build
@@ -1,6 +1,8 @@
 ARG DOCKER_IMAGE_BASE
 FROM $DOCKER_IMAGE_BASE
 
+# Move out of working dir /ray
+RUN cd /
 # Delete stale data
 RUN rm -rf /ray
 

From b5c33aeb24f4eebe4a56df535f389f76667fedae Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 15 Sep 2022 13:59:07 +0200
Subject: [PATCH 13/61] remove WORKDIR to create fresh ray folder

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 ci/docker/Dockerfile.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/docker/Dockerfile.build b/ci/docker/Dockerfile.build
index ea9566e25cc3..a09e56e1a620 100644
--- a/ci/docker/Dockerfile.build
+++ b/ci/docker/Dockerfile.build
@@ -2,8 +2,8 @@ ARG DOCKER_IMAGE_BASE
 FROM $DOCKER_IMAGE_BASE
 
 # Move out of working dir /ray
-RUN cd /
 # Delete stale data
+WORKDIR /
 RUN rm -rf /ray
 
 RUN mkdir /ray

From fe0a64c3edd4f38c93bec249e1fdb02a54614642 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 13:58:40 +0100
Subject: [PATCH 14/61] Revert "fix faulty changed files list"

This reverts commit 881c76b0a8b8b4dd890f672ef8ce977bc25e8e72.

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/pipeline/determine_tests_to_run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/pipeline/determine_tests_to_run.py b/ci/pipeline/determine_tests_to_run.py
index 849c82347047..32a3a1a40bd9 100644
--- a/ci/pipeline/determine_tests_to_run.py
+++ b/ci/pipeline/determine_tests_to_run.py
@@ -25,7 +25,7 @@ def list_changed_files(commit_range):
         list: List of changed files within the commit range
     """
 
-    command = ["git", "diff", "--name-only", "--", commit_range]
+    command = ["git", "diff", "--name-only", commit_range, "--"]
     out = subprocess.check_output(command)
     return [s.strip() for s in out.decode().splitlines() if s is not None]
 

From 14e1c49ff7195c2c15dbfdd9d2c4685e2aaf2aa3 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 14:33:56 +0100
Subject: [PATCH 15/61] Update dockerfiles

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.base      |  1 -
 ci/docker/Dockerfile.test      | 13 ++++++-
 ci/docker/Dockerfile.test_base | 65 ++++++++++++++++++++++++++++++++++
 3 files changed, 77 insertions(+), 2 deletions(-)
 create mode 100644 ci/docker/Dockerfile.test_base

diff --git a/ci/docker/Dockerfile.base b/ci/docker/Dockerfile.base
index d0069c616f92..13c07c7e805c 100644
--- a/ci/docker/Dockerfile.base
+++ b/ci/docker/Dockerfile.base
@@ -5,7 +5,6 @@ ARG BUILDKITE_PULL_REQUEST
 ARG BUILDKITE_COMMIT
 ARG BUILDKITE_PULL_REQUEST_BASE_BRANCH
 ARG PYTHON=3.7
-ARG INSTALL_DEPENDENCIES
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV TZ=America/Los_Angeles
diff --git a/ci/docker/Dockerfile.test b/ci/docker/Dockerfile.test
index 9cc9f1a39401..45b4839373f2 100644
--- a/ci/docker/Dockerfile.test
+++ b/ci/docker/Dockerfile.test
@@ -1 +1,12 @@
-FROM ubuntu:focal
+ARG DOCKER_IMAGE_TEST_BASE
+FROM $DOCKER_IMAGE_TEST_BASE
+
+# Move out of working dir /ray
+# Delete stale data
+WORKDIR /
+RUN rm -rf /ray
+
+RUN mkdir /ray
+WORKDIR /ray
+
+RUN NO_DL=1 NO_BUILD=1 ./env/install-dependencies.sh init
diff --git a/ci/docker/Dockerfile.test_base b/ci/docker/Dockerfile.test_base
new file mode 100644
index 000000000000..5dde21a168d6
--- /dev/null
+++ b/ci/docker/Dockerfile.test_base
@@ -0,0 +1,65 @@
+FROM ubuntu:focal
+
+ARG REMOTE_CACHE_URL
+ARG BUILDKITE_PULL_REQUEST
+ARG BUILDKITE_COMMIT
+ARG BUILDKITE_PULL_REQUEST_BASE_BRANCH
+ARG PYTHON=3.7
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV TZ=America/Los_Angeles
+
+ENV BUILDKITE=true
+ENV CI=true
+ENV PYTHON=$PYTHON
+ENV RAY_USE_RANDOM_PORTS=1
+ENV RAY_DEFAULT_BUILD=1
+ENV RAY_INSTALL_JAVA=0
+ENV BUILDKITE_PULL_REQUEST=${BUILDKITE_PULL_REQUEST}
+ENV BUILDKITE_COMMIT=${BUILDKITE_COMMIT}
+ENV BUILDKITE_PULL_REQUEST_BASE_BRANCH=${BUILDKITE_PULL_REQUEST_BASE_BRANCH}
+# For wheel build
+# https://github.com/docker-library/docker/blob/master/20.10/docker-entrypoint.sh
+ENV DOCKER_TLS_CERTDIR=/certs
+ENV DOCKER_HOST=tcp://docker:2376
+ENV DOCKER_TLS_VERIFY=1
+ENV DOCKER_CERT_PATH=/certs/client
+ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
+ENV BUILDKITE_BAZEL_CACHE_URL=${REMOTE_CACHE_URL}
+
+RUN apt-get update -qq && apt-get upgrade -qq
+RUN apt-get install -y -qq \
+    curl python-is-python3 git build-essential \
+    sudo unzip unrar apt-utils dialog tzdata wget rsync \
+    language-pack-en tmux cmake gdb vim htop \
+    libgtk2.0-dev zlib1g-dev libgl1-mesa-dev \
+    clang-format-12 jq \
+    clang-tidy-12 clang-12
+# Make using GCC 9 explicit.
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 90 --slave /usr/bin/g++ g++ /usr/bin/g++-9 \
+    --slave /usr/bin/gcov gcov /usr/bin/gcov-9
+RUN ln -s /usr/bin/clang-format-12 /usr/bin/clang-format && \
+    ln -s /usr/bin/clang-tidy-12 /usr/bin/clang-tidy && \
+    ln -s /usr/bin/clang-12 /usr/bin/clang
+
+RUN curl -o- https://get.docker.com | sh
+
+# System conf for tests
+RUN locale -a
+ENV LC_ALL=en_US.utf8
+ENV LANG=en_US.utf8
+RUN echo "ulimit -c 0" >> /root/.bashrc
+
+# Setup Bazel caches
+RUN (echo "build --remote_cache=${REMOTE_CACHE_URL}" >> /root/.bazelrc); \
+    (if [ "${BUILDKITE_PULL_REQUEST}" != "false" ]; then (echo "build --remote_upload_local_results=false" >> /root/.bazelrc); fi); \
+    cat /root/.bazelrc
+
+# Install some dependencies (miniconda, pip dependencies, etc)
+RUN mkdir /ray
+WORKDIR /ray
+
+# Below should be re-run each time
+COPY . .
+
+RUN NO_DL=1 NO_BUILD=1 ./env/install-dependencies.sh init

From d549d96d9a85beb633e351392bf0e20c80d48116 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 14:53:03 +0100
Subject: [PATCH 16/61] Fix paath

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.test      | 3 +++
 ci/docker/Dockerfile.test_base | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/ci/docker/Dockerfile.test b/ci/docker/Dockerfile.test
index 45b4839373f2..54db2750e1f8 100644
--- a/ci/docker/Dockerfile.test
+++ b/ci/docker/Dockerfile.test
@@ -9,4 +9,7 @@ RUN rm -rf /ray
 RUN mkdir /ray
 WORKDIR /ray
 
+# Copy new ray files
+COPY . .
+
 RUN NO_DL=1 NO_BUILD=1 ./env/install-dependencies.sh init
diff --git a/ci/docker/Dockerfile.test_base b/ci/docker/Dockerfile.test_base
index 5dde21a168d6..4f28ee9a3b82 100644
--- a/ci/docker/Dockerfile.test_base
+++ b/ci/docker/Dockerfile.test_base
@@ -62,4 +62,4 @@ WORKDIR /ray
 # Below should be re-run each time
 COPY . .
 
-RUN NO_DL=1 NO_BUILD=1 ./env/install-dependencies.sh init
+RUN NO_DL=1 NO_BUILD=1 ./ci/env/install-dependencies.sh init

From bde08d45ab5824759a96e3d98421d589004aa53c Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 15:18:13 +0100
Subject: [PATCH 17/61] Rename docker files

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/{Dockerfile.base => Dockerfile.base_build}     | 0
 ci/docker/{Dockerfile.test_base => Dockerfile.base_test} | 0
 ci/docker/Dockerfile.build                               | 4 ++--
 ci/docker/Dockerfile.test                                | 4 ++--
 4 files changed, 4 insertions(+), 4 deletions(-)
 rename ci/docker/{Dockerfile.base => Dockerfile.base_build} (100%)
 rename ci/docker/{Dockerfile.test_base => Dockerfile.base_test} (100%)

diff --git a/ci/docker/Dockerfile.base b/ci/docker/Dockerfile.base_build
similarity index 100%
rename from ci/docker/Dockerfile.base
rename to ci/docker/Dockerfile.base_build
diff --git a/ci/docker/Dockerfile.test_base b/ci/docker/Dockerfile.base_test
similarity index 100%
rename from ci/docker/Dockerfile.test_base
rename to ci/docker/Dockerfile.base_test
diff --git a/ci/docker/Dockerfile.build b/ci/docker/Dockerfile.build
index a09e56e1a620..8f0e4fbe4bef 100644
--- a/ci/docker/Dockerfile.build
+++ b/ci/docker/Dockerfile.build
@@ -1,5 +1,5 @@
-ARG DOCKER_IMAGE_BASE
-FROM $DOCKER_IMAGE_BASE
+ARG DOCKER_IMAGE_BASE_BUILD
+FROM $DOCKER_IMAGE_BASE_BUILD
 
 # Move out of working dir /ray
 # Delete stale data
diff --git a/ci/docker/Dockerfile.test b/ci/docker/Dockerfile.test
index 54db2750e1f8..d1aec86d9146 100644
--- a/ci/docker/Dockerfile.test
+++ b/ci/docker/Dockerfile.test
@@ -1,5 +1,5 @@
-ARG DOCKER_IMAGE_TEST_BASE
-FROM $DOCKER_IMAGE_TEST_BASE
+ARG DOCKER_IMAGE_TEST_BASE_TEST
+FROM $DOCKER_IMAGE_TEST_BASE_TEST
 
 # Move out of working dir /ray
 # Delete stale data

From d5250f01f6ff0463f7900accfcddd49e0bac5908 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 20:14:08 +0100
Subject: [PATCH 18/61] update again

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.test | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/docker/Dockerfile.test b/ci/docker/Dockerfile.test
index d1aec86d9146..6582c3e69633 100644
--- a/ci/docker/Dockerfile.test
+++ b/ci/docker/Dockerfile.test
@@ -1,5 +1,5 @@
-ARG DOCKER_IMAGE_TEST_BASE_TEST
-FROM $DOCKER_IMAGE_TEST_BASE_TEST
+ARG DOCKER_IMAGE_BASE_TEST
+FROM $DOCKER_IMAGE_BASE_TEST
 
 # Move out of working dir /ray
 # Delete stale data

From 6dc5194b769ebf2e0d84ee0797d0a0565241c3d5 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 20:47:03 +0100
Subject: [PATCH 19/61] Dockerfile.test update

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/docker/Dockerfile.test b/ci/docker/Dockerfile.test
index 6582c3e69633..7a0c114c7a21 100644
--- a/ci/docker/Dockerfile.test
+++ b/ci/docker/Dockerfile.test
@@ -12,4 +12,4 @@ WORKDIR /ray
 # Copy new ray files
 COPY . .
 
-RUN NO_DL=1 NO_BUILD=1 ./env/install-dependencies.sh init
+RUN NO_DL=1 NO_BUILD=1 ./ci/env/install-dependencies.sh init

From 49ebddeedd4b48e5e2cb3183a8a04d2085d94656 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 21:10:35 +0100
Subject: [PATCH 20/61] Restructure bases, add GPU base

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.base_build | 60 +----------------------------
 ci/docker/Dockerfile.base_gpu   | 68 +++++++++++++++++++++++++++++++++
 ci/docker/Dockerfile.gpu        |  3 +-
 ci/docker/Dockerfile.ml         |  2 +-
 4 files changed, 73 insertions(+), 60 deletions(-)
 create mode 100644 ci/docker/Dockerfile.base_gpu

diff --git a/ci/docker/Dockerfile.base_build b/ci/docker/Dockerfile.base_build
index 13c07c7e805c..7d0972566092 100644
--- a/ci/docker/Dockerfile.base_build
+++ b/ci/docker/Dockerfile.base_build
@@ -1,65 +1,9 @@
 FROM ubuntu:focal
 
-ARG REMOTE_CACHE_URL
-ARG BUILDKITE_PULL_REQUEST
-ARG BUILDKITE_COMMIT
-ARG BUILDKITE_PULL_REQUEST_BASE_BRANCH
-ARG PYTHON=3.7
-
-ENV DEBIAN_FRONTEND=noninteractive
-ENV TZ=America/Los_Angeles
-
-ENV BUILDKITE=true
-ENV CI=true
-ENV PYTHON=$PYTHON
-ENV RAY_USE_RANDOM_PORTS=1
-ENV RAY_DEFAULT_BUILD=1
 ENV RAY_INSTALL_JAVA=1
-ENV BUILDKITE_PULL_REQUEST=${BUILDKITE_PULL_REQUEST}
-ENV BUILDKITE_COMMIT=${BUILDKITE_COMMIT}
-ENV BUILDKITE_PULL_REQUEST_BASE_BRANCH=${BUILDKITE_PULL_REQUEST_BASE_BRANCH}
-# For wheel build
-# https://github.com/docker-library/docker/blob/master/20.10/docker-entrypoint.sh
-ENV DOCKER_TLS_CERTDIR=/certs
-ENV DOCKER_HOST=tcp://docker:2376
-ENV DOCKER_TLS_VERIFY=1
-ENV DOCKER_CERT_PATH=/certs/client
-ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
-ENV BUILDKITE_BAZEL_CACHE_URL=${REMOTE_CACHE_URL}
 
-RUN apt-get update -qq && apt-get upgrade -qq
 RUN apt-get install -y -qq \
-    curl python-is-python3 git build-essential \
-    sudo unzip unrar apt-utils dialog tzdata wget rsync \
-    language-pack-en tmux cmake gdb vim htop \
-    libgtk2.0-dev zlib1g-dev libgl1-mesa-dev maven \
-    openjdk-8-jre openjdk-8-jdk clang-format-12 jq \
-    clang-tidy-12 clang-12
-# Make using GCC 9 explicit.
-RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 90 --slave /usr/bin/g++ g++ /usr/bin/g++-9 \
-    --slave /usr/bin/gcov gcov /usr/bin/gcov-9
-RUN ln -s /usr/bin/clang-format-12 /usr/bin/clang-format && \
-    ln -s /usr/bin/clang-tidy-12 /usr/bin/clang-tidy && \
-    ln -s /usr/bin/clang-12 /usr/bin/clang
-
-RUN curl -o- https://get.docker.com | sh
-
-# System conf for tests
-RUN locale -a
-ENV LC_ALL=en_US.utf8
-ENV LANG=en_US.utf8
-RUN echo "ulimit -c 0" >> /root/.bashrc
-
-# Setup Bazel caches
-RUN (echo "build --remote_cache=${REMOTE_CACHE_URL}" >> /root/.bazelrc); \
-    (if [ "${BUILDKITE_PULL_REQUEST}" != "false" ]; then (echo "build --remote_upload_local_results=false" >> /root/.bazelrc); fi); \
-    cat /root/.bazelrc
-
-# Install some dependencies (miniconda, pip dependencies, etc)
-RUN mkdir /ray
-WORKDIR /ray
+    maven openjdk-8-jre openjdk-8-jdk
 
-# Below should be re-run each time
-COPY . .
-# init also calls install-dependencies.sh
+# init also calls install-dependencies.sh (again)
 RUN NO_DL=1 ./ci/ci.sh init
diff --git a/ci/docker/Dockerfile.base_gpu b/ci/docker/Dockerfile.base_gpu
new file mode 100644
index 000000000000..232fe084876a
--- /dev/null
+++ b/ci/docker/Dockerfile.base_gpu
@@ -0,0 +1,68 @@
+FROM nvidia/cuda:11.2.0-cudnn8-devel-ubuntu20.04
+
+ARG REMOTE_CACHE_URL
+ARG BUILDKITE_PULL_REQUEST
+ARG BUILDKITE_COMMIT
+ARG BUILDKITE_PULL_REQUEST_BASE_BRANCH
+ARG PYTHON=3.7
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV TZ=America/Los_Angeles
+
+ENV BUILDKITE=true
+ENV CI=true
+ENV PYTHON=$PYTHON
+ENV RAY_USE_RANDOM_PORTS=1
+ENV RAY_DEFAULT_BUILD=1
+ENV RAY_INSTALL_JAVA=0
+ENV BUILDKITE_PULL_REQUEST=${BUILDKITE_PULL_REQUEST}
+ENV BUILDKITE_COMMIT=${BUILDKITE_COMMIT}
+ENV BUILDKITE_PULL_REQUEST_BASE_BRANCH=${BUILDKITE_PULL_REQUEST_BASE_BRANCH}
+# For wheel build
+# https://github.com/docker-library/docker/blob/master/20.10/docker-entrypoint.sh
+ENV DOCKER_TLS_CERTDIR=/certs
+ENV DOCKER_HOST=tcp://docker:2376
+ENV DOCKER_TLS_VERIFY=1
+ENV DOCKER_CERT_PATH=/certs/client
+ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
+ENV BUILDKITE_BAZEL_CACHE_URL=${REMOTE_CACHE_URL}
+
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
+
+RUN apt-get update -qq && apt-get upgrade -qq
+RUN apt-get install -y -qq \
+    curl python-is-python3 git build-essential \
+    sudo unzip unrar apt-utils dialog tzdata wget rsync \
+    language-pack-en tmux cmake gdb vim htop \
+    libgtk2.0-dev zlib1g-dev libgl1-mesa-dev \
+    clang-format-12 jq \
+    clang-tidy-12 clang-12
+# Make using GCC 9 explicit.
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 90 --slave /usr/bin/g++ g++ /usr/bin/g++-9 \
+    --slave /usr/bin/gcov gcov /usr/bin/gcov-9
+RUN ln -s /usr/bin/clang-format-12 /usr/bin/clang-format && \
+    ln -s /usr/bin/clang-tidy-12 /usr/bin/clang-tidy && \
+    ln -s /usr/bin/clang-12 /usr/bin/clang
+
+RUN curl -o- https://get.docker.com | sh
+
+# System conf for tests
+RUN locale -a
+ENV LC_ALL=en_US.utf8
+ENV LANG=en_US.utf8
+RUN echo "ulimit -c 0" >> /root/.bashrc
+
+# Setup Bazel caches
+RUN (echo "build --remote_cache=${REMOTE_CACHE_URL}" >> /root/.bazelrc); \
+    (if [ "${BUILDKITE_PULL_REQUEST}" != "false" ]; then (echo "build --remote_upload_local_results=false" >> /root/.bazelrc); fi); \
+    cat /root/.bazelrc
+
+# Install some dependencies (miniconda, pip dependencies, etc)
+RUN mkdir /ray
+WORKDIR /ray
+
+# Below should be re-run each time
+COPY . .
+
+RUN NO_DL=1 NO_BUILD=1 ./ci/env/install-dependencies.sh init
diff --git a/ci/docker/Dockerfile.gpu b/ci/docker/Dockerfile.gpu
index b9d4c20a51e6..a1fe7465a115 100644
--- a/ci/docker/Dockerfile.gpu
+++ b/ci/docker/Dockerfile.gpu
@@ -1,2 +1,3 @@
-FROM nvidia/cuda:11.2.0-cudnn8-devel-ubuntu20.04
+FROM DOCKER_IMAGE_GPU
 
+RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.ml b/ci/docker/Dockerfile.ml
index 9be76a290a95..8aea35a1baa4 100644
--- a/ci/docker/Dockerfile.ml
+++ b/ci/docker/Dockerfile.ml
@@ -1,3 +1,3 @@
-FROM [Dockerfile.test image]
+FROM DOCKER_IMAGE_TEST
 
 RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh

From bb33747a4cd8a53d49d35ec9350f94940f00d76a Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 15 Sep 2022 21:19:05 +0100
Subject: [PATCH 21/61] Fix FROMs

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.base_build |  3 ++-
 ci/docker/Dockerfile.gpu        | 14 +++++++++++++-
 ci/docker/Dockerfile.ml         |  3 ++-
 ci/docker/Dockerfile.test       |  2 +-
 4 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/ci/docker/Dockerfile.base_build b/ci/docker/Dockerfile.base_build
index 7d0972566092..0d73f080a3c4 100644
--- a/ci/docker/Dockerfile.base_build
+++ b/ci/docker/Dockerfile.base_build
@@ -1,4 +1,5 @@
-FROM ubuntu:focal
+ARG DOCKER_IMAGE_BASE_TEST
+FROM $DOCKER_IMAGE_BASE_TEST
 
 ENV RAY_INSTALL_JAVA=1
 
diff --git a/ci/docker/Dockerfile.gpu b/ci/docker/Dockerfile.gpu
index a1fe7465a115..bbdf5bc000fe 100644
--- a/ci/docker/Dockerfile.gpu
+++ b/ci/docker/Dockerfile.gpu
@@ -1,3 +1,15 @@
-FROM DOCKER_IMAGE_GPU
+ARG DOCKER_IMAGE_BASE_GPU
+FROM $DOCKER_IMAGE_BASE_GPU
+
+# Move out of working dir /ray
+# Delete stale data
+WORKDIR /
+RUN rm -rf /ray
+
+RUN mkdir /ray
+WORKDIR /ray
+
+# Copy new ray files
+COPY . .
 
 RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.ml b/ci/docker/Dockerfile.ml
index 8aea35a1baa4..203bbea22bae 100644
--- a/ci/docker/Dockerfile.ml
+++ b/ci/docker/Dockerfile.ml
@@ -1,3 +1,4 @@
-FROM DOCKER_IMAGE_TEST
+ARG DOCKER_IMAGE_TEST
+FROM $DOCKER_IMAGE_TEST
 
 RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.test b/ci/docker/Dockerfile.test
index 7a0c114c7a21..a992552652a7 100644
--- a/ci/docker/Dockerfile.test
+++ b/ci/docker/Dockerfile.test
@@ -12,4 +12,4 @@ WORKDIR /ray
 # Copy new ray files
 COPY . .
 
-RUN NO_DL=1 NO_BUILD=1 ./ci/env/install-dependencies.sh init
+RUN NO_DL=1 NO_BUILD=1 ./ci/env/install-dependencies.sh

From 9e5b86cb78a4a4ce24e971597df20cd6b0aac02c Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 07:28:17 +0100
Subject: [PATCH 22/61] Run

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.gpu | 2 +-
 ci/docker/Dockerfile.ml  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/docker/Dockerfile.gpu b/ci/docker/Dockerfile.gpu
index bbdf5bc000fe..83637fe43a7c 100644
--- a/ci/docker/Dockerfile.gpu
+++ b/ci/docker/Dockerfile.gpu
@@ -12,4 +12,4 @@ WORKDIR /ray
 # Copy new ray files
 COPY . .
 
-RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh
+RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.ml b/ci/docker/Dockerfile.ml
index 203bbea22bae..5f82b17067b0 100644
--- a/ci/docker/Dockerfile.ml
+++ b/ci/docker/Dockerfile.ml
@@ -1,4 +1,4 @@
 ARG DOCKER_IMAGE_TEST
 FROM $DOCKER_IMAGE_TEST
 
-RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh
+RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh

From 058cc9d14d6028bf4ddc01beaa23355feabb90e0 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 07:33:55 +0100
Subject: [PATCH 23/61] egg link

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.gpu  | 3 +++
 ci/docker/Dockerfile.test | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/ci/docker/Dockerfile.gpu b/ci/docker/Dockerfile.gpu
index 83637fe43a7c..f0fec14ac2f0 100644
--- a/ci/docker/Dockerfile.gpu
+++ b/ci/docker/Dockerfile.gpu
@@ -12,4 +12,7 @@ WORKDIR /ray
 # Copy new ray files
 COPY . .
 
+# Create egg.link
+RUN echo /ray/python > /opt/miniconda/lib/python3.7/site-packages/ray.egg-link
+
 RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.test b/ci/docker/Dockerfile.test
index a992552652a7..0d85610c8492 100644
--- a/ci/docker/Dockerfile.test
+++ b/ci/docker/Dockerfile.test
@@ -12,4 +12,7 @@ WORKDIR /ray
 # Copy new ray files
 COPY . .
 
+# Create egg.link
+RUN echo /ray/python > /opt/miniconda/lib/python3.7/site-packages/ray.egg-link
+
 RUN NO_DL=1 NO_BUILD=1 ./ci/env/install-dependencies.sh

From 7f3af7d598bf60fe689903f8f8950ca209beb058 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 08:27:35 +0100
Subject: [PATCH 24/61] Base dockerfile, install dependencies update

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.base_build |  2 +-
 ci/docker/Dockerfile.base_gpu   |  2 +-
 ci/docker/Dockerfile.base_ml    | 13 +++++++++++++
 ci/docker/Dockerfile.base_test  |  2 +-
 ci/docker/Dockerfile.build      |  2 +-
 ci/docker/Dockerfile.ml         | 20 +++++++++++++++++---
 ci/docker/Dockerfile.test       |  2 +-
 ci/env/install-dependencies.sh  |  8 +++++---
 8 files changed, 40 insertions(+), 11 deletions(-)
 create mode 100644 ci/docker/Dockerfile.base_ml

diff --git a/ci/docker/Dockerfile.base_build b/ci/docker/Dockerfile.base_build
index 0d73f080a3c4..8c324fc5d2ac 100644
--- a/ci/docker/Dockerfile.base_build
+++ b/ci/docker/Dockerfile.base_build
@@ -7,4 +7,4 @@ RUN apt-get install -y -qq \
     maven openjdk-8-jre openjdk-8-jdk
 
 # init also calls install-dependencies.sh (again)
-RUN NO_DL=1 ./ci/ci.sh init
+RUN BUILD=1 ./ci/ci.sh init
diff --git a/ci/docker/Dockerfile.base_gpu b/ci/docker/Dockerfile.base_gpu
index 232fe084876a..84cde4f2fe72 100644
--- a/ci/docker/Dockerfile.base_gpu
+++ b/ci/docker/Dockerfile.base_gpu
@@ -65,4 +65,4 @@ WORKDIR /ray
 # Below should be re-run each time
 COPY . .
 
-RUN NO_DL=1 NO_BUILD=1 ./ci/env/install-dependencies.sh init
+RUN ./ci/env/install-dependencies.sh init
diff --git a/ci/docker/Dockerfile.base_ml b/ci/docker/Dockerfile.base_ml
new file mode 100644
index 000000000000..f114f1687325
--- /dev/null
+++ b/ci/docker/Dockerfile.base_ml
@@ -0,0 +1,13 @@
+ARG DOCKER_IMAGE_BASE_TEST
+FROM $DOCKER_IMAGE_BASE_TEST
+
+# Move out of working dir /ray
+# Delete stale data
+WORKDIR /
+RUN rm -rf /ray
+
+RUN mkdir /ray
+WORKDIR /ray
+
+# Copy new ray files
+COPY . .
diff --git a/ci/docker/Dockerfile.base_test b/ci/docker/Dockerfile.base_test
index 4f28ee9a3b82..e03b05144703 100644
--- a/ci/docker/Dockerfile.base_test
+++ b/ci/docker/Dockerfile.base_test
@@ -62,4 +62,4 @@ WORKDIR /ray
 # Below should be re-run each time
 COPY . .
 
-RUN NO_DL=1 NO_BUILD=1 ./ci/env/install-dependencies.sh init
+RUN ./ci/env/install-dependencies.sh init
diff --git a/ci/docker/Dockerfile.build b/ci/docker/Dockerfile.build
index 8f0e4fbe4bef..7f0b3fc7b9c3 100644
--- a/ci/docker/Dockerfile.build
+++ b/ci/docker/Dockerfile.build
@@ -13,7 +13,7 @@ WORKDIR /ray
 COPY . .
 
 # init also calls install-dependencies.sh
-RUN NO_DL=1 bash --login -i ./ci/ci.sh init
+RUN BUILD=1 bash --login -i ./ci/ci.sh init
 RUN bash --login -i ./ci/ci.sh build
 
 # Run determine test to run
diff --git a/ci/docker/Dockerfile.ml b/ci/docker/Dockerfile.ml
index 5f82b17067b0..2f0125a9b210 100644
--- a/ci/docker/Dockerfile.ml
+++ b/ci/docker/Dockerfile.ml
@@ -1,4 +1,18 @@
-ARG DOCKER_IMAGE_TEST
-FROM $DOCKER_IMAGE_TEST
+ARG DOCKER_IMAGE_BASE_ML
+FROM $DOCKER_IMAGE_BASE_ML
 
-RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh
+# Move out of working dir /ray
+# Delete stale data
+WORKDIR /
+RUN rm -rf /ray
+
+RUN mkdir /ray
+WORKDIR /ray
+
+# Copy new ray files
+COPY . .
+
+# Create egg.link
+RUN echo /ray/python > /opt/miniconda/lib/python3.7/site-packages/ray.egg-link
+
+RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.test b/ci/docker/Dockerfile.test
index 0d85610c8492..d89c9abe2344 100644
--- a/ci/docker/Dockerfile.test
+++ b/ci/docker/Dockerfile.test
@@ -15,4 +15,4 @@ COPY . .
 # Create egg.link
 RUN echo /ray/python > /opt/miniconda/lib/python3.7/site-packages/ray.egg-link
 
-RUN NO_DL=1 NO_BUILD=1 ./ci/env/install-dependencies.sh
+RUN ./ci/env/install-dependencies.sh
diff --git a/ci/env/install-dependencies.sh b/ci/env/install-dependencies.sh
index d23e2c0ca25f..e30f1e0b9d34 100755
--- a/ci/env/install-dependencies.sh
+++ b/ci/env/install-dependencies.sh
@@ -296,7 +296,7 @@ install_pip_packages() {
 
   if [ -n "${PYTHON-}" ] && [ "${MINIMAL_INSTALL-}" != 1 ]; then
     # Remove this entire section once Serve dependencies are fixed.
-    if [ "${NO_DL-}" != 1 ] && [ "${DOC_TESTING-}" != 1 ] && [ "${TRAIN_TESTING-}" != 1 ] && [ "${TUNE_TESTING-}" != 1 ] && [ "${RLLIB_TESTING-}" != 1 ]; then
+    if ([ -z "${BUILDKITE-}" ] || [ "${DL-}" = "1" ]) && [ "${DOC_TESTING-}" != 1 ] && [ "${TRAIN_TESTING-}" != 1 ] && [ "${TUNE_TESTING-}" != 1 ] && [ "${RLLIB_TESTING-}" != 1 ]; then
       # We want to install the CPU version only.
       pip install -r "${WORKSPACE_DIR}"/python/requirements/ml/requirements_dl.txt
     fi
@@ -430,7 +430,8 @@ install_pip_packages() {
 install_dependencies() {
   install_bazel
 
-  if [ "${NO_BUILD-}" != "1" ]; then
+  # Only install on buildkite if requested
+  if [ -z "${BUILDKITE-}" ] || [ "${BUILD-}" = "1" ]; then
     install_base
     install_toolchains
   fi
@@ -441,7 +442,8 @@ install_dependencies() {
 
   install_upgrade_pip
 
-  if [ "${NO_BUILD-}" != "1" ]; then
+  # Only install on buildkite if requested
+  if [ -z "${BUILDKITE-}" ] || [ "${BUILD-}" = "1" ]; then
     install_nvm
     if [ -n "${PYTHON-}" ] || [ -n "${LINT-}" ] || [ "${MAC_WHEELS-}" = 1 ]; then
       install_node

From 614cb22ef3fb23c8f995dc35ab88f12d12985170 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 08:33:51 +0100
Subject: [PATCH 25/61] ML base image

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.base_gpu | 1 +
 ci/docker/Dockerfile.base_ml  | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/ci/docker/Dockerfile.base_gpu b/ci/docker/Dockerfile.base_gpu
index 84cde4f2fe72..6d33fd2d9b25 100644
--- a/ci/docker/Dockerfile.base_gpu
+++ b/ci/docker/Dockerfile.base_gpu
@@ -66,3 +66,4 @@ WORKDIR /ray
 COPY . .
 
 RUN ./ci/env/install-dependencies.sh init
+RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.base_ml b/ci/docker/Dockerfile.base_ml
index f114f1687325..405ec1df0ca4 100644
--- a/ci/docker/Dockerfile.base_ml
+++ b/ci/docker/Dockerfile.base_ml
@@ -11,3 +11,5 @@ WORKDIR /ray
 
 # Copy new ray files
 COPY . .
+
+RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh

From 0cbefcca9717ebcdd6b4a149fa6b9b6356adb399 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 11:09:59 +0100
Subject: [PATCH 26/61] Install ray properly

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.gpu  | 4 ++--
 ci/docker/Dockerfile.ml   | 4 ++--
 ci/docker/Dockerfile.test | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/ci/docker/Dockerfile.gpu b/ci/docker/Dockerfile.gpu
index f0fec14ac2f0..205920027fd5 100644
--- a/ci/docker/Dockerfile.gpu
+++ b/ci/docker/Dockerfile.gpu
@@ -12,7 +12,7 @@ WORKDIR /ray
 # Copy new ray files
 COPY . .
 
-# Create egg.link
-RUN echo /ray/python > /opt/miniconda/lib/python3.7/site-packages/ray.egg-link
+# Install Ray
+RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 pip install -e /ray/python/
 
 RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.ml b/ci/docker/Dockerfile.ml
index 2f0125a9b210..e51c550d644f 100644
--- a/ci/docker/Dockerfile.ml
+++ b/ci/docker/Dockerfile.ml
@@ -12,7 +12,7 @@ WORKDIR /ray
 # Copy new ray files
 COPY . .
 
-# Create egg.link
-RUN echo /ray/python > /opt/miniconda/lib/python3.7/site-packages/ray.egg-link
+# Install Ray
+RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 pip install -e /ray/python/
 
 RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.test b/ci/docker/Dockerfile.test
index d89c9abe2344..5b99fdb9935e 100644
--- a/ci/docker/Dockerfile.test
+++ b/ci/docker/Dockerfile.test
@@ -12,7 +12,7 @@ WORKDIR /ray
 # Copy new ray files
 COPY . .
 
-# Create egg.link
-RUN echo /ray/python > /opt/miniconda/lib/python3.7/site-packages/ray.egg-link
+# Install Ray
+RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 pip install -e /ray/python/
 
 RUN ./ci/env/install-dependencies.sh

From bbcba186877b6ac2d8ecb126e8634025d443fde2 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 11:29:34 +0100
Subject: [PATCH 27/61] Only install llvm binaries on buildkite if needed

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/env/install-llvm-binaries.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ci/env/install-llvm-binaries.sh b/ci/env/install-llvm-binaries.sh
index a18c1fd8c021..b97ac748dbef 100755
--- a/ci/env/install-llvm-binaries.sh
+++ b/ci/env/install-llvm-binaries.sh
@@ -95,6 +95,10 @@ build:llvm --linkopt='-Wl,-rpath,${targetdir}/lib'
 # ==== end of --config=llvm options generated by ci/env/install-llvm-binaries.sh" >> .llvm-local.bazelrc
 }
 
+if [ -n "${BUILDKITE-}" ] && [ -d "${TARGET_DIR-}" ]; then
+    printInfo "${TARGET_DIR} already exists, skipping llvm download/install on Buildkite"
+    exit 0
+fi
 
 if [ ! -f ".bazelrc" ]; then
     printError ".bazelrc not found under working directory. Please run this script under repository root."

From 6c6318a9b436e40fa80eefc4c4d368a0f688c2b2 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 11:36:34 +0100
Subject: [PATCH 28/61] python3 pip

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.gpu  | 2 +-
 ci/docker/Dockerfile.ml   | 2 +-
 ci/docker/Dockerfile.test | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ci/docker/Dockerfile.gpu b/ci/docker/Dockerfile.gpu
index 205920027fd5..fc8cab932478 100644
--- a/ci/docker/Dockerfile.gpu
+++ b/ci/docker/Dockerfile.gpu
@@ -13,6 +13,6 @@ WORKDIR /ray
 COPY . .
 
 # Install Ray
-RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 pip install -e /ray/python/
+RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 python3 -m pip install -e /ray/python/
 
 RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.ml b/ci/docker/Dockerfile.ml
index e51c550d644f..7f0eb8b47737 100644
--- a/ci/docker/Dockerfile.ml
+++ b/ci/docker/Dockerfile.ml
@@ -13,6 +13,6 @@ WORKDIR /ray
 COPY . .
 
 # Install Ray
-RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 pip install -e /ray/python/
+RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 python3 -m pip install -e /ray/python/
 
 RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.test b/ci/docker/Dockerfile.test
index 5b99fdb9935e..8d92cbf62b40 100644
--- a/ci/docker/Dockerfile.test
+++ b/ci/docker/Dockerfile.test
@@ -13,6 +13,6 @@ WORKDIR /ray
 COPY . .
 
 # Install Ray
-RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 pip install -e /ray/python/
+RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 python3 -m pip install -e /ray/python/
 
 RUN ./ci/env/install-dependencies.sh

From 74c419f8c8a43038f5c4bd4db196c088315fc872 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 16 Sep 2022 12:50:07 +0200
Subject: [PATCH 29/61] LLVM install script that enables replacement of llvm
 versions

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 ci/env/install-llvm-binaries.sh | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/ci/env/install-llvm-binaries.sh b/ci/env/install-llvm-binaries.sh
index b97ac748dbef..4bbef9acda1f 100755
--- a/ci/env/install-llvm-binaries.sh
+++ b/ci/env/install-llvm-binaries.sh
@@ -27,9 +27,17 @@ trap '[ $? -eq 0 ] || log_err' EXIT
 
 LLVM_URL="https://github.com/llvm/llvm-project/releases/download/llvmorg-12.0.1/clang+llvm-12.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz"
 TARGET_DIR="/opt/llvm"
+LLVM_DOWNLOAD_URL_FILENAME="${TARGET_DIR}/llvm_download_url.txt"
 
 install_llvm() {
   local url targetdir
+  if [ -f "$LLVM_DOWNLOAD_URL_FILENAME" ]; then
+    read -r line < $LLVM_DOWNLOAD_URL_FILENAME
+    if [ $line == $LLVM_URL ]; then
+      printInfo "Skipping llvm download/install on Buildkite because LLVM was previously installed from the same URL ${line}."
+      exit 0
+    fi
+  fi
   if [ $# -ge 1 ]; then
     url="$1"
   else
@@ -93,16 +101,15 @@ build:llvm --linkopt='-fuse-ld=${targetdir}/bin/ld.lld'
 build:llvm --linkopt='-L${targetdir}/lib'
 build:llvm --linkopt='-Wl,-rpath,${targetdir}/lib'
 # ==== end of --config=llvm options generated by ci/env/install-llvm-binaries.sh" >> .llvm-local.bazelrc
+
+  echo $LLVM_URL > $LLVM_DOWNLOAD_URL_FILENAME
+  printInfo "LLVML installed and URL of current llvm install logged to $LLVM_DOWNLOAD_URL_FILENAME"
 }
 
-if [ -n "${BUILDKITE-}" ] && [ -d "${TARGET_DIR-}" ]; then
-    printInfo "${TARGET_DIR} already exists, skipping llvm download/install on Buildkite"
-    exit 0
-fi
 
 if [ ! -f ".bazelrc" ]; then
     printError ".bazelrc not found under working directory. Please run this script under repository root."
     exit 1
 fi
 
-install_llvm "$@"
+install_llvm "$@"
\ No newline at end of file

From 6f35a8b644dce227cb13cbd0d9481abb064da07f Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 11:55:18 +0100
Subject: [PATCH 30/61] move check

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/env/install-llvm-binaries.sh | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/ci/env/install-llvm-binaries.sh b/ci/env/install-llvm-binaries.sh
index 4bbef9acda1f..5603f1ebd102 100755
--- a/ci/env/install-llvm-binaries.sh
+++ b/ci/env/install-llvm-binaries.sh
@@ -31,13 +31,7 @@ LLVM_DOWNLOAD_URL_FILENAME="${TARGET_DIR}/llvm_download_url.txt"
 
 install_llvm() {
   local url targetdir
-  if [ -f "$LLVM_DOWNLOAD_URL_FILENAME" ]; then
-    read -r line < $LLVM_DOWNLOAD_URL_FILENAME
-    if [ $line == $LLVM_URL ]; then
-      printInfo "Skipping llvm download/install on Buildkite because LLVM was previously installed from the same URL ${line}."
-      exit 0
-    fi
-  fi
+
   if [ $# -ge 1 ]; then
     url="$1"
   else
@@ -102,10 +96,18 @@ build:llvm --linkopt='-L${targetdir}/lib'
 build:llvm --linkopt='-Wl,-rpath,${targetdir}/lib'
 # ==== end of --config=llvm options generated by ci/env/install-llvm-binaries.sh" >> .llvm-local.bazelrc
 
-  echo $LLVM_URL > $LLVM_DOWNLOAD_URL_FILENAME
+  echo "$url" > $LLVM_DOWNLOAD_URL_FILENAME
   printInfo "LLVML installed and URL of current llvm install logged to $LLVM_DOWNLOAD_URL_FILENAME"
 }
 
+if [ -n "${BUILDKITE-}" ] && [ -f "$LLVM_DOWNLOAD_URL_FILENAME" ]; then
+  read -r line < "$LLVM_DOWNLOAD_URL_FILENAME"
+  if [ "$line" == "$LLVM_URL" ]; then
+    printInfo "Skipping llvm download/install on Buildkite because LLVM was previously installed from the same URL ${line}."
+    exit 0
+  fi
+fi
+
 
 if [ ! -f ".bazelrc" ]; then
     printError ".bazelrc not found under working directory. Please run this script under repository root."

From 6d93d57a06b4bb54939a48ae12d85152b5c2f7ed Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 11:56:08 +0100
Subject: [PATCH 31/61] pip install

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.gpu  | 2 +-
 ci/docker/Dockerfile.ml   | 2 +-
 ci/docker/Dockerfile.test | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ci/docker/Dockerfile.gpu b/ci/docker/Dockerfile.gpu
index fc8cab932478..4aa28597903f 100644
--- a/ci/docker/Dockerfile.gpu
+++ b/ci/docker/Dockerfile.gpu
@@ -13,6 +13,6 @@ WORKDIR /ray
 COPY . .
 
 # Install Ray
-RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 python3 -m pip install -e /ray/python/
+RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 bash --login -i python3 -m pip install -e /ray/python/
 
 RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.ml b/ci/docker/Dockerfile.ml
index 7f0eb8b47737..368a9f88f418 100644
--- a/ci/docker/Dockerfile.ml
+++ b/ci/docker/Dockerfile.ml
@@ -13,6 +13,6 @@ WORKDIR /ray
 COPY . .
 
 # Install Ray
-RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 python3 -m pip install -e /ray/python/
+RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 bash --login -i python3 -m pip install -e /ray/python/
 
 RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.test b/ci/docker/Dockerfile.test
index 8d92cbf62b40..8dea9a3fec7d 100644
--- a/ci/docker/Dockerfile.test
+++ b/ci/docker/Dockerfile.test
@@ -13,6 +13,6 @@ WORKDIR /ray
 COPY . .
 
 # Install Ray
-RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 python3 -m pip install -e /ray/python/
+RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 bash --login -i python3 -m pip install -e /ray/python/
 
 RUN ./ci/env/install-dependencies.sh

From 6b5336fe2e1bed2386aa4b182057cd26756b7e33 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 12:06:18 +0100
Subject: [PATCH 32/61] Fix install once more

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.gpu  | 2 +-
 ci/docker/Dockerfile.ml   | 2 +-
 ci/docker/Dockerfile.test | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ci/docker/Dockerfile.gpu b/ci/docker/Dockerfile.gpu
index 4aa28597903f..a6a7a7ab9311 100644
--- a/ci/docker/Dockerfile.gpu
+++ b/ci/docker/Dockerfile.gpu
@@ -13,6 +13,6 @@ WORKDIR /ray
 COPY . .
 
 # Install Ray
-RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 bash --login -i python3 -m pip install -e /ray/python/
+RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 bash --login -i -c -- "python3 -m pip install -e /ray/python/"
 
 RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.ml b/ci/docker/Dockerfile.ml
index 368a9f88f418..d29a153eca5b 100644
--- a/ci/docker/Dockerfile.ml
+++ b/ci/docker/Dockerfile.ml
@@ -13,6 +13,6 @@ WORKDIR /ray
 COPY . .
 
 # Install Ray
-RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 bash --login -i python3 -m pip install -e /ray/python/
+RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 bash --login -i -c -- "python3 -m pip install -e /ray/python/"
 
 RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.test b/ci/docker/Dockerfile.test
index 8dea9a3fec7d..1bac67a050bc 100644
--- a/ci/docker/Dockerfile.test
+++ b/ci/docker/Dockerfile.test
@@ -13,6 +13,6 @@ WORKDIR /ray
 COPY . .
 
 # Install Ray
-RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 bash --login -i python3 -m pip install -e /ray/python/
+RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 bash --login -i -c -- "python3 -m pip install -e /ray/python/"
 
 RUN ./ci/env/install-dependencies.sh

From 0dc8d4d53625f4eb6c38793b7901d3c6c16ce8b0 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 12:35:21 +0100
Subject: [PATCH 33/61] No wheels required

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 .buildkite/pipeline.gpu.yml                   | 11 +--
 ...e.gpu.large.yml => pipeline.gpu_large.yml} | 12 +--
 .buildkite/pipeline.ml.yml                    | 80 +++++++++----------
 3 files changed, 52 insertions(+), 51 deletions(-)
 rename .buildkite/{pipeline.gpu.large.yml => pipeline.gpu_large.yml} (90%)

diff --git a/.buildkite/pipeline.gpu.yml b/.buildkite/pipeline.gpu.yml
index cf3ad630d479..2216541883ab 100644
--- a/.buildkite/pipeline.gpu.yml
+++ b/.buildkite/pipeline.gpu.yml
@@ -1,6 +1,6 @@
 # Todo: Enable once tests are available
 #- label: ":tv: :octopus: Tune GPU tests "
-#  conditions: ["RAY_CI_TUNE_AFFECTED"]
+#  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
 #  commands:
 #    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
 #    - TUNE_TESTING=1 ./ci/env/install-dependencies.sh
@@ -9,7 +9,7 @@
 #    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,gpu_only python/ray/tune/...
 
 - label: ":tv: :brain: RLlib: GPU Examples {A/B}"
-  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - PYTHON=3.7 RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
@@ -24,6 +24,7 @@
 - label: ":tv: :serverless: Serve Tests"
   conditions:
     [
+        "NO_WHEELS_REQUIRED",
         "RAY_CI_SERVE_AFFECTED",
         "RAY_CI_PYTHON_AFFECTED",
         "RAY_CI_ML_AFFECTED",
@@ -36,7 +37,7 @@
 
 # Todo: enable once tests pass
 #- label: ":tv: :brain: RLlib: GPU Examples {C/D}"
-#  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+#  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
 #  commands:
 #    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
 #    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
@@ -47,7 +48,7 @@
 
 # Todo: enable once tests pass
 #- label: ":tv: :brain: RLlib: GPU Examples {E/P}"
-#  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+#  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
 #  commands:
 #    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
 #    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
@@ -59,7 +60,7 @@
 
 # Todo: enable once tests pass
 #- label: ":tv: :brain: RLlib: GPU Examples {Q/Z}"
-#  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+#  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
 #  commands:
 #    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
 #    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
diff --git a/.buildkite/pipeline.gpu.large.yml b/.buildkite/pipeline.gpu_large.yml
similarity index 90%
rename from .buildkite/pipeline.gpu.large.yml
rename to .buildkite/pipeline.gpu_large.yml
index 69cfae6e33bb..a27ec1d2ae03 100644
--- a/.buildkite/pipeline.gpu.large.yml
+++ b/.buildkite/pipeline.gpu_large.yml
@@ -1,5 +1,5 @@
 - label: ":tv: :steam_locomotive: Train GPU tests "
-  conditions: ["RAY_CI_TRAIN_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - PYTHON=3.7 TRAIN_TESTING=1 TUNE_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
@@ -10,7 +10,7 @@
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,gpu_only,-ray_air,-torch_1_11 python/ray/train/...
 
 - label: ":tv: :steam_locomotive: Train GPU tests (PyTorch 1.11) "
-  conditions: ["RAY_CI_TRAIN_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - PYTHON=3.7 TRAIN_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
@@ -23,7 +23,7 @@
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=torch_1_11 python/ray/train/...
 
 - label: ":tv: :database: :steam_locomotive: Datasets Train Integration GPU Tests and Examples (Python 3.7)"
-  conditions: ["RAY_CI_TRAIN_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - PYTHON=3.7 TRAIN_TESTING=1 DATA_PROCESSING_TESTING=1 ./ci/env/install-dependencies.sh
@@ -32,7 +32,7 @@
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=datasets_train doc/...
 
 - label: ":tv: :brain: RLlib: Multi-GPU Tests"
-  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - PYTHON=3.7 RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
@@ -45,7 +45,7 @@
       --test_tag_filters=multi_gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
 
 - label: ":tv: :airplane: ML GPU tests (ray/air)"
-  conditions: ["RAY_CI_ML_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_ML_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - DATA_PROCESSING_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
@@ -56,7 +56,7 @@
 
 - label: ":tv: :book: Doc GPU tests and examples"
   conditions:
-    ["RAY_CI_PYTHON_AFFECTED", "RAY_CI_TUNE_AFFECTED", "RAY_CI_DOC_AFFECTED"]
+    ["NO_WHEELS_REQUIRED", "RAY_CI_PYTHON_AFFECTED", "RAY_CI_TUNE_AFFECTED", "RAY_CI_DOC_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - DOC_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
diff --git a/.buildkite/pipeline.ml.yml b/.buildkite/pipeline.ml.yml
index 257188273bbd..d7b4ddcbe5bb 100644
--- a/.buildkite/pipeline.ml.yml
+++ b/.buildkite/pipeline.ml.yml
@@ -1,5 +1,5 @@
 - label: ":airplane: ML tests (ray/air)"
-  conditions: ["RAY_CI_ML_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_ML_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - DATA_PROCESSING_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
@@ -9,7 +9,7 @@
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=ray_air python/ray/data/...
 
 - label: ":brain: RLlib: Learning discr. actions TF2-static-graph"
-  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -21,7 +21,7 @@
       rllib/...
 
 - label: ":brain: RLlib: Learning cont. actions TF2-static-graph"
-  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -33,7 +33,7 @@
       rllib/...
 
 - label: ":brain: RLlib: Learning discr. actions TF2-eager-tracing"
-  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -45,7 +45,7 @@
       rllib/...
 
 - label: ":brain: RLlib: Learning cont. actions TF2-eager-tracing"
-  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -57,7 +57,7 @@
       rllib/...
 
 - label: ":brain: RLlib: Learning discr. actions PyTorch"
-  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -69,7 +69,7 @@
       rllib/...
 
 - label: ":brain: RLlib: Learning cont. actions PyTorch"
-  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -81,7 +81,7 @@
       rllib/...
 
 - label: ":brain: RLlib: Learning tests w/ 2 fake GPUs TF2-static-graph"
-  conditions: ["RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -94,7 +94,7 @@
 
 # TODO: (sven) tf2 (eager) multi-GPU
 - label: ":brain: RLlib: Learning tests w/ 2 fake GPUs PyTorch"
-  conditions: ["RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -106,7 +106,7 @@
       rllib/...
 
 - label: ":brain: RLlib: Memory leak tests TF2-eager-tracing"
-  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -118,7 +118,7 @@
       rllib/...
 
 - label: ":brain: RLlib: Memory leak tests PyTorch"
-  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -130,7 +130,7 @@
       rllib/...
 
 - label: ":brain: RLlib: Quick Agent train.py runs (TODO: obsolete)"
-  conditions: ["RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -141,7 +141,7 @@
       rllib/...
 
 - label: ":brain: RLlib: Algorithm Tests (generic)"
-  conditions: ["RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -154,7 +154,7 @@
       rllib/...
 
 - label: ":brain: RLlib: Algorithm Tests (specific algos)"
-  conditions: ["RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -167,7 +167,7 @@
       rllib/...
 
 - label: ":brain: RLlib: Everything else (env-, evaluation-, ... dirs)"
-  conditions: ["RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -181,7 +181,7 @@
       rllib/...
 
 - label: ":brain: RLlib: Examples {A..B}"
-  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -190,7 +190,7 @@
       --test_tag_filters=examples_A,examples_B,-multi_gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
 
 - label: ":brain: RLlib: Examples {Ca..Ct}"
-  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -199,7 +199,7 @@
       --test_tag_filters=examples_C_AtoT,-multi_gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
 
 - label: ":brain: RLlib: Examples {Cu..Cz}"
-  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -208,7 +208,7 @@
       --test_tag_filters=examples_C_UtoZ,-multi_gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
 
 - label: ":brain: RLlib: Examples {D..P}"
-  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -218,7 +218,7 @@
       rllib/...
 
 - label: ":brain: RLlib: Examples {Q..Z}"
-  conditions: ["RAY_CI_RLLIB_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -228,7 +228,7 @@
       rllib/...
 
 - label: ":brain: RLlib: tests/ dir (A..L)"
-  conditions: ["RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -238,7 +238,7 @@
       rllib/...
       
 - label: ":brain: RLlib: tests/ dir (M..Z (no R))"
-  conditions: ["RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -247,7 +247,7 @@
       --test_tag_filters=tests_dir_M,tests_dir_N,tests_dir_O,tests_dir_P,tests_dir_Q,tests_dir_S,tests_dir_T,tests_dir_U,tests_dir_V,tests_dir_W,tests_dir_X,tests_dir_Y,tests_dir_Z,-multi_gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
       rllib/...
 - label: ":brain: RLlib: tests/ dir (R)"
-  conditions: ["RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -257,7 +257,7 @@
       rllib/...
 
 - label: ":brain: RLlib: Documentation code/examples"
-  conditions: ["RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -267,7 +267,7 @@
       rllib/...
 
 - label: ":octopus: Tune tests {A-R; no RLlib}"
-  conditions: ["RAY_CI_TUNE_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - TUNE_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -277,7 +277,7 @@
       python/ray/tune/...
 
 - label: ":octopus: Tune tests {S-Z; no RLlib}"
-  conditions: ["RAY_CI_TUNE_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - TUNE_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -289,7 +289,7 @@
 
 
 - label: ":octopus: Tune multinode tests"
-  conditions: [ "RAY_CI_TUNE_AFFECTED" ]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
   commands:
     - LINUX_WHEELS=1 ./ci/ci.sh build
     - mkdir -p ~/.docker/cli-plugins/ && curl -SL https://github.com/docker/compose/releases/download/v2.0.1/docker-compose-linux-x86_64 -o ~/.docker/cli-plugins/docker-compose && chmod +x ~/.docker/cli-plugins/docker-compose
@@ -311,7 +311,7 @@
       --test_env=DOCKER_TLS_CERTDIR=/certs
 
 - label: ":octopus: Tune examples {w/o tf/pytorch; no RLlib}"
-  conditions: ["RAY_CI_TUNE_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - TUNE_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -319,7 +319,7 @@
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=example,-tf,-pytorch,-py37,-soft_imports,-gpu_only,-rllib python/ray/tune/...
 
 - label: ":octopus: Tune examples {w/ tf/pytorch; no RLlib}"
-  conditions: ["RAY_CI_TUNE_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - TUNE_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -328,7 +328,7 @@
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-tf,pytorch,-py37,-soft_imports,-gpu_only,-rllib python/ray/tune/...
 
 - label: ":octopus: :brain: Tune tests and examples {using RLlib}"
-  conditions: ["RAY_CI_TUNE_AFFECTED", "RAY_CI_RLLIB_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - TUNE_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -336,7 +336,7 @@
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu_only,rllib python/ray/tune/...
 
 - label: ":steam_locomotive: Train tests and examples"
-  conditions: ["RAY_CI_TRAIN_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - TRAIN_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
@@ -344,7 +344,7 @@
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu_only,-minimal,-tune,-ray_air python/ray/train/...
 
 - label: ":steam_locomotive: :octopus: Train + Tune tests and examples"
-  conditions: ["RAY_CI_TRAIN_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh
@@ -352,7 +352,7 @@
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=tune,-gpu_only,-ray_air python/ray/train/...
 
 - label: ":octopus: Tune tests and examples. Python 3.7"
-  conditions: ["RAY_CI_TUNE_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - TUNE_TESTING=1 PYTHON=3.7 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
@@ -360,7 +360,7 @@
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=py37,-client python/ray/tune/...
 
 - label: ":octopus: ML library integrations tests and examples. Python 3.7"
-  conditions: ["RAY_CI_TUNE_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - TUNE_TESTING=1 PYTHON=3.7 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
@@ -371,14 +371,14 @@
 
 # TODO(amogkam): Re-enable Ludwig tests after Ludwig supports Ray 2.0
 #- label: ":octopus: Ludwig tests and examples. Python 3.7"
-#  conditions: ["RAY_CI_TUNE_AFFECTED"]
+#  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
 #  commands:
 #    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
 #    - PYTHON=3.7 INSTALL_LUDWIG=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
 #    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only python/ray/tests/ludwig/...
 
 - label: ":tropical_fish: ML Libraries w/ Ray Client Examples (Python 3.7)."
-  conditions: ["RAY_CI_TUNE_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - TUNE_TESTING=1 PYTHON=3.7 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
@@ -388,7 +388,7 @@
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=client python/ray/tune/...
 
 - label: ":potable_water: Dataset library integrations tests and examples. Python 3.7"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_PYTHON_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - DATA_PROCESSING_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -398,7 +398,7 @@
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-client python/ray/util/dask/...
 
 - label: ":potable_water: Dataset tests (Python 3.7)"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_PYTHON_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - DATA_PROCESSING_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -406,7 +406,7 @@
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-ray_air python/ray/data/...
 
 - label: ":potable_water: Workflow tests (Python 3.7)"
-  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_PYTHON_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - DATA_PROCESSING_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
@@ -432,7 +432,7 @@
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=ray_air,-needs_credentials,-gpu,-py37,-post_wheel_build doc/...
 
 - label: ":book: Doc examples with authentication "
-  conditions: ["RAY_CI_BRANCH_BUILD"]
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_BRANCH_BUILD"]
   commands:
     - if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then exit 0; fi
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT

From e22c9fc1f15060094e3edd1d4698dce8ebb82f57 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 14:15:02 +0100
Subject: [PATCH 34/61] Restore old pipeline Signed-off-by: Kai Fricke
 <kai@anyscale.com>

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 .buildkite/pipeline.gpu.large.yml |  65 ++++
 .buildkite/pipeline.yml           | 572 ++++++++++++++++++++++++++++++
 2 files changed, 637 insertions(+)
 create mode 100644 .buildkite/pipeline.gpu.large.yml

diff --git a/.buildkite/pipeline.gpu.large.yml b/.buildkite/pipeline.gpu.large.yml
new file mode 100644
index 000000000000..a27ec1d2ae03
--- /dev/null
+++ b/.buildkite/pipeline.gpu.large.yml
@@ -0,0 +1,65 @@
+- label: ":tv: :steam_locomotive: Train GPU tests "
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - PYTHON=3.7 TRAIN_TESTING=1 TUNE_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
+    # Because Python version changed, we need to re-install Ray here
+    - rm -rf ./python/ray/thirdparty_files; rm -rf ./python/ray/pickle5_files; ./ci/ci.sh build
+    - pip install -Ur ./python/requirements_ml_docker.txt
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,gpu_only,-ray_air,-torch_1_11 python/ray/train/...
+
+- label: ":tv: :steam_locomotive: Train GPU tests (PyTorch 1.11) "
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - PYTHON=3.7 TRAIN_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
+    # Because Python version changed, we need to re-install Ray here
+    - rm -rf ./python/ray/thirdparty_files; rm -rf ./python/ray/pickle5_files; ./ci/ci.sh build
+    - pip install -Ur ./python/requirements_ml_docker.txt
+    - pip uninstall torch -y
+    - pip install -U torch==1.11.0+cu113 --extra-index-url https://download.pytorch.org/whl/cu113
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=torch_1_11 python/ray/train/...
+
+- label: ":tv: :database: :steam_locomotive: Datasets Train Integration GPU Tests and Examples (Python 3.7)"
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - PYTHON=3.7 TRAIN_TESTING=1 DATA_PROCESSING_TESTING=1 ./ci/env/install-dependencies.sh
+    - pip install -Ur ./python/requirements_ml_docker.txt
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=datasets_train doc/...
+
+- label: ":tv: :brain: RLlib: Multi-GPU Tests"
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - PYTHON=3.7 RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
+    - pip install -Ur ./python/requirements_ml_docker.txt
+    - ./ci/env/env_info.sh
+    # --jobs 2 is necessary as we only need to have at least 2 gpus on the machine
+    # and running tests in parallel would cause timeouts as the other scripts would
+    # wait for the GPU to become available.
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --jobs 2
+      --test_tag_filters=multi_gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
+
+- label: ":tv: :airplane: ML GPU tests (ray/air)"
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_ML_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - DATA_PROCESSING_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
+    - pip install -Ur ./python/requirements_ml_docker.txt
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu python/ray/air/...
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu python/ray/train/...
+
+- label: ":tv: :book: Doc GPU tests and examples"
+  conditions:
+    ["NO_WHEELS_REQUIRED", "RAY_CI_PYTHON_AFFECTED", "RAY_CI_TUNE_AFFECTED", "RAY_CI_DOC_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - DOC_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - pip install -Ur ./python/requirements_ml_docker.txt
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,-py37,-post_wheel_build doc/...
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index a6ca78843880..1aacb13da9d3 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -1,5 +1,577 @@
+### ATTENTION: THIS FILE IS DEPRECATED AND WILL BE REMOVED SHORTLY
+### IT HAS BEEN SPLIT INTO TWO FILES:
+### - pipeline.build.yml FOR ALL TESTS THAT REQUIRE A FULL BUILD ENV (E.G. LLVM)
+### - pipeline.test.yml FOR THE REMAINING TESTS
+### IF YOU CHANGE SOMETHING HERE, CHANGE IT IN THE OTHER LOCATIONS, TOO!
+
+- label: ":ferris_wheel: Wheels and Jars"
+  conditions:
+    [
+        "RAY_CI_LINUX_WHEELS_AFFECTED",
+        "RAY_CI_JAVA_AFFECTED",
+    ]
+  commands:
+    # Build the wheels and jars
+    - UPLOAD_WHEELS_AS_ARTIFACTS=1 LINUX_WHEELS=1 LINUX_JARS=1 ./ci/ci.sh build
+    - bash ./java/build-jar-multiplatform.sh linux
+    # Upload the wheels and jars
+    # We don't want to push on PRs, in fact, the copy_files will fail because unauthenticated.
+    - if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then exit 0; fi
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    # Upload to branch directory.
+    - python .buildkite/copy_files.py --destination branch_wheels --path ./.whl
+    - python .buildkite/copy_files.py --destination branch_jars --path ./.jar/linux
+    # Upload to latest directory.
+    - if [ "$BUILDKITE_BRANCH" == "master" ]; then python .buildkite/copy_files.py --destination wheels --path ./.whl; fi
+    - if [ "$BUILDKITE_BRANCH" == "master" ]; then python .buildkite/copy_files.py --destination jars --path ./.jar/linux; fi
+
+- label: ":ferris_wheel: Post-wheel tests"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=post_wheel_build
+      --test_env=CONDA_EXE
+      --test_env=CONDA_PYTHON_EXE
+      --test_env=CONDA_SHLVL
+      --test_env=CONDA_PREFIX
+      --test_env=CONDA_DEFAULT_ENV
+      --test_env=CI
+      --test_env=RAY_CI_POST_WHEEL_TESTS=True
+      python/ray/tests/... python/ray/serve/... python/ray/tune/... rllib/... doc/...
+
+- label: ":ferris_wheel: Debug Wheels"
+  conditions:
+    [
+        "RAY_CI_LINUX_WHEELS_AFFECTED",
+        "RAY_CI_JAVA_AFFECTED",
+    ]
+  commands:
+    # Build the debug wheels
+    - RAY_DEBUG_BUILD=debug LINUX_WHEELS=1 ./ci/ci.sh build
+    # Upload the wheels.
+    # We don't want to push on PRs, in fact, the copy_files will fail because unauthenticated.
+    - if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then exit 0; fi
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    # Upload to branch directory.
+    - python .buildkite/copy_files.py --destination branch_wheels --path ./.whl
+    # Upload to latest directory.
+    - if [ "$BUILDKITE_BRANCH" == "master" ]; then python .buildkite/copy_files.py --destination wheels --path ./.whl; fi
+
+# Not working now.
+# - label: ":ferris_wheel: ASAN Wheels"
+#   conditions:
+#     [
+#         "RAY_CI_LINUX_WHEELS_AFFECTED",
+#         "RAY_CI_JAVA_AFFECTED",
+#     ]
+#   commands:
+#     # Build the asan wheels
+#     - RAY_DEBUG_BUILD=asan LINUX_WHEELS=1 ./ci/ci.sh build
+#     # Upload the wheels.
+#     # We don't want to push on PRs, in fact, the copy_files will fail because unauthenticated.
+#     - if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then exit 0; fi
+#     - pip install -q docker aws_requests_auth boto3
+#     # Upload to branch directory.
+#     - python .buildkite/copy_files.py --destination branch_wheels --path ./.whl
+#     # Upload to latest directory.
+#     - if [ "$BUILDKITE_BRANCH" == "master" ]; then python .buildkite/copy_files.py --destination wheels --path ./.whl; fi
+
+- label: ":docker: Build Images: py36 (1/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py36 --device-types cpu cu101 cu102 cu110 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py36 (2/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py36 --device-types cu111 cu112 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py37 (1/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py37 --device-types cpu cu101 cu102 cu110 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py37 (2/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py37 --device-types cu111 cu112 cu113 cu116 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py38 (1/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py38 --device-types cpu cu101 cu102 cu110 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py38 (2/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py38 --device-types cu111 cu112 cu113 cu116 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py39 (1/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py39 --device-types cpu cu101 cu102 cu110 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py39 (2/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py39 --device-types cu111 cu112 cu113 cu116 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py310 (1/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py310 --device-types cpu cu101 cu102 --build-type BUILDKITE --build-base
+
+- label: ":docker: Build Images: py310 (2/2)"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker aws_requests_auth boto3
+    - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
+    - python ./ci/build/build-docker-images.py --py-versions py310 --device-types cu110 cu111 cu112 --build-type BUILDKITE --build-base
+
 - label: ":book: Lint"
   commands:
     - export LINT=1
     - ./ci/env/install-dependencies.sh
     - ./ci/ci.sh lint
+
+- label: ":book: Documentation"
+  commands:
+    - export LINT=1
+    - echo "--- Setting up Python 3.7 environment."
+    - PYTHON=3.7 ./ci/env/install-dependencies.sh
+    # Specifying PYTHON=3.7 above somehow messes up the Ray install.
+    # Uninstall and re-install Ray so that we can use Ray Client
+    # (remove thirdparty_files to sidestep an issue with psutil).
+    - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files
+    - pushd /ray && git clean -f -f -x -d -e .whl -e python/ray/dashboard/client && popd
+    - bazel clean --expunge
+    - ./ci/ci.sh build
+
+- label: ":book: LinkCheck"
+  commands:
+    - export LINT=1
+    - ./ci/env/install-dependencies.sh
+    - ./ci/ci.sh check_sphinx_links
+  soft_fail: True
+
+- label: ":java: Java"
+  conditions: ["RAY_CI_JAVA_AFFECTED"]
+  commands:
+    - ./java/test.sh
+
+- label: ":cpp: Ray CPP Worker"
+  conditions: [ "RAY_CI_CPP_AFFECTED" ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/ci.sh test_cpp
+
+- label: ":cpp: Tests"
+  conditions: [ "RAY_CI_CORE_CPP_AFFECTED" ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - bazel test --config=ci --config=llvm $(./ci/run/bazel_export_options)
+      --build_tests_only
+      -- //:all -rllib/... -core_worker_test
+
+- label: ":cpp: Tests (ASAN)"
+  conditions: [ "RAY_CI_CORE_CPP_AFFECTED" ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - bazel test --config=ci --config=asan-clang $(./ci/run/bazel_export_options)
+      --build_tests_only
+      --jobs=2
+      -- //:all -//:core_worker_test
+
+- label: ":cpp: Tests (UBSAN)"
+  conditions: [ "RAY_CI_CORE_CPP_AFFECTED" ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - bazel test --config=ci --config=ubsan $(./ci/run/bazel_export_options)
+      --build_tests_only
+      --jobs=2
+      -- //:all -//:core_worker_test -//:logging_test -//:ray_syncer_test
+
+- label: ":cpp: Tests (TSAN)"
+  conditions: [ "RAY_CI_CORE_CPP_AFFECTED" ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - bazel test --config=ci --config=tsan-clang $(./ci/run/bazel_export_options)
+      --build_tests_only
+      --jobs=2
+      -- //:all -//:core_worker_test -//:event_test -//:gcs_actor_manager_test
+      -//:gcs_placement_group_manager_test -//:gcs_placement_group_scheduler_test
+      -//:gcs_server_rpc_test -//:gcs_client_test -//:gcs_heartbeat_manager_test
+      -//:metric_exporter_client_test -//:stats_test -//:worker_pool_test
+      -//:ray_syncer_test
+
+- label: ":serverless: Dashboard Tests"
+  conditions:
+    [
+        "RAY_CI_DASHBOARD_AFFECTED",
+        "RAY_CI_PYTHON_AFFECTED",
+    ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/env_info.sh
+    - ./dashboard/tests/run_ui_tests.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options) python/ray/dashboard/...
+
+- label: ":serverless: Serve Release Tests"
+  conditions:
+    [
+        "RAY_CI_SERVE_AFFECTED",
+        "RAY_CI_PYTHON_AFFECTED",
+    ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - TORCH_VERSION=1.6 ./ci/env/install-dependencies.sh
+    - 'git clone https://github.com/wg/wrk.git /tmp/wrk && pushd /tmp/wrk && make -j && sudo cp wrk /usr/local/bin && popd'
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=team:serve
+      release/...
+
+- label: ":serverless: Serve Tests"
+  parallelism: 3
+  conditions:
+    [
+        "RAY_CI_SERVE_AFFECTED",
+        "RAY_CI_PYTHON_AFFECTED",
+        "RAY_CI_ML_AFFECTED",
+    ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - TORCH_VERSION=1.6 ./ci/env/install-dependencies.sh
+    - 'git clone https://github.com/wg/wrk.git /tmp/wrk && pushd /tmp/wrk && make -j && sudo cp wrk /usr/local/bin && popd'
+    - ./ci/env/env_info.sh
+    - >-
+      set -x;
+      python ./ci/run/bazel-sharding.py
+      --exclude_manual
+      --index "\${BUILDKITE_PARALLEL_JOB}" --count "\${BUILDKITE_PARALLEL_JOB_COUNT}"
+      python/ray/serve/...
+      > test_shard.txt
+    - cat test_shard.txt
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=-post_wheel_build,-py37,-gpu
+      $(cat test_shard.txt)
+
+
+- label: ":serverless: Serve Tests (Python 3.7)"
+  conditions:
+    [
+        "RAY_CI_SERVE_AFFECTED",
+        "RAY_CI_PYTHON_AFFECTED",
+    ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - echo "--- Setting up Python 3.7 environment."
+    - PYTHON=3.7 TORCH_VERSION=1.6 ./ci/env/install-dependencies.sh
+    # Specifying PYTHON=3.7 above somehow messes up the Ray install.
+    # Uninstall and re-install Ray so that we can use Ray Client.
+    # (Remove thirdparty_files to sidestep an issue with psutil.)
+    - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files
+    - ./ci/ci.sh build
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=team:serve
+      python/ray/serve/test_gradio
+      python/ray/serve/test_gradio_visualization
+
+
+- label: ":python: Minimal install 3.6"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/ci.sh test_minimal 3.6
+
+- label: ":python: Minimal install 3.7"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/ci.sh test_minimal 3.7
+
+- label: ":python: Minimal install 3.8"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/ci.sh test_minimal 3.8
+
+- label: ":python: Minimal install 3.9"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/ci.sh test_minimal 3.9
+
+- label: ":python: Minimal install 3.10"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/ci.sh test_minimal 3.10
+
+- label: ":python: Default install"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/install-default.sh
+    - ./ci/env/env_info.sh
+    - bazel test --test_output=streamed --config=ci --test_env=RAY_DEFAULT=1 $(./ci/run/bazel_export_options)
+      python/ray/dashboard/test_dashboard
+
+- label: ":python: Ray Serve default install"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/install-serve.sh
+    - ./ci/env/env_info.sh
+    - bazel test --test_output=streamed --config=ci --test_env=RAY_DEFAULT=1 $(./ci/run/bazel_export_options)
+      python/ray/serve/test_deployment_graph
+    - bazel test --test_output=streamed --config=ci --test_env=RAY_DEFAULT=1 $(./ci/run/bazel_export_options)
+      python/ray/serve/test_api
+
+- label: ":python: Release test package unit tests"
+  conditions: ["ALWAYS"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - pip install -e release/
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --build_tests_only
+      --test_tag_filters=release_unit
+      release/...
+
+- label: ":python: (Small & Client)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - bash ./ci/ci.sh prepare_docker
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=client_tests,small_size_python_tests
+      -- python/ray/tests/...
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=ray_ha
+      --test_env=DOCKER_HOST=tcp://docker:2376
+      --test_env=DOCKER_TLS_VERIFY=1
+      --test_env=DOCKER_CERT_PATH=/certs/client
+      --test_env=DOCKER_TLS_CERTDIR=/certs
+      -- python/ray/tests/...
+
+- label: ":python: (Large)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  parallelism: 3
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/env_info.sh
+    - ./ci/ci.sh test_large
+
+- label: ":python: (Medium A-J)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=-kubernetes,medium_size_python_tests_a_to_j
+      python/ray/tests/...
+
+- label: ":python: (Medium K-Z)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=-kubernetes,medium_size_python_tests_k_to_z
+      python/ray/tests/...
+
+- label: ":redis: (External Redis) (Small & Client)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./scripts/bazel_export_options)
+      --test_tag_filters=client_tests,small_size_python_tests
+      --test_env=TEST_EXTERNAL_REDIS=1
+      -- python/ray/tests/...
+
+- label: ":redis: (External Redis) (Large)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  parallelism: 3
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/env_info.sh
+    - TEST_EXTERNAL_REDIS=1 ./ci/ci.sh test_large
+
+- label: ":redis: (External Redis) (Medium A-J)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./scripts/bazel_export_options)
+      --test_tag_filters=-kubernetes,medium_size_python_tests_a_to_j
+      --test_env=TEST_EXTERNAL_REDIS=1
+      -- //python/ray/tests/...
+
+- label: ":redis: (External Redis) (Medium K-Z)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./scripts/bazel_export_options)
+      --test_tag_filters=-kubernetes,medium_size_python_tests_k_to_z
+      --test_env=TEST_EXTERNAL_REDIS=1
+      -- //python/ray/tests/...
+
+- label: ":python: Debug Test"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - pip uninstall -y ray
+    - RAY_DEBUG_BUILD=debug ./ci/ci.sh build
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci-debug $(./ci/run/bazel_export_options)
+      --test_tag_filters=-kubernetes,debug_tests
+      python/ray/tests/...
+
+- label: ":python: (ASAN tests)"
+  conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
+    - pip install "grpcio >= 1.28.1, <= 1.43.0"
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci --config=asan $(./ci/run/bazel_export_options)
+      --config=asan-buildkite
+      --test_tag_filters=-kubernetes,asan_tests
+      --test_env=CONDA_EXE
+      --test_env=CONDA_PYTHON_EXE
+      --test_env=CONDA_SHLVL
+      --test_env=CONDA_PREFIX
+      --test_env=CONDA_DEFAULT_ENV
+      python/ray/tests/...
+
+# https://github.com/ray-project/ray/issues/22460
+#- label: ":python: (Privileged test)"
+  #conditions: ["RAY_CI_PYTHON_AFFECTED"]
+  #commands:
+    #- LINUX_WHEELS=1 ./ci/ci.sh build
+    #- pip install docker
+     #We build image ray-worker-container:nightly-py36-cpu which have installed podman,but not push it.
+     #And we save this image to a tarball, so that we can load it to podman image storage in the
+     #nested-container which run tests. And in this nested-container, Raylet will start ray worker
+     #process in double-nested-container.
+    #- python ./ci/build/build-docker-images.py --py-versions py36 --device-types cpu --build-type BUILDKITE --only-build-worker-container
+    #- mkdir /ray-mount/containers
+    #- docker save -o /ray-mount/containers/images.tar rayproject/ray-worker-container:nightly-py36-cpu
+    #- docker run --rm --privileged -v /ray/containers:/var/lib/containers -v /ray:/ray --entrypoint /bin/bash
+      #rayproject/ray-worker-container:nightly-py36-cpu /ray/ci/build/test-worker-in-container.sh
+
+- label: ":octopus: Tune soft imports test"
+  conditions: ["RAY_CI_TUNE_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    # no TUNE_TESTING=1 on purpose
+    - ./ci/env/install-dependencies.sh
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=soft_imports python/ray/tune/...
+
+# Test to see if Train can be used without torch, tf, etc. installed
+- label: ":steam_locomotive: Train minimal install"
+  conditions: ["RAY_CI_TRAIN_AFFECTED"]
+  commands:
+      - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+      - TRAIN_MINIMAL_INSTALL=1 ./ci/env/install-minimal.sh
+      - ./ci/env/env_info.sh
+      - python ./ci/env/check_minimal_install.py
+      - bazel test --config=ci $(./ci/run/bazel_export_options)  --build_tests_only --test_tag_filters=minimal python/ray/train/...
+
+- label: ":kubernetes: operator"
+  conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
+  commands:
+    - |
+      cleanup() {
+        if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi
+        python python/ray/tests/kuberay/setup/teardown_kuberay.py || true
+        kind delete cluster
+      }
+      trap cleanup EXIT
+    - echo "--- Setting up Python 3.7 environment."
+    - PYTHON=3.7 ./ci/env/install-dependencies.sh
+    # Specifying PYTHON=3.7 above somehow messes up the Ray install.
+    # Uninstall and re-install Ray so that we can use Ray Client.
+    # (Remove thirdparty_files to sidestep an issue with psutil.)
+    - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files
+    - pip install -e /ray/python
+    - echo "--- Setting up local kind cluster."
+    - ./ci/k8s/prep-k8s-environment.sh
+    - echo "--- Building py37-cpu Ray image for the test."
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - pip install -q docker
+    - python ci/build/build-docker-images.py --py-versions py37 --device-types cpu --build-type LOCAL --build-base
+    # Tag the image built in the last step. We want to be sure to distinguish the image from the real Ray nightly.
+    - docker tag rayproject/ray:nightly-py37-cpu ray-ci:kuberay-test
+    # Load the image into the kind node.
+    - kind load docker-image ray-ci:kuberay-test
+    - echo "--- Setting up KubeRay operator."
+    - python python/ray/tests/kuberay/setup/setup_kuberay.py
+    - ./ci/env/env_info.sh
+    - echo "--- Running the test."
+    - bazel test --config=ci $(./ci/run/bazel_export_options)
+      --test_tag_filters=kuberay_operator
+      --test_env=RAY_IMAGE=docker.io/library/ray-ci:kuberay-test
+      --test_env=PULL_POLICY=IfNotPresent
+      --test_env=KUBECONFIG=/root/.kube/config
+      python/ray/tests/...
+
+- label: ":python: Ray DAG Tests"
+  conditions:
+    [
+        "RAY_CI_PYTHON_AFFECTED",
+    ]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - pip install -U pydot
+    - sudo apt-get install -y graphviz
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./scripts/bazel_export_options)
+      --test_tag_filters=ray_dag_tests
+      python/ray/dag/...

From b4520ea6692c1078b85c5ef162348d2a3270025e Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 14:24:47 +0100
Subject: [PATCH 35/61] newline

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/env/install-llvm-binaries.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/env/install-llvm-binaries.sh b/ci/env/install-llvm-binaries.sh
index 5603f1ebd102..d8b60a467c8f 100755
--- a/ci/env/install-llvm-binaries.sh
+++ b/ci/env/install-llvm-binaries.sh
@@ -114,4 +114,4 @@ if [ ! -f ".bazelrc" ]; then
     exit 1
 fi
 
-install_llvm "$@"
\ No newline at end of file
+install_llvm "$@"

From 85b661d944c3386addc52631658cfad2fcc8d221 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 14:35:39 +0100
Subject: [PATCH 36/61] Minimal install test should be in BUILD

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 .buildkite/pipeline.build.yml | 10 ++++++++++
 .buildkite/pipeline.test.yml  | 11 -----------
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/.buildkite/pipeline.build.yml b/.buildkite/pipeline.build.yml
index 0c388469fb5e..307278e3bdbe 100644
--- a/.buildkite/pipeline.build.yml
+++ b/.buildkite/pipeline.build.yml
@@ -510,3 +510,13 @@
       --test_env=PULL_POLICY=IfNotPresent
       --test_env=KUBECONFIG=/root/.kube/config
       python/ray/tests/...
+
+# Test to see if Train can be used without torch, tf, etc. installed
+- label: ":steam_locomotive: Train minimal install"
+  conditions: ["RAY_CI_TRAIN_AFFECTED"]
+  commands:
+      - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+      - TRAIN_MINIMAL_INSTALL=1 ./ci/env/install-minimal.sh
+      - ./ci/env/env_info.sh
+      - python ./ci/env/check_minimal_install.py
+      - bazel test --config=ci $(./ci/run/bazel_export_options)  --build_tests_only --test_tag_filters=minimal python/ray/train/...
diff --git a/.buildkite/pipeline.test.yml b/.buildkite/pipeline.test.yml
index 48274a7f9350..c227395258e9 100644
--- a/.buildkite/pipeline.test.yml
+++ b/.buildkite/pipeline.test.yml
@@ -35,17 +35,6 @@
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=soft_imports python/ray/tune/...
 
-# Test to see if Train can be used without torch, tf, etc. installed
-- label: ":steam_locomotive: Train minimal install"
-  conditions: ["RAY_CI_TRAIN_AFFECTED"]
-  commands:
-      - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-      - TRAIN_MINIMAL_INSTALL=1 ./ci/env/install-minimal.sh
-      - ./ci/env/env_info.sh
-      - python ./ci/env/check_minimal_install.py
-      - bazel test --config=ci $(./ci/run/bazel_export_options)  --build_tests_only --test_tag_filters=minimal python/ray/train/...
-
-
 - label: ":python: Ray DAG Tests"
   conditions:
     [

From a2c9ec1b28181cc1a615bbe074d50af37c07bea0 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 15:36:57 +0100
Subject: [PATCH 37/61] Move multinode test to BUILD

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 .buildkite/pipeline.build.yml | 25 +++++++++++++++++++++++++
 .buildkite/pipeline.ml.yml    | 23 -----------------------
 2 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/.buildkite/pipeline.build.yml b/.buildkite/pipeline.build.yml
index 307278e3bdbe..bb5ab5b55916 100644
--- a/.buildkite/pipeline.build.yml
+++ b/.buildkite/pipeline.build.yml
@@ -511,6 +511,31 @@
       --test_env=KUBECONFIG=/root/.kube/config
       python/ray/tests/...
 
+
+- label: ":octopus: Tune multinode tests"
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
+  commands:
+    - LINUX_WHEELS=1 ./ci/ci.sh build
+    - mkdir -p ~/.docker/cli-plugins/ && curl -SL https://github.com/docker/compose/releases/download/v2.0.1/docker-compose-linux-x86_64 -o ~/.docker/cli-plugins/docker-compose && chmod +x ~/.docker/cli-plugins/docker-compose
+    - pip install -U docker aws_requests_auth boto3
+    - ./ci/env/env_info.sh
+    - python ./ci/build/build-docker-images.py --py-versions py37 --device-types cpu --build-type LOCAL --build-base
+    - python ./ci/build/build-multinode-image.py rayproject/ray:nightly-py37-cpu rayproject/ray:multinode-py37
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
+      --test_tag_filters=multinode,-example,-flaky,-py37,-soft_imports,-gpu_only,-rllib
+      python/ray/tune/...
+      --test_env=RAY_HAS_SSH="1"
+      --test_env=RAY_DOCKER_IMAGE="rayproject/ray:multinode-py37"
+      --test_env=RAY_TEMPDIR="/ray-mount"
+      --test_env=RAY_HOSTDIR="/ray"
+      --test_env=RAY_TESTHOST="dind-daemon"
+      --test_env=DOCKER_HOST=tcp://docker:2376
+      --test_env=DOCKER_TLS_VERIFY=1
+      --test_env=DOCKER_CERT_PATH=/certs/client
+      --test_env=DOCKER_TLS_CERTDIR=/certs
+
+
+
 # Test to see if Train can be used without torch, tf, etc. installed
 - label: ":steam_locomotive: Train minimal install"
   conditions: ["RAY_CI_TRAIN_AFFECTED"]
diff --git a/.buildkite/pipeline.ml.yml b/.buildkite/pipeline.ml.yml
index d7b4ddcbe5bb..01064682bd61 100644
--- a/.buildkite/pipeline.ml.yml
+++ b/.buildkite/pipeline.ml.yml
@@ -288,28 +288,6 @@
       python/ray/tune/...
 
 
-- label: ":octopus: Tune multinode tests"
-  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
-  commands:
-    - LINUX_WHEELS=1 ./ci/ci.sh build
-    - mkdir -p ~/.docker/cli-plugins/ && curl -SL https://github.com/docker/compose/releases/download/v2.0.1/docker-compose-linux-x86_64 -o ~/.docker/cli-plugins/docker-compose && chmod +x ~/.docker/cli-plugins/docker-compose
-    - pip install -U docker aws_requests_auth boto3
-    - ./ci/env/env_info.sh
-    - python ./ci/build/build-docker-images.py --py-versions py37 --device-types cpu --build-type LOCAL --build-base
-    - python ./ci/build/build-multinode-image.py rayproject/ray:nightly-py37-cpu rayproject/ray:multinode-py37
-    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
-      --test_tag_filters=multinode,-example,-flaky,-py37,-soft_imports,-gpu_only,-rllib
-      python/ray/tune/...
-      --test_env=RAY_HAS_SSH="1"
-      --test_env=RAY_DOCKER_IMAGE="rayproject/ray:multinode-py37"
-      --test_env=RAY_TEMPDIR="/ray-mount"
-      --test_env=RAY_HOSTDIR="/ray"
-      --test_env=RAY_TESTHOST="dind-daemon"
-      --test_env=DOCKER_HOST=tcp://docker:2376
-      --test_env=DOCKER_TLS_VERIFY=1
-      --test_env=DOCKER_CERT_PATH=/certs/client
-      --test_env=DOCKER_TLS_CERTDIR=/certs
-
 - label: ":octopus: Tune examples {w/o tf/pytorch; no RLlib}"
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
   commands:
@@ -382,7 +360,6 @@
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - TUNE_TESTING=1 PYTHON=3.7 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
-    - rm -rf ./python/ray/thirdparty_files; rm -rf ./python/ray/pickle5_files; ./ci/ci.sh build
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=client --test_env=RAY_CLIENT_MODE=1 python/ray/util/dask/...
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=client python/ray/tune/...

From 0fd08a4d5cdb9620230f1433c23e5e2c740a875e Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 15:55:55 +0100
Subject: [PATCH 38/61] Python 3.7 is default

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 .buildkite/pipeline.build.yml     |  8 ++--
 .buildkite/pipeline.gpu.large.yml | 10 ++--
 .buildkite/pipeline.gpu.yml       |  2 +-
 .buildkite/pipeline.gpu_large.yml | 14 ++----
 .buildkite/pipeline.ml.yml        | 76 +++++++++++++++----------------
 .buildkite/pipeline.test.yml      |  4 +-
 .buildkite/pipeline.yml           | 12 ++---
 7 files changed, 61 insertions(+), 65 deletions(-)

diff --git a/.buildkite/pipeline.build.yml b/.buildkite/pipeline.build.yml
index bb5ab5b55916..d43a1366c403 100644
--- a/.buildkite/pipeline.build.yml
+++ b/.buildkite/pipeline.build.yml
@@ -276,8 +276,8 @@
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - echo "--- Setting up Python 3.7 environment."
-    - PYTHON=3.7 TORCH_VERSION=1.6 ./ci/env/install-dependencies.sh
-    # Specifying PYTHON=3.7 above somehow messes up the Ray install.
+    - TORCH_VERSION=1.6 ./ci/env/install-dependencies.sh
+    # Specifying above somehow messes up the Ray install.
     # Uninstall and re-install Ray so that we can use Ray Client.
     # (Remove thirdparty_files to sidestep an issue with psutil.)
     - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files
@@ -484,8 +484,8 @@
       }
       trap cleanup EXIT
     - echo "--- Setting up Python 3.7 environment."
-    - PYTHON=3.7 ./ci/env/install-dependencies.sh
-    # Specifying PYTHON=3.7 above somehow messes up the Ray install.
+    - ./ci/env/install-dependencies.sh
+    # Specifying above somehow messes up the Ray install.
     # Uninstall and re-install Ray so that we can use Ray Client.
     # (Remove thirdparty_files to sidestep an issue with psutil.)
     - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files
diff --git a/.buildkite/pipeline.gpu.large.yml b/.buildkite/pipeline.gpu.large.yml
index a27ec1d2ae03..b8950f7b1a85 100644
--- a/.buildkite/pipeline.gpu.large.yml
+++ b/.buildkite/pipeline.gpu.large.yml
@@ -2,7 +2,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - PYTHON=3.7 TRAIN_TESTING=1 TUNE_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
+    - TRAIN_TESTING=1 TUNE_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
     # Because Python version changed, we need to re-install Ray here
     - rm -rf ./python/ray/thirdparty_files; rm -rf ./python/ray/pickle5_files; ./ci/ci.sh build
     - pip install -Ur ./python/requirements_ml_docker.txt
@@ -13,7 +13,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - PYTHON=3.7 TRAIN_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
+    - TRAIN_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
     # Because Python version changed, we need to re-install Ray here
     - rm -rf ./python/ray/thirdparty_files; rm -rf ./python/ray/pickle5_files; ./ci/ci.sh build
     - pip install -Ur ./python/requirements_ml_docker.txt
@@ -26,7 +26,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - PYTHON=3.7 TRAIN_TESTING=1 DATA_PROCESSING_TESTING=1 ./ci/env/install-dependencies.sh
+    - TRAIN_TESTING=1 DATA_PROCESSING_TESTING=1 ./ci/env/install-dependencies.sh
     - pip install -Ur ./python/requirements_ml_docker.txt
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=datasets_train doc/...
@@ -35,7 +35,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - PYTHON=3.7 RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - pip install -Ur ./python/requirements_ml_docker.txt
     - ./ci/env/env_info.sh
     # --jobs 2 is necessary as we only need to have at least 2 gpus on the machine
@@ -59,7 +59,7 @@
     ["NO_WHEELS_REQUIRED", "RAY_CI_PYTHON_AFFECTED", "RAY_CI_TUNE_AFFECTED", "RAY_CI_DOC_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - DOC_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - DOC_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh
     - pip install -Ur ./python/requirements_ml_docker.txt
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,-py37,-post_wheel_build doc/...
diff --git a/.buildkite/pipeline.gpu.yml b/.buildkite/pipeline.gpu.yml
index 2216541883ab..e5576941b02a 100644
--- a/.buildkite/pipeline.gpu.yml
+++ b/.buildkite/pipeline.gpu.yml
@@ -12,7 +12,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - PYTHON=3.7 RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - pip install -Ur ./python/requirements_ml_docker.txt
     - ./ci/env/env_info.sh
     # --jobs 1 is necessary as we only have 1 GPU on the machine and running tests in parallel
diff --git a/.buildkite/pipeline.gpu_large.yml b/.buildkite/pipeline.gpu_large.yml
index a27ec1d2ae03..b4dc4b4520b4 100644
--- a/.buildkite/pipeline.gpu_large.yml
+++ b/.buildkite/pipeline.gpu_large.yml
@@ -2,9 +2,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - PYTHON=3.7 TRAIN_TESTING=1 TUNE_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
-    # Because Python version changed, we need to re-install Ray here
-    - rm -rf ./python/ray/thirdparty_files; rm -rf ./python/ray/pickle5_files; ./ci/ci.sh build
+    - TRAIN_TESTING=1 TUNE_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
     - pip install -Ur ./python/requirements_ml_docker.txt
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,gpu_only,-ray_air,-torch_1_11 python/ray/train/...
@@ -13,9 +11,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - PYTHON=3.7 TRAIN_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
-    # Because Python version changed, we need to re-install Ray here
-    - rm -rf ./python/ray/thirdparty_files; rm -rf ./python/ray/pickle5_files; ./ci/ci.sh build
+    - TRAIN_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
     - pip install -Ur ./python/requirements_ml_docker.txt
     - pip uninstall torch -y
     - pip install -U torch==1.11.0+cu113 --extra-index-url https://download.pytorch.org/whl/cu113
@@ -26,7 +22,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - PYTHON=3.7 TRAIN_TESTING=1 DATA_PROCESSING_TESTING=1 ./ci/env/install-dependencies.sh
+    - TRAIN_TESTING=1 DATA_PROCESSING_TESTING=1 ./ci/env/install-dependencies.sh
     - pip install -Ur ./python/requirements_ml_docker.txt
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=datasets_train doc/...
@@ -35,7 +31,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - PYTHON=3.7 RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - pip install -Ur ./python/requirements_ml_docker.txt
     - ./ci/env/env_info.sh
     # --jobs 2 is necessary as we only need to have at least 2 gpus on the machine
@@ -59,7 +55,7 @@
     ["NO_WHEELS_REQUIRED", "RAY_CI_PYTHON_AFFECTED", "RAY_CI_TUNE_AFFECTED", "RAY_CI_DOC_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - DOC_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - DOC_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh
     - pip install -Ur ./python/requirements_ml_docker.txt
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,-py37,-post_wheel_build doc/...
diff --git a/.buildkite/pipeline.ml.yml b/.buildkite/pipeline.ml.yml
index 01064682bd61..a87062d163c7 100644
--- a/.buildkite/pipeline.ml.yml
+++ b/.buildkite/pipeline.ml.yml
@@ -12,7 +12,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options)
       --build_tests_only
@@ -24,7 +24,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options)
       --build_tests_only
@@ -36,7 +36,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options)
       --build_tests_only
@@ -48,7 +48,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options)
       --build_tests_only
@@ -60,7 +60,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options)
       --build_tests_only
@@ -72,7 +72,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options)
       --build_tests_only
@@ -84,7 +84,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options)
       --build_tests_only
@@ -97,7 +97,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options)
       --build_tests_only
@@ -109,7 +109,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options)
       --build_tests_only
@@ -121,7 +121,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options)
       --build_tests_only
@@ -133,7 +133,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options)
       --build_tests_only
       --test_tag_filters=quick_train,-multi_gpu
@@ -144,7 +144,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     # Test all tests in the `algorithms` dir:
     - bazel test --config=ci $(./ci/run/bazel_export_options)
@@ -157,7 +157,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     # Test all tests in the `algorithms` dir:
     - bazel test --config=ci $(./ci/run/bazel_export_options)
@@ -170,7 +170,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     # Test everything that does not have any of the "main" labels:
     # "learning_tests|quick_train|examples|tests_dir".
@@ -184,7 +184,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
       --test_tag_filters=examples_A,examples_B,-multi_gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
@@ -193,7 +193,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
       --test_tag_filters=examples_C_AtoT,-multi_gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
@@ -202,7 +202,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
       --test_tag_filters=examples_C_UtoZ,-multi_gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
@@ -211,7 +211,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
       --test_tag_filters=examples_D,examples_E,examples_F,examples_G,examples_H,examples_I,examples_J,examples_K,examples_L,examples_M,examples_N,examples_O,examples_P,-multi_gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
@@ -221,7 +221,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
       --test_tag_filters=examples_Q,examples_R,examples_S,examples_T,examples_U,examples_V,examples_W,examples_X,examples_Y,examples_Z,-multi_gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
@@ -231,7 +231,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
       --test_tag_filters=tests_dir_A,tests_dir_B,tests_dir_C,tests_dir_D,tests_dir_E,tests_dir_F,tests_dir_G,tests_dir_H,tests_dir_I,tests_dir_J,tests_dir_K,tests_dir_L --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
@@ -241,7 +241,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
       --test_tag_filters=tests_dir_M,tests_dir_N,tests_dir_O,tests_dir_P,tests_dir_Q,tests_dir_S,tests_dir_T,tests_dir_U,tests_dir_V,tests_dir_W,tests_dir_X,tests_dir_Y,tests_dir_Z,-multi_gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
@@ -250,7 +250,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
       --test_tag_filters=tests_dir_R,-multi_gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
@@ -260,7 +260,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
       --test_tag_filters=documentation --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
@@ -270,7 +270,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - TUNE_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - TUNE_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
       --test_tag_filters=tests_dir_A,tests_dir_B,tests_dir_C,tests_dir_D,tests_dir_E,tests_dir_F,tests_dir_G,tests_dir_H,tests_dir_I,tests_dir_J,tests_dir_K,tests_dir_L,tests_dir_M,tests_dir_N,tests_dir_O,tests_dir_P,tests_dir_Q,tests_dir_R,-example,-py37,-soft_imports,-gpu_only,-rllib
@@ -280,7 +280,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - TUNE_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - TUNE_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - python ./ci/env/setup_credentials.py sigopt
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
@@ -292,7 +292,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - TUNE_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - TUNE_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=example,-tf,-pytorch,-py37,-soft_imports,-gpu_only,-rllib python/ray/tune/...
 
@@ -300,7 +300,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - TUNE_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - TUNE_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=tf,-pytorch,-py37,-soft_imports,-gpu_only,-rllib python/ray/tune/...
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-tf,pytorch,-py37,-soft_imports,-gpu_only,-rllib python/ray/tune/...
@@ -309,7 +309,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED", "RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - TUNE_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - TUNE_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu_only,rllib python/ray/tune/...
 
@@ -333,7 +333,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - TUNE_TESTING=1 PYTHON=3.7 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
+    - TUNE_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=py37,-client python/ray/tune/...
 
@@ -341,7 +341,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - TUNE_TESTING=1 PYTHON=3.7 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
+    - TUNE_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only python/ray/tests/xgboost/...
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only python/ray/tests/horovod/...
@@ -352,14 +352,14 @@
 #  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
 #  commands:
 #    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-#    - PYTHON=3.7 INSTALL_LUDWIG=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
+#    - INSTALL_LUDWIG=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
 #    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only python/ray/tests/ludwig/...
 
 - label: ":tropical_fish: ML Libraries w/ Ray Client Examples (Python 3.7)."
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - TUNE_TESTING=1 PYTHON=3.7 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
+    - TUNE_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=client --test_env=RAY_CLIENT_MODE=1 python/ray/util/dask/...
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=client python/ray/tune/...
@@ -368,7 +368,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_PYTHON_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - DATA_PROCESSING_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - DATA_PROCESSING_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only python/ray/tests/modin/...
     # Dask tests and examples.
@@ -378,7 +378,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_PYTHON_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - DATA_PROCESSING_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - DATA_PROCESSING_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-ray_air python/ray/data/...
 
@@ -386,7 +386,7 @@
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_PYTHON_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - DATA_PROCESSING_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - DATA_PROCESSING_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only python/ray/workflow/...
 
@@ -395,7 +395,7 @@
     ["RAY_CI_PYTHON_AFFECTED", "RAY_CI_TUNE_AFFECTED", "RAY_CI_DOC_AFFECTED", "RAY_CI_SERVE_AFFECTED", "RAY_CI_ML_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - DOC_TESTING=1 INSTALL_HOROVOD=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - DOC_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-ray_air,-gpu,-py37,-post_wheel_build doc/...
 
@@ -404,7 +404,7 @@
     ["RAY_CI_PYTHON_AFFECTED", "RAY_CI_TUNE_AFFECTED", "RAY_CI_DOC_AFFECTED", "RAY_CI_SERVE_AFFECTED", "RAY_CI_ML_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - DOC_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - DOC_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=ray_air,-needs_credentials,-gpu,-py37,-post_wheel_build doc/...
 
@@ -413,7 +413,7 @@
   commands:
     - if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then exit 0; fi
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - DOC_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - DOC_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - python ./ci/env/setup_credentials.py wandb comet_ml
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=needs_credentials,-gpu,-py37,-post_wheel_build doc/...
diff --git a/.buildkite/pipeline.test.yml b/.buildkite/pipeline.test.yml
index c227395258e9..de1ed55a9318 100644
--- a/.buildkite/pipeline.test.yml
+++ b/.buildkite/pipeline.test.yml
@@ -9,8 +9,8 @@
   commands:
     - export LINT=1
     - echo "--- Setting up Python 3.7 environment."
-    - PYTHON=3.7 ./ci/env/install-dependencies.sh
-    # Specifying PYTHON=3.7 above somehow messes up the Ray install.
+    - ./ci/env/install-dependencies.sh
+    # Specifying above somehow messes up the Ray install.
     # Uninstall and re-install Ray so that we can use Ray Client
     # (remove thirdparty_files to sidestep an issue with psutil).
     - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 1aacb13da9d3..1a67360c5980 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -180,8 +180,8 @@
   commands:
     - export LINT=1
     - echo "--- Setting up Python 3.7 environment."
-    - PYTHON=3.7 ./ci/env/install-dependencies.sh
-    # Specifying PYTHON=3.7 above somehow messes up the Ray install.
+    - ./ci/env/install-dependencies.sh
+    # Specifying above somehow messes up the Ray install.
     # Uninstall and re-install Ray so that we can use Ray Client
     # (remove thirdparty_files to sidestep an issue with psutil).
     - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files
@@ -308,8 +308,8 @@
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - echo "--- Setting up Python 3.7 environment."
-    - PYTHON=3.7 TORCH_VERSION=1.6 ./ci/env/install-dependencies.sh
-    # Specifying PYTHON=3.7 above somehow messes up the Ray install.
+    - TORCH_VERSION=1.6 ./ci/env/install-dependencies.sh
+    # Specifying above somehow messes up the Ray install.
     # Uninstall and re-install Ray so that we can use Ray Client.
     # (Remove thirdparty_files to sidestep an issue with psutil.)
     - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files
@@ -535,8 +535,8 @@
       }
       trap cleanup EXIT
     - echo "--- Setting up Python 3.7 environment."
-    - PYTHON=3.7 ./ci/env/install-dependencies.sh
-    # Specifying PYTHON=3.7 above somehow messes up the Ray install.
+    - ./ci/env/install-dependencies.sh
+    # Specifying above somehow messes up the Ray install.
     # Uninstall and re-install Ray so that we can use Ray Client.
     # (Remove thirdparty_files to sidestep an issue with psutil.)
     - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files

From 8f9159515effe8cfc00351ba64d711c76ebef41f Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 19:49:55 +0100
Subject: [PATCH 39/61] Fix minimal install

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 .buildkite/pipeline.build.yml  | 6 ++++++
 ci/env/install-dependencies.sh | 1 +
 2 files changed, 7 insertions(+)

diff --git a/.buildkite/pipeline.build.yml b/.buildkite/pipeline.build.yml
index d43a1366c403..4dc3285e1f22 100644
--- a/.buildkite/pipeline.build.yml
+++ b/.buildkite/pipeline.build.yml
@@ -353,6 +353,7 @@
   conditions: ["RAY_CI_PYTHON_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - DL=1 ./ci/env/install-dependencies.sh
     - bash ./ci/ci.sh prepare_docker
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options)
@@ -371,6 +372,7 @@
   parallelism: 3
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - DL=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - ./ci/ci.sh test_large
 
@@ -387,6 +389,7 @@
   conditions: ["RAY_CI_PYTHON_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - DL=1 ./ci/env/install-dependencies.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options)
       --test_tag_filters=-kubernetes,medium_size_python_tests_k_to_z
       python/ray/tests/...
@@ -395,6 +398,7 @@
   conditions: ["RAY_CI_PYTHON_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - DL=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./scripts/bazel_export_options)
       --test_tag_filters=client_tests,small_size_python_tests
@@ -406,6 +410,7 @@
   parallelism: 3
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - DL=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - TEST_EXTERNAL_REDIS=1 ./ci/ci.sh test_large
 
@@ -423,6 +428,7 @@
   conditions: ["RAY_CI_PYTHON_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - DL=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./scripts/bazel_export_options)
       --test_tag_filters=-kubernetes,medium_size_python_tests_k_to_z
diff --git a/ci/env/install-dependencies.sh b/ci/env/install-dependencies.sh
index e30f1e0b9d34..b6261fd034b8 100755
--- a/ci/env/install-dependencies.sh
+++ b/ci/env/install-dependencies.sh
@@ -148,6 +148,7 @@ install_miniconda() {
     (
       set +x
       echo "Resetting Anaconda Python ${python_version}..."
+      pip freeze | grep -v conda | xargs -n 1 pip uninstall
       "${WORKSPACE_DIR}"/ci/suppress_output conda install -q -y --rev 0
     )
   fi

From 87df7f86cc1e763f578098782f3ec769f078e64a Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Fri, 16 Sep 2022 23:51:25 +0100
Subject: [PATCH 40/61] Add args to build docker file, update conda install env

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.build     | 11 +++++++++++
 ci/env/install-dependencies.sh |  5 +++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/ci/docker/Dockerfile.build b/ci/docker/Dockerfile.build
index 7f0b3fc7b9c3..022d632690ef 100644
--- a/ci/docker/Dockerfile.build
+++ b/ci/docker/Dockerfile.build
@@ -1,6 +1,17 @@
 ARG DOCKER_IMAGE_BASE_BUILD
 FROM $DOCKER_IMAGE_BASE_BUILD
 
+ARG REMOTE_CACHE_URL
+ARG BUILDKITE_PULL_REQUEST
+ARG BUILDKITE_COMMIT
+ARG BUILDKITE_PULL_REQUEST_BASE_BRANCH
+
+ENV BUILDKITE_PULL_REQUEST=${BUILDKITE_PULL_REQUEST}
+ENV BUILDKITE_COMMIT=${BUILDKITE_COMMIT}
+ENV BUILDKITE_PULL_REQUEST_BASE_BRANCH=${BUILDKITE_PULL_REQUEST_BASE_BRANCH}
+ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
+ENV BUILDKITE_BAZEL_CACHE_URL=${REMOTE_CACHE_URL}
+
 # Move out of working dir /ray
 # Delete stale data
 WORKDIR /
diff --git a/ci/env/install-dependencies.sh b/ci/env/install-dependencies.sh
index b6261fd034b8..df4cd98cb23c 100755
--- a/ci/env/install-dependencies.sh
+++ b/ci/env/install-dependencies.sh
@@ -148,8 +148,9 @@ install_miniconda() {
     (
       set +x
       echo "Resetting Anaconda Python ${python_version}..."
-      pip freeze | grep -v conda | xargs -n 1 pip uninstall
-      "${WORKSPACE_DIR}"/ci/suppress_output conda install -q -y --rev 0
+      "${WORKSPACE_DIR}"/ci/suppress_output conda create -q -y -n minimal python==3.7 pip
+      "${WORKSPACE_DIR}"/ci/suppress_output conda activate minimal
+      echo "conda activate minimal" >> "$HOME/.bashrc"
     )
   fi
 

From 6b967639d1ffedd8806470ce4711a05651fc3a0e Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Sat, 17 Sep 2022 08:21:44 +0100
Subject: [PATCH 41/61] Some fixes

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.build                 | 5 +++++
 ci/env/install-dependencies.sh             | 6 ++++--
 ci/env/install-minimal.sh                  | 2 +-
 python/ray/tests/test_state_api_log.py     | 2 +-
 python/ray/tests/test_state_api_summary.py | 2 +-
 5 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/ci/docker/Dockerfile.build b/ci/docker/Dockerfile.build
index 022d632690ef..7b18e8e91434 100644
--- a/ci/docker/Dockerfile.build
+++ b/ci/docker/Dockerfile.build
@@ -23,10 +23,15 @@ WORKDIR /ray
 # Below should be re-run each time
 COPY . .
 
+RUN env
+
 # init also calls install-dependencies.sh
 RUN BUILD=1 bash --login -i ./ci/ci.sh init
 RUN bash --login -i ./ci/ci.sh build
 
+RUN export CC=clang CXX=clang++-12
+
 # Run determine test to run
 RUN bash --login -i -c "python ./ci/pipeline/determine_tests_to_run.py --output=json > affected_set.json"
 RUN cat affected_set.json
+
diff --git a/ci/env/install-dependencies.sh b/ci/env/install-dependencies.sh
index df4cd98cb23c..6bd11b03a1a2 100755
--- a/ci/env/install-dependencies.sh
+++ b/ci/env/install-dependencies.sh
@@ -148,8 +148,10 @@ install_miniconda() {
     (
       set +x
       echo "Resetting Anaconda Python ${python_version}..."
-      "${WORKSPACE_DIR}"/ci/suppress_output conda create -q -y -n minimal python==3.7 pip
+      source /opt/miniconda/etc/profile.d/conda.sh
+      "${WORKSPACE_DIR}"/ci/suppress_output conda create -q -y -n minimal python==${PYTHON} pip
       "${WORKSPACE_DIR}"/ci/suppress_output conda activate minimal
+      echo "source /opt/miniconda/etc/profile.d/conda.sh" >> "$HOME/.bashrc"
       echo "conda activate minimal" >> "$HOME/.bashrc"
     )
   fi
@@ -296,7 +298,7 @@ install_pip_packages() {
     pip install --no-clean dm-tree==0.1.5  # --no-clean is due to: https://github.com/deepmind/tree/issues/5
   fi
 
-  if [ -n "${PYTHON-}" ] && [ "${MINIMAL_INSTALL-}" != 1 ]; then
+  if ([ -n "${PYTHON-}" ] && [ "${MINIMAL_INSTALL-}" != 1 ]) || [ "${DL-}" = "1" ]; then
     # Remove this entire section once Serve dependencies are fixed.
     if ([ -z "${BUILDKITE-}" ] || [ "${DL-}" = "1" ]) && [ "${DOC_TESTING-}" != 1 ] && [ "${TRAIN_TESTING-}" != 1 ] && [ "${TUNE_TESTING-}" != 1 ] && [ "${RLLIB_TESTING-}" != 1 ]; then
       # We want to install the CPU version only.
diff --git a/ci/env/install-minimal.sh b/ci/env/install-minimal.sh
index 40db33a5698a..8b7f3e555fbe 100755
--- a/ci/env/install-minimal.sh
+++ b/ci/env/install-minimal.sh
@@ -2,7 +2,7 @@
 
 # Python version can be specified as 3.7, 3.8, 3.9, etc..
 if [ -z "$1" ]; then
-    PYTHON_VERSION=${PYTHON-3.7}
+    PYTHON_VERSION="$1"
 else
     if [ "$1" = "3.6" ]; then
         PYTHON_VERSION=${PYTHON-3.6}
diff --git a/python/ray/tests/test_state_api_log.py b/python/ray/tests/test_state_api_log.py
index 7aa11d640d2d..21019710bc4a 100644
--- a/python/ray/tests/test_state_api_log.py
+++ b/python/ray/tests/test_state_api_log.py
@@ -28,7 +28,7 @@
 from ray.experimental.state.exception import DataSourceUnavailable
 from ray.experimental.state.state_manager import StateDataSourceClient
 
-if sys.version_info > (3, 7, 0):
+if sys.version_info >= (3, 8, 0):
     from unittest.mock import AsyncMock
 else:
     from asyncmock import AsyncMock
diff --git a/python/ray/tests/test_state_api_summary.py b/python/ray/tests/test_state_api_summary.py
index 541eb17bffc5..dfa2548012c1 100644
--- a/python/ray/tests/test_state_api_summary.py
+++ b/python/ray/tests/test_state_api_summary.py
@@ -14,7 +14,7 @@
 from ray._private.test_utils import wait_for_condition
 from ray._raylet import ActorID, TaskID, ObjectID
 
-if sys.version_info > (3, 7, 0):
+if sys.version_info >= (3, 8, 0):
     from unittest.mock import AsyncMock
 else:
     from asyncmock import AsyncMock

From cece506b063443970b7cfd5a2cc4c0586f3f0549 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Sat, 17 Sep 2022 16:51:34 +0100
Subject: [PATCH 42/61] do not set commit env

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.base_gpu  | 4 ----
 ci/docker/Dockerfile.base_test | 5 +----
 ci/docker/Dockerfile.build     | 2 --
 ci/docker/Dockerfile.gpu       | 9 +++++++++
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/ci/docker/Dockerfile.base_gpu b/ci/docker/Dockerfile.base_gpu
index 6d33fd2d9b25..377f0297424f 100644
--- a/ci/docker/Dockerfile.base_gpu
+++ b/ci/docker/Dockerfile.base_gpu
@@ -15,16 +15,12 @@ ENV PYTHON=$PYTHON
 ENV RAY_USE_RANDOM_PORTS=1
 ENV RAY_DEFAULT_BUILD=1
 ENV RAY_INSTALL_JAVA=0
-ENV BUILDKITE_PULL_REQUEST=${BUILDKITE_PULL_REQUEST}
-ENV BUILDKITE_COMMIT=${BUILDKITE_COMMIT}
-ENV BUILDKITE_PULL_REQUEST_BASE_BRANCH=${BUILDKITE_PULL_REQUEST_BASE_BRANCH}
 # For wheel build
 # https://github.com/docker-library/docker/blob/master/20.10/docker-entrypoint.sh
 ENV DOCKER_TLS_CERTDIR=/certs
 ENV DOCKER_HOST=tcp://docker:2376
 ENV DOCKER_TLS_VERIFY=1
 ENV DOCKER_CERT_PATH=/certs/client
-ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
 ENV BUILDKITE_BAZEL_CACHE_URL=${REMOTE_CACHE_URL}
 
 RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
diff --git a/ci/docker/Dockerfile.base_test b/ci/docker/Dockerfile.base_test
index e03b05144703..ff69354e61af 100644
--- a/ci/docker/Dockerfile.base_test
+++ b/ci/docker/Dockerfile.base_test
@@ -15,16 +15,13 @@ ENV PYTHON=$PYTHON
 ENV RAY_USE_RANDOM_PORTS=1
 ENV RAY_DEFAULT_BUILD=1
 ENV RAY_INSTALL_JAVA=0
-ENV BUILDKITE_PULL_REQUEST=${BUILDKITE_PULL_REQUEST}
-ENV BUILDKITE_COMMIT=${BUILDKITE_COMMIT}
-ENV BUILDKITE_PULL_REQUEST_BASE_BRANCH=${BUILDKITE_PULL_REQUEST_BASE_BRANCH}
+
 # For wheel build
 # https://github.com/docker-library/docker/blob/master/20.10/docker-entrypoint.sh
 ENV DOCKER_TLS_CERTDIR=/certs
 ENV DOCKER_HOST=tcp://docker:2376
 ENV DOCKER_TLS_VERIFY=1
 ENV DOCKER_CERT_PATH=/certs/client
-ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
 ENV BUILDKITE_BAZEL_CACHE_URL=${REMOTE_CACHE_URL}
 
 RUN apt-get update -qq && apt-get upgrade -qq
diff --git a/ci/docker/Dockerfile.build b/ci/docker/Dockerfile.build
index 7b18e8e91434..d8877faaf892 100644
--- a/ci/docker/Dockerfile.build
+++ b/ci/docker/Dockerfile.build
@@ -1,7 +1,6 @@
 ARG DOCKER_IMAGE_BASE_BUILD
 FROM $DOCKER_IMAGE_BASE_BUILD
 
-ARG REMOTE_CACHE_URL
 ARG BUILDKITE_PULL_REQUEST
 ARG BUILDKITE_COMMIT
 ARG BUILDKITE_PULL_REQUEST_BASE_BRANCH
@@ -10,7 +9,6 @@ ENV BUILDKITE_PULL_REQUEST=${BUILDKITE_PULL_REQUEST}
 ENV BUILDKITE_COMMIT=${BUILDKITE_COMMIT}
 ENV BUILDKITE_PULL_REQUEST_BASE_BRANCH=${BUILDKITE_PULL_REQUEST_BASE_BRANCH}
 ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
-ENV BUILDKITE_BAZEL_CACHE_URL=${REMOTE_CACHE_URL}
 
 # Move out of working dir /ray
 # Delete stale data
diff --git a/ci/docker/Dockerfile.gpu b/ci/docker/Dockerfile.gpu
index a6a7a7ab9311..99e184e388a2 100644
--- a/ci/docker/Dockerfile.gpu
+++ b/ci/docker/Dockerfile.gpu
@@ -1,6 +1,15 @@
 ARG DOCKER_IMAGE_BASE_GPU
 FROM $DOCKER_IMAGE_BASE_GPU
 
+ARG BUILDKITE_PULL_REQUEST
+ARG BUILDKITE_COMMIT
+ARG BUILDKITE_PULL_REQUEST_BASE_BRANCH
+
+ENV BUILDKITE_PULL_REQUEST=${BUILDKITE_PULL_REQUEST}
+ENV BUILDKITE_COMMIT=${BUILDKITE_COMMIT}
+ENV BUILDKITE_PULL_REQUEST_BASE_BRANCH=${BUILDKITE_PULL_REQUEST_BASE_BRANCH}
+ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
+
 # Move out of working dir /ray
 # Delete stale data
 WORKDIR /

From f0fd80ed64ea53097a9539ff51a5ce7dafcdb68e Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Sat, 17 Sep 2022 17:18:02 +0100
Subject: [PATCH 43/61] Revert env changes

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.base_gpu  | 4 ++++
 ci/docker/Dockerfile.base_test | 5 ++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/ci/docker/Dockerfile.base_gpu b/ci/docker/Dockerfile.base_gpu
index 377f0297424f..6d33fd2d9b25 100644
--- a/ci/docker/Dockerfile.base_gpu
+++ b/ci/docker/Dockerfile.base_gpu
@@ -15,12 +15,16 @@ ENV PYTHON=$PYTHON
 ENV RAY_USE_RANDOM_PORTS=1
 ENV RAY_DEFAULT_BUILD=1
 ENV RAY_INSTALL_JAVA=0
+ENV BUILDKITE_PULL_REQUEST=${BUILDKITE_PULL_REQUEST}
+ENV BUILDKITE_COMMIT=${BUILDKITE_COMMIT}
+ENV BUILDKITE_PULL_REQUEST_BASE_BRANCH=${BUILDKITE_PULL_REQUEST_BASE_BRANCH}
 # For wheel build
 # https://github.com/docker-library/docker/blob/master/20.10/docker-entrypoint.sh
 ENV DOCKER_TLS_CERTDIR=/certs
 ENV DOCKER_HOST=tcp://docker:2376
 ENV DOCKER_TLS_VERIFY=1
 ENV DOCKER_CERT_PATH=/certs/client
+ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
 ENV BUILDKITE_BAZEL_CACHE_URL=${REMOTE_CACHE_URL}
 
 RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
diff --git a/ci/docker/Dockerfile.base_test b/ci/docker/Dockerfile.base_test
index ff69354e61af..e03b05144703 100644
--- a/ci/docker/Dockerfile.base_test
+++ b/ci/docker/Dockerfile.base_test
@@ -15,13 +15,16 @@ ENV PYTHON=$PYTHON
 ENV RAY_USE_RANDOM_PORTS=1
 ENV RAY_DEFAULT_BUILD=1
 ENV RAY_INSTALL_JAVA=0
-
+ENV BUILDKITE_PULL_REQUEST=${BUILDKITE_PULL_REQUEST}
+ENV BUILDKITE_COMMIT=${BUILDKITE_COMMIT}
+ENV BUILDKITE_PULL_REQUEST_BASE_BRANCH=${BUILDKITE_PULL_REQUEST_BASE_BRANCH}
 # For wheel build
 # https://github.com/docker-library/docker/blob/master/20.10/docker-entrypoint.sh
 ENV DOCKER_TLS_CERTDIR=/certs
 ENV DOCKER_HOST=tcp://docker:2376
 ENV DOCKER_TLS_VERIFY=1
 ENV DOCKER_CERT_PATH=/certs/client
+ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
 ENV BUILDKITE_BAZEL_CACHE_URL=${REMOTE_CACHE_URL}
 
 RUN apt-get update -qq && apt-get upgrade -qq

From b2b8f60391031097ce20318b897b76a5d5ea59ed Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Sat, 17 Sep 2022 17:20:47 +0100
Subject: [PATCH 44/61] New commit

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.gpu | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ci/docker/Dockerfile.gpu b/ci/docker/Dockerfile.gpu
index 99e184e388a2..88e87e1514e8 100644
--- a/ci/docker/Dockerfile.gpu
+++ b/ci/docker/Dockerfile.gpu
@@ -21,6 +21,8 @@ WORKDIR /ray
 # Copy new ray files
 COPY . .
 
+RUN env
+
 # Install Ray
 RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 bash --login -i -c -- "python3 -m pip install -e /ray/python/"
 

From 7810b37d2d1e12cb6ffb820cc08483aa08410055 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Sun, 18 Sep 2022 08:58:43 +0100
Subject: [PATCH 45/61] Re-isntall conda on minimal install, download llvm anew

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/env/install-dependencies.sh  | 9 +++------
 ci/env/install-llvm-binaries.sh | 3 ++-
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/ci/env/install-dependencies.sh b/ci/env/install-dependencies.sh
index 6bd11b03a1a2..eddd85a290c9 100755
--- a/ci/env/install-dependencies.sh
+++ b/ci/env/install-dependencies.sh
@@ -80,7 +80,7 @@ install_miniconda() {
     conda="$(command -v conda || true)"
   fi
 
-  if [ ! -x "${conda}" ]; then  # If no conda is found, install it
+  if [ ! -x "${conda}" ] || [ "${MINIMAL_INSTALL-}" != 1 ]; then  # If no conda is found, install it
     local miniconda_dir  # Keep directories user-independent, to help with Bazel caching
     case "${OSTYPE}" in
       linux*) miniconda_dir="/opt/miniconda";;
@@ -115,6 +115,7 @@ install_miniconda() {
         conda="${miniconda_dir}\Scripts\conda.exe"
         ;;
       *)
+        rm -rf "${miniconda_dir}"
         mkdir -p -- "${miniconda_dir}"
         # We're forced to pass -b for non-interactive mode.
         # Unfortunately it inhibits PATH modifications as a side effect.
@@ -148,11 +149,7 @@ install_miniconda() {
     (
       set +x
       echo "Resetting Anaconda Python ${python_version}..."
-      source /opt/miniconda/etc/profile.d/conda.sh
-      "${WORKSPACE_DIR}"/ci/suppress_output conda create -q -y -n minimal python==${PYTHON} pip
-      "${WORKSPACE_DIR}"/ci/suppress_output conda activate minimal
-      echo "source /opt/miniconda/etc/profile.d/conda.sh" >> "$HOME/.bashrc"
-      echo "conda activate minimal" >> "$HOME/.bashrc"
+      "${WORKSPACE_DIR}"/ci/suppress_output conda install -q -y --rev 0
     )
   fi
 
diff --git a/ci/env/install-llvm-binaries.sh b/ci/env/install-llvm-binaries.sh
index d8b60a467c8f..d6097507dc40 100755
--- a/ci/env/install-llvm-binaries.sh
+++ b/ci/env/install-llvm-binaries.sh
@@ -104,7 +104,8 @@ if [ -n "${BUILDKITE-}" ] && [ -f "$LLVM_DOWNLOAD_URL_FILENAME" ]; then
   read -r line < "$LLVM_DOWNLOAD_URL_FILENAME"
   if [ "$line" == "$LLVM_URL" ]; then
     printInfo "Skipping llvm download/install on Buildkite because LLVM was previously installed from the same URL ${line}."
-    exit 0
+    # Todo: either remove or skip again
+    # exit 0
   fi
 fi
 

From fbc1209eb2696b68c7f20f3c23b38088946a785e Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Sun, 18 Sep 2022 09:36:17 +0100
Subject: [PATCH 46/61] miniconda fix

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/env/install-dependencies.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/env/install-dependencies.sh b/ci/env/install-dependencies.sh
index eddd85a290c9..abe3e6be810f 100755
--- a/ci/env/install-dependencies.sh
+++ b/ci/env/install-dependencies.sh
@@ -80,7 +80,7 @@ install_miniconda() {
     conda="$(command -v conda || true)"
   fi
 
-  if [ ! -x "${conda}" ] || [ "${MINIMAL_INSTALL-}" != 1 ]; then  # If no conda is found, install it
+  if [ ! -x "${conda}" ] || [ "${MINIMAL_INSTALL-}" = 1 ]; then  # If no conda is found, install it
     local miniconda_dir  # Keep directories user-independent, to help with Bazel caching
     case "${OSTYPE}" in
       linux*) miniconda_dir="/opt/miniconda";;

From d9f67b29b1dabd38d88c5c92db2c6c27468261f0 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Sun, 18 Sep 2022 10:07:04 +0100
Subject: [PATCH 47/61] Fully revert llvm install check

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/env/install-llvm-binaries.sh | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/ci/env/install-llvm-binaries.sh b/ci/env/install-llvm-binaries.sh
index d6097507dc40..a18c1fd8c021 100755
--- a/ci/env/install-llvm-binaries.sh
+++ b/ci/env/install-llvm-binaries.sh
@@ -27,11 +27,9 @@ trap '[ $? -eq 0 ] || log_err' EXIT
 
 LLVM_URL="https://github.com/llvm/llvm-project/releases/download/llvmorg-12.0.1/clang+llvm-12.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz"
 TARGET_DIR="/opt/llvm"
-LLVM_DOWNLOAD_URL_FILENAME="${TARGET_DIR}/llvm_download_url.txt"
 
 install_llvm() {
   local url targetdir
-
   if [ $# -ge 1 ]; then
     url="$1"
   else
@@ -95,20 +93,8 @@ build:llvm --linkopt='-fuse-ld=${targetdir}/bin/ld.lld'
 build:llvm --linkopt='-L${targetdir}/lib'
 build:llvm --linkopt='-Wl,-rpath,${targetdir}/lib'
 # ==== end of --config=llvm options generated by ci/env/install-llvm-binaries.sh" >> .llvm-local.bazelrc
-
-  echo "$url" > $LLVM_DOWNLOAD_URL_FILENAME
-  printInfo "LLVML installed and URL of current llvm install logged to $LLVM_DOWNLOAD_URL_FILENAME"
 }
 
-if [ -n "${BUILDKITE-}" ] && [ -f "$LLVM_DOWNLOAD_URL_FILENAME" ]; then
-  read -r line < "$LLVM_DOWNLOAD_URL_FILENAME"
-  if [ "$line" == "$LLVM_URL" ]; then
-    printInfo "Skipping llvm download/install on Buildkite because LLVM was previously installed from the same URL ${line}."
-    # Todo: either remove or skip again
-    # exit 0
-  fi
-fi
-
 
 if [ ! -f ".bazelrc" ]; then
     printError ".bazelrc not found under working directory. Please run this script under repository root."

From 762b8411c04a7532d8ff4080698d6cd27aab4798 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Sun, 18 Sep 2022 10:10:33 +0100
Subject: [PATCH 48/61] Fix install minimal

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/env/install-minimal.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/ci/env/install-minimal.sh b/ci/env/install-minimal.sh
index 8b7f3e555fbe..9c3293ca9d73 100755
--- a/ci/env/install-minimal.sh
+++ b/ci/env/install-minimal.sh
@@ -2,18 +2,18 @@
 
 # Python version can be specified as 3.7, 3.8, 3.9, etc..
 if [ -z "$1" ]; then
-    PYTHON_VERSION="$1"
+    PYTHON_VERSION=${PYTHON-3.7}
 else
     if [ "$1" = "3.6" ]; then
-        PYTHON_VERSION=${PYTHON-3.6}
+        PYTHON_VERSION="3.6"
     elif [ "$1" = "3.7" ]; then
-        PYTHON_VERSION=${PYTHON-3.7}
+        PYTHON_VERSION="3.7"
     elif [ "$1" = "3.8" ]; then
-        PYTHON_VERSION=${PYTHON-3.8}
+        PYTHON_VERSION="3.8"
     elif [ "$1" = "3.9" ]; then
-        PYTHON_VERSION=${PYTHON-3.9}
+        PYTHON_VERSION="3.9"
     elif [ "$1" = "3.10" ]; then
-        PYTHON_VERSION=${PYTHON-3.10}
+        PYTHON_VERSION="3.10"
     else
         echo "Unsupported Python version."
         exit 1

From 1aad104d16a69b6175ef7914174c6e6efa25e849 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Sun, 18 Sep 2022 11:29:50 +0100
Subject: [PATCH 49/61] Only delete conda on minimal install

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/Dockerfile.ml        | 2 +-
 ci/docker/Dockerfile.test      | 2 +-
 ci/env/install-dependencies.sh | 4 +++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/ci/docker/Dockerfile.ml b/ci/docker/Dockerfile.ml
index d29a153eca5b..fcdf7aeb9d14 100644
--- a/ci/docker/Dockerfile.ml
+++ b/ci/docker/Dockerfile.ml
@@ -15,4 +15,4 @@ COPY . .
 # Install Ray
 RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 bash --login -i -c -- "python3 -m pip install -e /ray/python/"
 
-RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh
+RUN RLLIB_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 bash --login -i ./ci/env/install-dependencies.sh
diff --git a/ci/docker/Dockerfile.test b/ci/docker/Dockerfile.test
index 1bac67a050bc..a0645b154c2b 100644
--- a/ci/docker/Dockerfile.test
+++ b/ci/docker/Dockerfile.test
@@ -15,4 +15,4 @@ COPY . .
 # Install Ray
 RUN SKIP_BAZEL_BUILD=1 RAY_INSTALL_JAVA=0 bash --login -i -c -- "python3 -m pip install -e /ray/python/"
 
-RUN ./ci/env/install-dependencies.sh
+RUN bash --login -i ./ci/env/install-dependencies.sh
diff --git a/ci/env/install-dependencies.sh b/ci/env/install-dependencies.sh
index abe3e6be810f..a0017cd6162f 100755
--- a/ci/env/install-dependencies.sh
+++ b/ci/env/install-dependencies.sh
@@ -115,7 +115,9 @@ install_miniconda() {
         conda="${miniconda_dir}\Scripts\conda.exe"
         ;;
       *)
-        rm -rf "${miniconda_dir}"
+        if [ "${MINIMAL_INSTALL-}" = 1 ]; then
+          rm -rf "${miniconda_dir}"
+        fi
         mkdir -p -- "${miniconda_dir}"
         # We're forced to pass -b for non-interactive mode.
         # Unfortunately it inhibits PATH modifications as a side effect.

From 232b6c56e594efd75aaa2bee142a9845628d986c Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Sun, 18 Sep 2022 12:17:29 +0100
Subject: [PATCH 50/61] Move documentation test

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 .buildkite/pipeline.build.yml | 12 ++++++++++++
 .buildkite/pipeline.test.yml  | 13 -------------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/.buildkite/pipeline.build.yml b/.buildkite/pipeline.build.yml
index 4dc3285e1f22..23653b546e1d 100644
--- a/.buildkite/pipeline.build.yml
+++ b/.buildkite/pipeline.build.yml
@@ -517,6 +517,18 @@
       --test_env=KUBECONFIG=/root/.kube/config
       python/ray/tests/...
 
+- label: ":book: Documentation"
+  commands:
+    - export LINT=1
+    - echo "--- Setting up Python 3.7 environment."
+    - ./ci/env/install-dependencies.sh
+    # Specifying above somehow messes up the Ray install.
+    # Uninstall and re-install Ray so that we can use Ray Client
+    # (remove thirdparty_files to sidestep an issue with psutil).
+    - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files
+    - pushd /ray && git clean -f -f -x -d -e .whl -e python/ray/dashboard/client && popd
+    - bazel clean --expunge
+    - ./ci/ci.sh build
 
 - label: ":octopus: Tune multinode tests"
   conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
diff --git a/.buildkite/pipeline.test.yml b/.buildkite/pipeline.test.yml
index de1ed55a9318..01bc8341dc16 100644
--- a/.buildkite/pipeline.test.yml
+++ b/.buildkite/pipeline.test.yml
@@ -5,19 +5,6 @@
     - ./ci/env/install-dependencies.sh
     - ./ci/ci.sh lint
 
-- label: ":book: Documentation"
-  commands:
-    - export LINT=1
-    - echo "--- Setting up Python 3.7 environment."
-    - ./ci/env/install-dependencies.sh
-    # Specifying above somehow messes up the Ray install.
-    # Uninstall and re-install Ray so that we can use Ray Client
-    # (remove thirdparty_files to sidestep an issue with psutil).
-    - pip uninstall -y ray && rm -rf /ray/python/ray/thirdparty_files
-    - pushd /ray && git clean -f -f -x -d -e .whl -e python/ray/dashboard/client && popd
-    - bazel clean --expunge
-    - ./ci/ci.sh build
-
 - label: ":book: LinkCheck"
   commands:
     - export LINT=1

From 33d5de579091bc9a0acf280db91942aee320e031 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Sun, 18 Sep 2022 17:23:52 +0100
Subject: [PATCH 51/61] Fix some tests

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 .flake8                              | 1 +
 python/ray/tests/BUILD               | 2 +-
 python/ray/tests/test_basic_3.py     | 4 ++++
 python/ray/tests/test_runtime_env.py | 4 ++++
 python/ray/tests/test_state_api.py   | 6 +++---
 5 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/.flake8 b/.flake8
index e37fb41c96b9..82772b8f7a7c 100644
--- a/.flake8
+++ b/.flake8
@@ -12,6 +12,7 @@ max-line-length = 88
 inline-quotes = "
 ignore =
   C408
+  C417
   E121
   E123
   E126
diff --git a/python/ray/tests/BUILD b/python/ray/tests/BUILD
index 00393aa44e45..3567666d2526 100644
--- a/python/ray/tests/BUILD
+++ b/python/ray/tests/BUILD
@@ -336,7 +336,7 @@ py_test_module_list(
 
 py_test(
   name = "test_runtime_env_complicated",
-  size = "large",
+  size = "enormous",
   srcs = ["test_runtime_env_complicated.py"],
   tags = ["exclusive", "post_wheel_build", "team:serve"],
   deps = ["//:ray_lib", ":conftest"],
diff --git a/python/ray/tests/test_basic_3.py b/python/ray/tests/test_basic_3.py
index d41e2a444781..14c4323b1c49 100644
--- a/python/ray/tests/test_basic_3.py
+++ b/python/ray/tests/test_basic_3.py
@@ -60,6 +60,10 @@ def collected(self):
     assert ray.get(test.collected.remote())
 
 
+@pytest.mark.skipif(
+    sys.version_info >= (3, 10, 0),
+    reason=("Currently not passing for Python 3.10"),
+)
 def test_many_fractional_resources(shutdown_only):
     ray.init(num_cpus=2, num_gpus=2, resources={"Custom": 2})
 
diff --git a/python/ray/tests/test_runtime_env.py b/python/ray/tests/test_runtime_env.py
index a559e7ba3d42..b9c3174443d0 100644
--- a/python/ray/tests/test_runtime_env.py
+++ b/python/ray/tests/test_runtime_env.py
@@ -413,6 +413,10 @@ def enable_dev_mode(local_env_var_enabled):
 @pytest.mark.skipif(
     sys.platform == "win32", reason="conda in runtime_env unsupported on Windows."
 )
+@pytest.mark.skipif(
+    sys.version_info >= (3, 10, 0),
+    reason=("Currently not passing for Python 3.10"),
+)
 @pytest.mark.parametrize("local_env_var_enabled", [False, True])
 @pytest.mark.parametrize("runtime_env_class", [dict, RuntimeEnv])
 def test_runtime_env_log_msg(
diff --git a/python/ray/tests/test_state_api.py b/python/ray/tests/test_state_api.py
index 5193e6ba94b1..75a9b04c6f87 100644
--- a/python/ray/tests/test_state_api.py
+++ b/python/ray/tests/test_state_api.py
@@ -693,7 +693,7 @@ async def test_api_manager_list_workers(state_api_manager):
 
 
 @pytest.mark.skipif(
-    sys.version_info <= (3, 7, 0),
+    sys.version_info < (3, 8, 0),
     reason=("Not passing in CI although it works locally. Will handle it later."),
 )
 @pytest.mark.asyncio
@@ -784,7 +784,7 @@ async def test_api_manager_list_tasks(state_api_manager):
 
 
 @pytest.mark.skipif(
-    sys.version_info <= (3, 7, 0),
+    sys.version_info < (3, 8, 0),
     reason=("Not passing in CI although it works locally. Will handle it later."),
 )
 @pytest.mark.asyncio
@@ -896,7 +896,7 @@ async def test_api_manager_list_objects(state_api_manager):
 
 
 @pytest.mark.skipif(
-    sys.version_info <= (3, 7, 0),
+    sys.version_info < (3, 8, 0),
     reason=("Not passing in CI although it works locally. Will handle it later."),
 )
 @pytest.mark.asyncio

From 5353260f922bb9849ea4f2ad245be6e4ac748bd9 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Sun, 18 Sep 2022 17:25:39 +0100
Subject: [PATCH 52/61] Legacy Dockerfile compat

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 .buildkite/Dockerfile     | 3 +++
 .buildkite/Dockerfile.gpu | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/.buildkite/Dockerfile b/.buildkite/Dockerfile
index 13d3d8de279a..a34ac1c5c861 100644
--- a/.buildkite/Dockerfile
+++ b/.buildkite/Dockerfile
@@ -51,6 +51,9 @@ ENV LC_ALL=en_US.utf8
 ENV LANG=en_US.utf8
 RUN echo "ulimit -c 0" >> /root/.bashrc
 
+ENV BUILD=1
+ENV DL=1
+
 # Setup Bazel caches
 RUN (echo "build --remote_cache=${REMOTE_CACHE_URL}" >> /root/.bazelrc); \
     (if [ "${BUILDKITE_PULL_REQUEST}" != "false" ]; then (echo "build --remote_upload_local_results=false" >> /root/.bazelrc); fi); \
diff --git a/.buildkite/Dockerfile.gpu b/.buildkite/Dockerfile.gpu
index eabbfd4bf4c6..9232b9d1bfae 100644
--- a/.buildkite/Dockerfile.gpu
+++ b/.buildkite/Dockerfile.gpu
@@ -53,6 +53,9 @@ ENV LC_ALL=en_US.utf8
 ENV LANG=en_US.utf8
 RUN echo "ulimit -c 0" >> /root/.bashrc
 
+ENV BUILD=1
+ENV DL=1
+
 # Setup Bazel caches
 RUN (echo "build --remote_cache=${REMOTE_CACHE_URL}" >> /root/.bazelrc); \
     (if [ "${BUILDKITE_PULL_REQUEST}" != "false" ]; then (echo "build --remote_upload_local_results=false" >> /root/.bazelrc); fi); \

From 6029a2bd23dcc15a7fc4d1e2f5dfc6a58dbb01de Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Sun, 18 Sep 2022 17:58:16 +0100
Subject: [PATCH 53/61] Shellcheck

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/env/install-dependencies.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ci/env/install-dependencies.sh b/ci/env/install-dependencies.sh
index a0017cd6162f..acfab1bffb17 100755
--- a/ci/env/install-dependencies.sh
+++ b/ci/env/install-dependencies.sh
@@ -297,9 +297,9 @@ install_pip_packages() {
     pip install --no-clean dm-tree==0.1.5  # --no-clean is due to: https://github.com/deepmind/tree/issues/5
   fi
 
-  if ([ -n "${PYTHON-}" ] && [ "${MINIMAL_INSTALL-}" != 1 ]) || [ "${DL-}" = "1" ]; then
+  if { [ -n "${PYTHON-}" ] && [ "${MINIMAL_INSTALL-}" != 1 ]; } || [ "${DL-}" = "1" ]; then
     # Remove this entire section once Serve dependencies are fixed.
-    if ([ -z "${BUILDKITE-}" ] || [ "${DL-}" = "1" ]) && [ "${DOC_TESTING-}" != 1 ] && [ "${TRAIN_TESTING-}" != 1 ] && [ "${TUNE_TESTING-}" != 1 ] && [ "${RLLIB_TESTING-}" != 1 ]; then
+    if { [ -z "${BUILDKITE-}" ] || [ "${DL-}" = "1" ]; } && [ "${DOC_TESTING-}" != 1 ] && [ "${TRAIN_TESTING-}" != 1 ] && [ "${TUNE_TESTING-}" != 1 ] && [ "${RLLIB_TESTING-}" != 1 ]; then
       # We want to install the CPU version only.
       pip install -r "${WORKSPACE_DIR}"/python/requirements/ml/requirements_dl.txt
     fi
@@ -377,7 +377,7 @@ install_pip_packages() {
   # dependencies with Modin.
   if [ "${INSTALL_LUDWIG-}" = 1 ]; then
     # TODO: eventually pin this to master.
-    pip install -U "ludwig[test]">=0.4 jsonschema>=4
+    pip install -U "ludwig[test]>=0.4" "jsonschema>=4"
   fi
 
   # Data processing test dependencies.

From d785efec6e0f2ed2d91cbc48019ec8799227d199 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Sun, 18 Sep 2022 18:56:34 +0100
Subject: [PATCH 54/61] move lint

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 .buildkite/pipeline.build.yml | 6 ++++++
 .buildkite/pipeline.test.yml  | 7 -------
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/.buildkite/pipeline.build.yml b/.buildkite/pipeline.build.yml
index 23653b546e1d..e9b35db67816 100644
--- a/.buildkite/pipeline.build.yml
+++ b/.buildkite/pipeline.build.yml
@@ -1,3 +1,9 @@
+- label: ":book: Lint"
+  commands:
+    - export LINT=1
+    - ./ci/env/install-dependencies.sh
+    - ./ci/ci.sh lint
+
 - label: ":ferris_wheel: Wheels and Jars"
   conditions:
     [
diff --git a/.buildkite/pipeline.test.yml b/.buildkite/pipeline.test.yml
index 01bc8341dc16..1e43f762037c 100644
--- a/.buildkite/pipeline.test.yml
+++ b/.buildkite/pipeline.test.yml
@@ -1,10 +1,3 @@
-
-- label: ":book: Lint"
-  commands:
-    - export LINT=1
-    - ./ci/env/install-dependencies.sh
-    - ./ci/ci.sh lint
-
 - label: ":book: LinkCheck"
   commands:
     - export LINT=1

From 8c99a8e694b4223fbbba804ccf35455f08fee1f3 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Sun, 18 Sep 2022 20:19:56 +0100
Subject: [PATCH 55/61] Do not run runtime env complicated on py 3.9/3.10

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/ci.sh               | 9 +++++++--
 python/ray/tests/BUILD | 2 +-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/ci/ci.sh b/ci/ci.sh
index d2d665698e3d..2b3aaad7bc4b 100755
--- a/ci/ci.sh
+++ b/ci/ci.sh
@@ -790,8 +790,13 @@ test_minimal() {
   bazel test --test_output=streamed --config=ci ${BAZEL_EXPORT_OPTIONS} python/ray/tests/test_runtime_env
   # shellcheck disable=SC2086
   bazel test --test_output=streamed --config=ci ${BAZEL_EXPORT_OPTIONS} python/ray/tests/test_runtime_env_2
-  # shellcheck disable=SC2086
-  bazel test --test_output=streamed --config=ci ${BAZEL_EXPORT_OPTIONS} python/ray/tests/test_runtime_env_complicated
+
+  # Todo: Make compatible with python 3.9/3.10
+  if [ "$1" != "3.9" ] && [ "$1" != "3.10" ]; then
+    # shellcheck disable=SC2086
+    bazel test --test_output=streamed --config=ci ${BAZEL_EXPORT_OPTIONS} python/ray/tests/test_runtime_env_complicated
+  fi
+
   # shellcheck disable=SC2086
   bazel test --test_output=streamed --config=ci ${BAZEL_EXPORT_OPTIONS} python/ray/tests/test_runtime_env_validation
   # shellcheck disable=SC2086
diff --git a/python/ray/tests/BUILD b/python/ray/tests/BUILD
index 3567666d2526..00393aa44e45 100644
--- a/python/ray/tests/BUILD
+++ b/python/ray/tests/BUILD
@@ -336,7 +336,7 @@ py_test_module_list(
 
 py_test(
   name = "test_runtime_env_complicated",
-  size = "enormous",
+  size = "large",
   srcs = ["test_runtime_env_complicated.py"],
   tags = ["exclusive", "post_wheel_build", "team:serve"],
   deps = ["//:ray_lib", ":conftest"],

From 4b82b4ee2e26b9e843dc292a5fd4ca54bd8c896b Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Mon, 19 Sep 2022 19:17:40 +0100
Subject: [PATCH 56/61] Fix DL install for minimal install

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/env/install-dependencies.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/env/install-dependencies.sh b/ci/env/install-dependencies.sh
index acfab1bffb17..29a090a958ad 100755
--- a/ci/env/install-dependencies.sh
+++ b/ci/env/install-dependencies.sh
@@ -297,7 +297,7 @@ install_pip_packages() {
     pip install --no-clean dm-tree==0.1.5  # --no-clean is due to: https://github.com/deepmind/tree/issues/5
   fi
 
-  if { [ -n "${PYTHON-}" ] && [ "${MINIMAL_INSTALL-}" != 1 ]; } || [ "${DL-}" = "1" ]; then
+  if { [ -n "${PYTHON-}" ] || [ "${DL-}" = "1" ]; } && [ "${MINIMAL_INSTALL-}" != 1 ]; then
     # Remove this entire section once Serve dependencies are fixed.
     if { [ -z "${BUILDKITE-}" ] || [ "${DL-}" = "1" ]; } && [ "${DOC_TESTING-}" != 1 ] && [ "${TRAIN_TESTING-}" != 1 ] && [ "${TUNE_TESTING-}" != 1 ] && [ "${RLLIB_TESTING-}" != 1 ]; then
       # We want to install the CPU version only.

From 556f5e39bb8a3ed0d4ac69bfcf771533091de107 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Mon, 19 Sep 2022 21:04:26 +0100
Subject: [PATCH 57/61] Skip test in py 3.10

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 python/ray/tests/test_runtime_env_2.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/python/ray/tests/test_runtime_env_2.py b/python/ray/tests/test_runtime_env_2.py
index c6570128ee51..6a3ed598b00d 100644
--- a/python/ray/tests/test_runtime_env_2.py
+++ b/python/ray/tests/test_runtime_env_2.py
@@ -13,6 +13,10 @@
 @pytest.mark.skipif(
     sys.platform == "win32", reason="conda in runtime_env unsupported on Windows."
 )
+@pytest.mark.skipif(
+    sys.version_info >= (3, 10, 0),
+    reason=("Currently not passing on Python 3.10"),
+)
 @pytest.mark.parametrize("runtime_env_class", [dict, RuntimeEnv])
 @pytest.mark.parametrize(
     "set_bad_runtime_env_cache_ttl_seconds",

From fbccde2f13a9b1ef61fd3434f349caa34c351966 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Tue, 20 Sep 2022 12:15:24 +0100
Subject: [PATCH 58/61] legacy build: Py 3.7

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 .buildkite/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/Dockerfile b/.buildkite/Dockerfile
index a34ac1c5c861..6faded1f9e82 100644
--- a/.buildkite/Dockerfile
+++ b/.buildkite/Dockerfile
@@ -4,7 +4,7 @@ ARG REMOTE_CACHE_URL
 ARG BUILDKITE_PULL_REQUEST
 ARG BUILDKITE_COMMIT
 ARG BUILDKITE_PULL_REQUEST_BASE_BRANCH
-ARG PYTHON=3.6
+ARG PYTHON=3.7
 ARG INSTALL_DEPENDENCIES
 
 ENV DEBIAN_FRONTEND=noninteractive

From 043a8778e518501b0d562f09ca0e3d13b221fc61 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Tue, 20 Sep 2022 13:47:20 +0100
Subject: [PATCH 59/61] Mac+Win

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 .buildkite/pipeline.macos.yml   | 2 ++
 .buildkite/pipeline.windows.yml | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/.buildkite/pipeline.macos.yml b/.buildkite/pipeline.macos.yml
index def698b03f61..f3bf5f8c1b3c 100644
--- a/.buildkite/pipeline.macos.yml
+++ b/.buildkite/pipeline.macos.yml
@@ -7,6 +7,8 @@ common: &common
     RAY_DEFAULT_BUILD: "1"
     LC_ALL: en_US.UTF-8
     LANG: en_US.UTF-8
+    BUILD: "1"
+    DL: "1"
 
 prelude_commands: &prelude_commands |-
   rm -rf /tmp/bazel_event_logs
diff --git a/.buildkite/pipeline.windows.yml b/.buildkite/pipeline.windows.yml
index f21b9c20c4ef..fef6b5187115 100644
--- a/.buildkite/pipeline.windows.yml
+++ b/.buildkite/pipeline.windows.yml
@@ -14,6 +14,8 @@ prelude_commands: &prelude_commands |-
     export RAY_DEFAULT_BUILD="1"
     export LC_ALL="en_US.UTF-8"
     export LANG="en_US.UTF-8"
+    export BUILD="1"
+    export DL="1"
     powershell ci/pipeline/fix-windows-container-networking.ps1
     cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
 

From 4b221150c7baaa118c00602781b0658858f6b3b2 Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 22 Sep 2022 07:52:00 +0100
Subject: [PATCH 60/61] Rename dockerfiles

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 ci/docker/{Dockerfile.base_build => base.build.Dockerfile} | 0
 ci/docker/{Dockerfile.base_gpu => base.gpu.Dockerfile}     | 0
 ci/docker/{Dockerfile.base_ml => base.ml.Dockerfile}       | 0
 ci/docker/{Dockerfile.base_test => base.test.Dockerfile}   | 0
 ci/docker/{Dockerfile.build => build.Dockerfile}           | 0
 ci/docker/{Dockerfile.gpu => gpu.Dockerfile}               | 0
 ci/docker/{Dockerfile.ml => ml.Dockerfile}                 | 0
 ci/docker/{Dockerfile.test => test.Dockerfile}             | 0
 8 files changed, 0 insertions(+), 0 deletions(-)
 rename ci/docker/{Dockerfile.base_build => base.build.Dockerfile} (100%)
 rename ci/docker/{Dockerfile.base_gpu => base.gpu.Dockerfile} (100%)
 rename ci/docker/{Dockerfile.base_ml => base.ml.Dockerfile} (100%)
 rename ci/docker/{Dockerfile.base_test => base.test.Dockerfile} (100%)
 rename ci/docker/{Dockerfile.build => build.Dockerfile} (100%)
 rename ci/docker/{Dockerfile.gpu => gpu.Dockerfile} (100%)
 rename ci/docker/{Dockerfile.ml => ml.Dockerfile} (100%)
 rename ci/docker/{Dockerfile.test => test.Dockerfile} (100%)

diff --git a/ci/docker/Dockerfile.base_build b/ci/docker/base.build.Dockerfile
similarity index 100%
rename from ci/docker/Dockerfile.base_build
rename to ci/docker/base.build.Dockerfile
diff --git a/ci/docker/Dockerfile.base_gpu b/ci/docker/base.gpu.Dockerfile
similarity index 100%
rename from ci/docker/Dockerfile.base_gpu
rename to ci/docker/base.gpu.Dockerfile
diff --git a/ci/docker/Dockerfile.base_ml b/ci/docker/base.ml.Dockerfile
similarity index 100%
rename from ci/docker/Dockerfile.base_ml
rename to ci/docker/base.ml.Dockerfile
diff --git a/ci/docker/Dockerfile.base_test b/ci/docker/base.test.Dockerfile
similarity index 100%
rename from ci/docker/Dockerfile.base_test
rename to ci/docker/base.test.Dockerfile
diff --git a/ci/docker/Dockerfile.build b/ci/docker/build.Dockerfile
similarity index 100%
rename from ci/docker/Dockerfile.build
rename to ci/docker/build.Dockerfile
diff --git a/ci/docker/Dockerfile.gpu b/ci/docker/gpu.Dockerfile
similarity index 100%
rename from ci/docker/Dockerfile.gpu
rename to ci/docker/gpu.Dockerfile
diff --git a/ci/docker/Dockerfile.ml b/ci/docker/ml.Dockerfile
similarity index 100%
rename from ci/docker/Dockerfile.ml
rename to ci/docker/ml.Dockerfile
diff --git a/ci/docker/Dockerfile.test b/ci/docker/test.Dockerfile
similarity index 100%
rename from ci/docker/Dockerfile.test
rename to ci/docker/test.Dockerfile

From f567f7b1b12cd845c34eda8c431b1a64b678fc9c Mon Sep 17 00:00:00 2001
From: Kai Fricke <kai@anyscale.com>
Date: Thu, 22 Sep 2022 09:07:13 +0100
Subject: [PATCH 61/61] Docs

Signed-off-by: Kai Fricke <kai@anyscale.com>
---
 .buildkite/README.md | 30 ++++++++++++++++++++++++++++++
 ci/docker/README.md  | 29 +++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)
 create mode 100644 .buildkite/README.md
 create mode 100644 ci/docker/README.md

diff --git a/.buildkite/README.md b/.buildkite/README.md
new file mode 100644
index 000000000000..9df1111ef3fb
--- /dev/null
+++ b/.buildkite/README.md
@@ -0,0 +1,30 @@
+# Buildkite pipelines
+
+This directory contains buildkite pipelines used to start CI tests.
+
+Each step contains a buildkite step that is parsed and executed according to the 
+[Buildkite pipeline specification](https://buildkite.com/docs/pipelines).
+
+## Conditions
+
+An extra optional field `conditions` is defined, which includes conditions under which tests are run.
+The script `ci/pipeline/determine_tests_to_run.py` determines changed files in a PR and only kicks off
+tests that include at least one of the conditions. If no condition is specified, the test is always run.
+
+A special case is the `NO_WHEELS_REQUIRED` condition. If this is present, it indicates that the test can
+be run with the latest available binaries - in this case the test can be started early, as it will re-use
+the latest branch image and only check out the current code revision in the PR. This early kick off will
+only trigger on PR builds, not on branch builds.
+
+## Pipelines
+
+This directory should be considered with respect to the docker images located in `ci/docker`.
+
+- `pipeline.build.yml` contains jobs that require build dependencies. This includes all tests that re-build
+  Ray (e.g. when switching Python versions). The tests are run on the `build.Dockerfile` image.
+- `pipeline.test.yml` contains jobs that only require an installed Ray and a small subset of dependencies,
+  notably exlcuding ML libraries such as Tensorflow or Torch. The tests are run on the `test.Dockerfile` image.
+- `pipeline.ml.yml` contains jobs that require ML libraries Tensorflow and Torch to be available. The tests
+  are run on the `ml.Dockerfile` image.
+- `pipeline.gpu.yml` contains jobs that require one GPU. The tests are run on the `gpu.Dockerfile` image.
+- `pipeline.gpu.large.yml` contains jobs that require multi-GPUs (currently 4). The tests are run on the `gpu.Dockerfile` image.
diff --git a/ci/docker/README.md b/ci/docker/README.md
new file mode 100644
index 000000000000..132d36e0c6af
--- /dev/null
+++ b/ci/docker/README.md
@@ -0,0 +1,29 @@
+# CI Docker files
+
+This directory contains the Dockerfiles used to build the CI system.
+
+These are _not_ the Dockerfiles that build the `rayproject/ray` releases. These 
+are found in the `/docker` directory under the root.
+
+The Dockerfiles are hierarchical and will be built in different places during a CI run.
+
+## Base images
+
+The base images are built per-branch either when they are first requested or on a periodic basis
+(for the master branch). The base images contain the latest dependencies of the respective branch.
+Every per-commit build will always install the latest dependencies to make sure everything is up to date.
+However, by using the base images as a source, this will mostly be a no or low cost operation.
+
+- `base.test.Dockerfile` contains common dependencies for all images
+- `base.build.Dockerfile` inherits from `base.test` and installs build dependencies like Java and LLVM
+- `base.ml.Dockerfile` inherits from `base.test` and installs ML dependencies like torch/tensorflow
+- `base.gpu.Dockerfile` inherits from a CUDA base image and otherwise contains the same content as `base.test` and `base.ml`.
+
+## Per-commit images
+
+On every commit, the following images are built in this order:
+
+- `build.Dockerfile` (based on `base.build`) which will build the Ray binaries
+- `test.Dockerfile` (based on `base.test`), where we will inject the built Ray libraries
+- `ml.Dockerfile` (based on `base.ml`), where we will inject the built Ray libraries
+- `gpu.Dockerfile` (based on `base.ml`), where we will inject the built Ray libraries