From e7fccbcfb2d5ded835be873a9a32e93b48f09c06 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Wed, 28 Sep 2022 12:36:40 -0400 Subject: [PATCH] [Feature] Running end-to-end tests on local machine (#589) We need to download (docker pull) all necessary docker images before we load the images into a KinD cluster. Without this, the command kind load docker-image kuberay/apiserver:nightly will report an error message as follows. ERROR: image: "kuberay/apiserver:nightly" not present locally In addition, we also use the kind load command to preload the rayproject/ray image into the KinD cluster. Although the Kubernetes cluster will pull the ray image automatically when we create a custom resource RayCluster, pulling rayproject/ray:1.9.0 (2.44GB) may cause ImagePullBackOff (Link) due to timeout. In addition, the RayCluster startup time will vary based on network speed and make the tests flaky. --- .../actions/compatibility/action.yaml | 5 +- ray-operator/DEVELOPMENT.md | 18 +++++- tests/compatibility-test.py | 57 ++++++++++--------- tests/kuberay_utils/utils.py | 37 ++++++------ 4 files changed, 66 insertions(+), 51 deletions(-) diff --git a/.github/workflows/actions/compatibility/action.yaml b/.github/workflows/actions/compatibility/action.yaml index 0b49a26425f..83fdf127489 100644 --- a/.github/workflows/actions/compatibility/action.yaml +++ b/.github/workflows/actions/compatibility/action.yaml @@ -68,5 +68,8 @@ runs: kustomize edit set image kuberay/apiserver=kuberay/apiserver:${{ steps.vars.outputs.sha_short }} popd echo "Using Ray image ${{ inputs.ray_version }}" - PYTHONPATH="./tests/" RAY_VERSION="${{ inputs.ray_version }}" KUBERAY_IMG_SHA=${{ steps.vars.outputs.sha_short }} python ./tests/compatibility-test.py + PYTHONPATH="./tests/" \ + RAY_IMAGE="rayproject/ray:${{ inputs.ray_version }}" \ + OPERATOR_IMAGE="kuberay/operator:${{ steps.vars.outputs.sha_short }}" \ + APISERVER_IMAGE="kuberay/apiserver:${{ steps.vars.outputs.sha_short }}" python 
./tests/compatibility-test.py shell: bash diff --git a/ray-operator/DEVELOPMENT.md b/ray-operator/DEVELOPMENT.md index aed4bf44cca..4ad08b11999 100644 --- a/ray-operator/DEVELOPMENT.md +++ b/ray-operator/DEVELOPMENT.md @@ -116,4 +116,20 @@ make sync # Reproduce CI error for job "helm-chart-verify-rbac" (consistency 5) python3 ../scripts/rbac-check.py -``` \ No newline at end of file +``` + +### Run end-to-end tests locally + +We have some [end-to-end tests](https://github.com/ray-project/kuberay/blob/master/.github/workflows/actions/compatibility/action.yaml) on GitHub Actions. +These tests operate small Ray clusters running within a [kind](https://kind.sigs.k8s.io/) (Kubernetes-in-docker) environment. To run the tests yourself, follow these steps: + +* Step1: Install related dependencies, including [kind](https://kind.sigs.k8s.io/), [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/), and [kustomize](https://kustomize.io/). + +* Step2: You must be in `/path/to/your/kuberay/`. + ```bash + # [Usage]: RAY_IMAGE=$RAY_IMAGE OPERATOR_IMAGE=$OPERATOR_IMAGE APISERVER_IMAGE=$APISERVER_IMAGE python3 tests/compatibility-test.py + # These 3 environment variables are optional. 
+ # [Example]: + RAY_IMAGE=rayproject/ray:2.0.0 OPERATOR_IMAGE=kuberay/operator:nightly APISERVER_IMAGE=kuberay/apiserver:nightly python3 tests/compatibility-test.py + ``` + diff --git a/tests/compatibility-test.py b/tests/compatibility-test.py index c85cdddab45..695896cb6d8 100755 --- a/tests/compatibility-test.py +++ b/tests/compatibility-test.py @@ -11,11 +11,13 @@ logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) -# kuberay_sha, ray_version & ray_image are default values that -# will be changed by parse_environment() -kuberay_sha = 'nightly' +# Image version ray_version = '1.9.0' -ray_image = "rayproject/ray:1.9.0" + +# Docker images +ray_image = 'rayproject/ray:1.9.0' +kuberay_operator_image = 'kuberay/operator:nightly' +kuberay_apiserver_image = 'kuberay/apiserver:nightly' class BasicRayTestCase(unittest.TestCase): @@ -30,8 +32,9 @@ def setUpClass(cls): # ray cluster running inside Kind environment. utils.delete_cluster() utils.create_cluster() - utils.apply_kuberay_resources(kuberay_sha) - utils.download_images(ray_image) + images = [ray_image, kuberay_operator_image, kuberay_apiserver_image] + utils.download_images(images) + utils.apply_kuberay_resources(images, kuberay_operator_image, kuberay_apiserver_image) utils.create_kuberay_cluster(BasicRayTestCase.cluster_template_file, ray_version, ray_image) @@ -130,18 +133,15 @@ class RayFTTestCase(unittest.TestCase): @classmethod def setUpClass(cls): if not utils.ray_ft_supported(ray_version): - return + raise unittest.SkipTest("ray ft is not supported") utils.delete_cluster() utils.create_cluster() - utils.apply_kuberay_resources(kuberay_sha) - utils.download_images(ray_image) + images = [ray_image, kuberay_operator_image, kuberay_apiserver_image] + utils.download_images(images) + utils.apply_kuberay_resources(images, kuberay_operator_image, kuberay_apiserver_image) utils.create_kuberay_cluster(RayFTTestCase.cluster_template_file, ray_version, ray_image) - def setUp(self): - if not 
utils.ray_ft_supported(ray_version): - raise unittest.SkipTest("ray ft is not supported") - def test_kill_head(self): # This test will delete head node and wait for a new replacement to # come up. @@ -371,22 +371,19 @@ class RayServiceTestCase(unittest.TestCase): @classmethod def setUpClass(cls): if not utils.ray_service_supported(ray_version): - return + raise unittest.SkipTest("ray service is not supported") # Ray Service is running inside a local Kind environment. # We use the Ray nightly version now. # We wait for the serve service ready. # The test will check the successful response from serve service. utils.delete_cluster() utils.create_cluster() - utils.apply_kuberay_resources(kuberay_sha) - utils.download_images(ray_image) + images = [ray_image, kuberay_operator_image, kuberay_apiserver_image] + utils.download_images(images) + utils.apply_kuberay_resources(images, kuberay_operator_image, kuberay_apiserver_image) utils.create_kuberay_service( RayServiceTestCase.service_template_file, ray_version, ray_image) - def setUp(self): - if not utils.ray_service_supported(ray_version): - raise unittest.SkipTest("ray service is not supported") - def test_ray_serve_work(self): time.sleep(5) curl_cmd = 'curl -X POST -H \'Content-Type: application/json\' localhost:8000 -d \'["MANGO", 2]\'' @@ -415,17 +412,21 @@ def test_ray_serve_work(self): def parse_environment(): - global ray_version, ray_image, kuberay_sha + global ray_version, ray_image, kuberay_operator_image, kuberay_apiserver_image for k, v in os.environ.items(): - if k == 'RAY_VERSION': - logger.info('Setting Ray image to: {}'.format(v)) - ray_version = v - ray_image = 'rayproject/ray:{}'.format(ray_version) - if k == 'KUBERAY_IMG_SHA': - logger.info('Using KubeRay docker build SHA: {}'.format(v)) - kuberay_sha = v + if k == 'RAY_IMAGE': + ray_image = v + ray_version = ray_image.split(':')[-1] + elif k == 'OPERATOR_IMAGE': + kuberay_operator_image = v + elif k == 'APISERVER_IMAGE': + kuberay_apiserver_image = v 
if __name__ == '__main__': parse_environment() + logger.info('Setting Ray image to: {}'.format(ray_image)) + logger.info('Setting Ray version to: {}'.format(ray_version)) + logger.info('Setting KubeRay operator image to: {}'.format(kuberay_operator_image)) + logger.info('Setting KubeRay apiserver image to: {}'.format(kuberay_apiserver_image)) unittest.main(verbosity=2) diff --git a/tests/kuberay_utils/utils.py b/tests/kuberay_utils/utils.py index 08286cae73d..ffe2a6b4060 100644 --- a/tests/kuberay_utils/utils.py +++ b/tests/kuberay_utils/utils.py @@ -27,19 +27,15 @@ def parse_ray_version(version_str): def ray_ft_supported(ray_version): if ray_version == "nightly": return True - major, minor, patch = parse_ray_version(ray_version) - if major * 100 + minor <= 113: - return False - return True + major, minor, _ = parse_ray_version(ray_version) + return major * 100 + minor > 113 def ray_service_supported(ray_version): if ray_version == "nightly": return True - major, minor, patch = parse_ray_version(ray_version) - if major * 100 + minor <= 113: - return False - return True + major, minor, _ = parse_ray_version(ray_version) + return major * 100 + minor > 113 def shell_run(cmd): @@ -66,19 +62,16 @@ def create_cluster(): assert rtn == 0 -def apply_kuberay_resources(img_sha='nightly'): - shell_assert_success( - 'kind load docker-image kuberay/operator:{}'.format(img_sha)) - shell_assert_success( - 'kind load docker-image kuberay/apiserver:{}'.format(img_sha)) - shell_assert_success( - 'kubectl create -k manifests/cluster-scope-resources') +def apply_kuberay_resources(images, kuberay_operator_image, kuberay_apiserver_image): + for image in images: + shell_assert_success('kind load docker-image {}'.format(image)) + shell_assert_success('kubectl create -k manifests/cluster-scope-resources') # use kustomize to build the yaml, then change the image to the one we want to testing. 
shell_assert_success( ('rm -f kustomization.yaml && kustomize create --resources manifests/base && ' + 'kustomize edit set image ' + - 'kuberay/operator:nightly=kuberay/operator:{0} kuberay/apiserver:nightly=kuberay/apiserver:{0} && ' + - 'kubectl apply -k .').format(img_sha)) + 'kuberay/operator:nightly={0} kuberay/apiserver:nightly={1} && ' + + 'kubectl apply -k .').format(kuberay_operator_image, kuberay_apiserver_image)) def create_kuberay_cluster(template_name, ray_version, ray_image): @@ -160,11 +153,13 @@ def delete_cluster(): shell_run('kind delete cluster') -def download_images(ray_image): +def download_images(images): client = docker.from_env() - client.images.pull(ray_image) - # not enabled for now - # shell_assert_success('kind load docker-image \"{}\"'.format(ray_image)) + for image in images: + if shell_run('docker image inspect {}'.format(image)) != 0: + # Only pull the image from DockerHub when the image does not + # exist in the local docker registry. + client.images.pull(image) client.close()