[Feature] Running end-to-end tests on local machine (ray-project#589)
We need to download (`docker pull`) all necessary Docker images before we load them into a KinD cluster. Without this, the command `kind load docker-image kuberay/apiserver:nightly` fails with the following error:

ERROR: image: "kuberay/apiserver:nightly" not present locally

In addition, we use the `kind load` command to preload the rayproject/ray image into the KinD cluster. Although the Kubernetes cluster pulls the Ray image automatically when we create a RayCluster custom resource, pulling rayproject/ray:1.9.0 (2.44GB) may cause ImagePullBackOff due to a timeout. Moreover, the RayCluster startup time would vary with network speed, making the tests flaky.
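As a rough sketch, the preloading flow described above corresponds to the following manual commands, using the image tags mentioned in this PR (the actual logic lives in `tests/kuberay_utils/utils.py`):

```bash
# Pull the images into the local Docker registry first; `kind load`
# only works for images that are already present locally.
docker pull rayproject/ray:1.9.0
docker pull kuberay/operator:nightly
docker pull kuberay/apiserver:nightly

# Preload the images into the KinD cluster so its nodes never have to
# pull them (avoids ImagePullBackOff on the 2.44GB Ray image).
kind load docker-image rayproject/ray:1.9.0
kind load docker-image kuberay/operator:nightly
kind load docker-image kuberay/apiserver:nightly
```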
kevin85421 authored Sep 28, 2022
1 parent 2a8dcf5 commit e7fccbc
Showing 4 changed files with 66 additions and 51 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/actions/compatibility/action.yaml
@@ -68,5 +68,8 @@ runs:
         kustomize edit set image kuberay/apiserver=kuberay/apiserver:${{ steps.vars.outputs.sha_short }}
         popd
         echo "Using Ray image ${{ inputs.ray_version }}"
-        PYTHONPATH="./tests/" RAY_VERSION="${{ inputs.ray_version }}" KUBERAY_IMG_SHA=${{ steps.vars.outputs.sha_short }} python ./tests/compatibility-test.py
+        PYTHONPATH="./tests/" \
+        RAY_IMAGE="rayproject/ray:${{ inputs.ray_version }}" \
+        OPERATOR_IMAGE="kuberay/operator:${{ steps.vars.outputs.sha_short }}" \
+        APISERVER_IMAGE="kuberay/apiserver:${{ steps.vars.outputs.sha_short }}" python ./tests/compatibility-test.py
       shell: bash
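To make the substitution concrete: with `ray_version` set to 1.9.0 and a hypothetical short commit SHA `abc1234` (the real SHA is computed in an earlier step of the action), the rewritten CI step would invoke the test suite roughly as follows:

```bash
PYTHONPATH="./tests/" \
RAY_IMAGE="rayproject/ray:1.9.0" \
OPERATOR_IMAGE="kuberay/operator:abc1234" \
APISERVER_IMAGE="kuberay/apiserver:abc1234" python ./tests/compatibility-test.py
```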
18 changes: 17 additions & 1 deletion ray-operator/DEVELOPMENT.md
@@ -116,4 +116,20 @@ make sync
 
 # Reproduce CI error for job "helm-chart-verify-rbac" (consistency 5)
 python3 ../scripts/rbac-check.py
-```
+```
+
+### Run end-to-end tests locally
+
+We have some [end-to-end tests](https://github.com/ray-project/kuberay/blob/master/.github/workflows/actions/compatibility/action.yaml) on GitHub Actions.
+These tests run small Ray clusters inside a [kind](https://kind.sigs.k8s.io/) (Kubernetes in Docker) environment. To run the tests yourself, follow these steps:
+
+* Step 1: Install the required dependencies: [kind](https://kind.sigs.k8s.io/), [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/), and [kustomize](https://kustomize.io/).
+
+* Step 2: Run the test script from the repository root, i.e. `/path/to/your/kuberay/`.
+```bash
+# [Usage]: RAY_IMAGE=$RAY_IMAGE OPERATOR_IMAGE=$OPERATOR_IMAGE APISERVER_IMAGE=$APISERVER_IMAGE python3 tests/compatibility-test.py
+# All three environment variables are optional.
+# [Example]:
+RAY_IMAGE=rayproject/ray:2.0.0 OPERATOR_IMAGE=kuberay/operator:nightly APISERVER_IMAGE=kuberay/apiserver:nightly python3 tests/compatibility-test.py
+```
+
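Before Step 2, it may help to verify that the Step 1 dependencies, plus Docker (which the test harness drives through the `docker` Python SDK), are installed. A minimal sanity check, assuming each tool's standard version subcommand:

```bash
# Confirm the required tools are on the PATH.
kind version
kubectl version --client
kustomize version
docker version
```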

57 changes: 29 additions & 28 deletions tests/compatibility-test.py
@@ -11,11 +11,13 @@
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
 
-# kuberay_sha, ray_version & ray_image are default values that
-# will be changed by parse_environment()
-kuberay_sha = 'nightly'
+# Image version
 ray_version = '1.9.0'
-ray_image = "rayproject/ray:1.9.0"
+
+# Docker images
+ray_image = 'rayproject/ray:1.9.0'
+kuberay_operator_image = 'kuberay/operator:nightly'
+kuberay_apiserver_image = 'kuberay/apiserver:nightly'
 
 
 class BasicRayTestCase(unittest.TestCase):
@@ -30,8 +32,9 @@ def setUpClass(cls):
         # ray cluster running inside Kind environment.
         utils.delete_cluster()
         utils.create_cluster()
-        utils.apply_kuberay_resources(kuberay_sha)
-        utils.download_images(ray_image)
+        images = [ray_image, kuberay_operator_image, kuberay_apiserver_image]
+        utils.download_images(images)
+        utils.apply_kuberay_resources(images, kuberay_operator_image, kuberay_apiserver_image)
         utils.create_kuberay_cluster(BasicRayTestCase.cluster_template_file,
                                      ray_version, ray_image)
 
@@ -130,18 +133,15 @@ class RayFTTestCase(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         if not utils.ray_ft_supported(ray_version):
-            return
+            raise unittest.SkipTest("ray ft is not supported")
         utils.delete_cluster()
         utils.create_cluster()
-        utils.apply_kuberay_resources(kuberay_sha)
-        utils.download_images(ray_image)
+        images = [ray_image, kuberay_operator_image, kuberay_apiserver_image]
+        utils.download_images(images)
+        utils.apply_kuberay_resources(images, kuberay_operator_image, kuberay_apiserver_image)
         utils.create_kuberay_cluster(RayFTTestCase.cluster_template_file,
                                      ray_version, ray_image)
 
-    def setUp(self):
-        if not utils.ray_ft_supported(ray_version):
-            raise unittest.SkipTest("ray ft is not supported")
-
     def test_kill_head(self):
         # This test will delete head node and wait for a new replacement to
         # come up.
@@ -371,22 +371,19 @@ class RayServiceTestCase(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         if not utils.ray_service_supported(ray_version):
-            return
+            raise unittest.SkipTest("ray service is not supported")
         # Ray Service is running inside a local Kind environment.
         # We use the Ray nightly version now.
         # We wait for the serve service to be ready.
         # The test will check the successful response from serve service.
         utils.delete_cluster()
         utils.create_cluster()
-        utils.apply_kuberay_resources(kuberay_sha)
-        utils.download_images(ray_image)
+        images = [ray_image, kuberay_operator_image, kuberay_apiserver_image]
+        utils.download_images(images)
+        utils.apply_kuberay_resources(images, kuberay_operator_image, kuberay_apiserver_image)
         utils.create_kuberay_service(
             RayServiceTestCase.service_template_file, ray_version, ray_image)
 
-    def setUp(self):
-        if not utils.ray_service_supported(ray_version):
-            raise unittest.SkipTest("ray service is not supported")
-
     def test_ray_serve_work(self):
         time.sleep(5)
         curl_cmd = 'curl -X POST -H \'Content-Type: application/json\' localhost:8000 -d \'["MANGO", 2]\''
@@ -415,17 +412,21 @@ def test_ray_serve_work(self):
 
 
 def parse_environment():
-    global ray_version, ray_image, kuberay_sha
+    global ray_version, ray_image, kuberay_operator_image, kuberay_apiserver_image
     for k, v in os.environ.items():
-        if k == 'RAY_VERSION':
-            logger.info('Setting Ray image to: {}'.format(v))
-            ray_version = v
-            ray_image = 'rayproject/ray:{}'.format(ray_version)
-        if k == 'KUBERAY_IMG_SHA':
-            logger.info('Using KubeRay docker build SHA: {}'.format(v))
-            kuberay_sha = v
+        if k == 'RAY_IMAGE':
+            ray_image = v
+            ray_version = ray_image.split(':')[-1]
+        elif k == 'OPERATOR_IMAGE':
+            kuberay_operator_image = v
+        elif k == 'APISERVER_IMAGE':
+            kuberay_apiserver_image = v
 
 
 if __name__ == '__main__':
     parse_environment()
+    logger.info('Setting Ray image to: {}'.format(ray_image))
+    logger.info('Setting Ray version to: {}'.format(ray_version))
+    logger.info('Setting KubeRay operator image to: {}'.format(kuberay_operator_image))
+    logger.info('Setting KubeRay apiserver image to: {}'.format(kuberay_apiserver_image))
     unittest.main(verbosity=2)
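To illustrate the new environment-variable contract, here is a sketch of two local invocations; the expected effects follow directly from `parse_environment()` above (the 2.0.0 tag is only an example):

```bash
# RAY_IMAGE sets ray_image directly, and ray_version is derived from the
# image tag via ray_image.split(':')[-1], i.e. '2.0.0' here.
RAY_IMAGE=rayproject/ray:2.0.0 python3 tests/compatibility-test.py

# With no variables set, the module-level defaults apply:
# rayproject/ray:1.9.0, kuberay/operator:nightly, kuberay/apiserver:nightly.
python3 tests/compatibility-test.py
```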
37 changes: 16 additions & 21 deletions tests/kuberay_utils/utils.py
@@ -27,19 +27,15 @@ def parse_ray_version(version_str):
 def ray_ft_supported(ray_version):
     if ray_version == "nightly":
         return True
-    major, minor, patch = parse_ray_version(ray_version)
-    if major * 100 + minor <= 113:
-        return False
-    return True
+    major, minor, _ = parse_ray_version(ray_version)
+    return major * 100 + minor > 113
 
 
 def ray_service_supported(ray_version):
     if ray_version == "nightly":
         return True
-    major, minor, patch = parse_ray_version(ray_version)
-    if major * 100 + minor <= 113:
-        return False
-    return True
+    major, minor, _ = parse_ray_version(ray_version)
+    return major * 100 + minor > 113
 
 
 def shell_run(cmd):
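In effect, both predicates collapse to a single comparison: `major * 100 + minor > 113` requires Ray 1.14 or newer (for example, 1.13 encodes to 113 and is rejected, while 2.0 encodes to 200 and passes), and 'nightly' is always accepted.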
@@ -66,19 +62,16 @@ def create_cluster():
     assert rtn == 0
 
 
-def apply_kuberay_resources(img_sha='nightly'):
-    shell_assert_success(
-        'kind load docker-image kuberay/operator:{}'.format(img_sha))
-    shell_assert_success(
-        'kind load docker-image kuberay/apiserver:{}'.format(img_sha))
-    shell_assert_success(
-        'kubectl create -k manifests/cluster-scope-resources')
+def apply_kuberay_resources(images, kuberay_operator_image, kuberay_apiserver_image):
+    for image in images:
+        shell_assert_success('kind load docker-image {}'.format(image))
+    shell_assert_success('kubectl create -k manifests/cluster-scope-resources')
     # use kustomize to build the yaml, then change the image to the one we want to test.
     shell_assert_success(
         ('rm -f kustomization.yaml && kustomize create --resources manifests/base && ' +
          'kustomize edit set image ' +
-         'kuberay/operator:nightly=kuberay/operator:{0} kuberay/apiserver:nightly=kuberay/apiserver:{0} && ' +
-         'kubectl apply -k .').format(img_sha))
+         'kuberay/operator:nightly={0} kuberay/apiserver:nightly={1} && ' +
+         'kubectl apply -k .').format(kuberay_operator_image, kuberay_apiserver_image))
 
 
 def create_kuberay_cluster(template_name, ray_version, ray_image):
@@ -160,11 +153,13 @@ def delete_cluster():
     shell_run('kind delete cluster')
 
 
-def download_images(ray_image):
+def download_images(images):
     client = docker.from_env()
-    client.images.pull(ray_image)
-    # not enabled for now
-    # shell_assert_success('kind load docker-image \"{}\"'.format(ray_image))
+    for image in images:
+        if shell_run('docker image inspect {}'.format(image)) != 0:
+            # Only pull the image from DockerHub when the image does not
+            # exist in the local docker registry.
+            client.images.pull(image)
     client.close()
 
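With the default images, the shell commands issued by the new `download_images` and `apply_kuberay_resources` expand to roughly the following; this is a sketch assembled from the format strings above, not a verbatim trace:

```bash
# download_images: pull an image only if it is missing from the local
# Docker registry (shown here as the equivalent shell one-liner).
docker image inspect rayproject/ray:1.9.0 > /dev/null 2>&1 || docker pull rayproject/ray:1.9.0

# apply_kuberay_resources: preload every image into KinD, then install
# the KubeRay manifests with the images under test.
kind load docker-image rayproject/ray:1.9.0
kind load docker-image kuberay/operator:nightly
kind load docker-image kuberay/apiserver:nightly
kubectl create -k manifests/cluster-scope-resources
rm -f kustomization.yaml
kustomize create --resources manifests/base
kustomize edit set image \
    kuberay/operator:nightly=kuberay/operator:nightly \
    kuberay/apiserver:nightly=kuberay/apiserver:nightly
kubectl apply -k .
```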
