From e45ee2d1e9cccda396079e523d41bff91de48b95 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Tue, 8 Oct 2024 16:27:59 +0100 Subject: [PATCH] refactor: codeflare sdk unit tests sort unit tests into individual files and made minor enhancements --- .../common/kubernetes_cluster/auth_test.py | 166 + src/codeflare_sdk/common/kueue/kueue_test.py | 137 + .../common/utils/generate_cert_test.py | 114 + .../common/utils/unit_test_support.py | 383 ++ .../common/widgets/widgets_test.py | 469 +++ .../ray/appwrapper/awload_test.py | 88 + .../ray/appwrapper/status_test.py | 104 + src/codeflare_sdk/ray/client/ray_jobs_test.py | 173 + src/codeflare_sdk/ray/cluster/cluster_test.py | 610 +++ src/codeflare_sdk/ray/cluster/config_test.py | 164 + .../ray/cluster/generate_yaml_test.py | 34 + .../ray/cluster/pretty_print_test.py | 208 + src/codeflare_sdk/ray/cluster/status_test.py | 114 + tests/demo_test.py | 62 - tests/func_test.py | 21 - .../appwrapper}/test-case-bad.yaml | 0 .../kueue/aw_kueue.yaml} | 16 +- .../kueue/ray_cluster_kueue.yaml | 157 + .../ray/default-appwrapper.yaml | 155 + .../ray/default-ray-cluster.yaml} | 30 +- .../ray/unit-test-all-params.yaml} | 54 +- .../support_clusters/test-aw-a.yaml} | 16 +- .../support_clusters/test-aw-b.yaml | 157 + .../support_clusters/test-rc-a.yaml} | 34 +- .../support_clusters/test-rc-b.yaml | 148 + tests/unit_test.py | 3350 ----------------- tests/unit_test_support.py | 60 - 27 files changed, 3451 insertions(+), 3573 deletions(-) create mode 100644 src/codeflare_sdk/common/kubernetes_cluster/auth_test.py create mode 100644 src/codeflare_sdk/common/kueue/kueue_test.py create mode 100644 src/codeflare_sdk/common/utils/generate_cert_test.py create mode 100644 src/codeflare_sdk/common/utils/unit_test_support.py create mode 100644 src/codeflare_sdk/common/widgets/widgets_test.py create mode 100644 src/codeflare_sdk/ray/appwrapper/awload_test.py create mode 100644 src/codeflare_sdk/ray/appwrapper/status_test.py create mode 100644 
src/codeflare_sdk/ray/client/ray_jobs_test.py create mode 100644 src/codeflare_sdk/ray/cluster/cluster_test.py create mode 100644 src/codeflare_sdk/ray/cluster/config_test.py create mode 100644 src/codeflare_sdk/ray/cluster/generate_yaml_test.py create mode 100644 src/codeflare_sdk/ray/cluster/pretty_print_test.py create mode 100644 src/codeflare_sdk/ray/cluster/status_test.py delete mode 100644 tests/demo_test.py delete mode 100644 tests/func_test.py rename tests/{ => test_cluster_yamls/appwrapper}/test-case-bad.yaml (100%) rename tests/{test-case.yaml => test_cluster_yamls/kueue/aw_kueue.yaml} (93%) create mode 100644 tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml create mode 100644 tests/test_cluster_yamls/ray/default-appwrapper.yaml rename tests/{test-case-no-kueue-no-aw.yaml => test_cluster_yamls/ray/default-ray-cluster.yaml} (88%) rename tests/{test-case-custom-image.yaml => test_cluster_yamls/ray/unit-test-all-params.yaml} (83%) rename tests/{test-default-appwrapper.yaml => test_cluster_yamls/support_clusters/test-aw-a.yaml} (95%) create mode 100644 tests/test_cluster_yamls/support_clusters/test-aw-b.yaml rename tests/{test-case-no-mcad.yamls => test_cluster_yamls/support_clusters/test-rc-a.yaml} (86%) create mode 100644 tests/test_cluster_yamls/support_clusters/test-rc-b.yaml delete mode 100644 tests/unit_test.py delete mode 100644 tests/unit_test_support.py diff --git a/src/codeflare_sdk/common/kubernetes_cluster/auth_test.py b/src/codeflare_sdk/common/kubernetes_cluster/auth_test.py new file mode 100644 index 000000000..225ea6f94 --- /dev/null +++ b/src/codeflare_sdk/common/kubernetes_cluster/auth_test.py @@ -0,0 +1,166 @@ +# Copyright 2024 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from codeflare_sdk.common.kubernetes_cluster import ( + Authentication, + KubeConfigFileAuthentication, + TokenAuthentication, + config_check, +) +from kubernetes import client, config +import os +from pathlib import Path +import pytest + +parent = Path(__file__).resolve().parents[4] # project directory + + +def test_token_auth_creation(): + try: + token_auth = TokenAuthentication(token="token", server="server") + assert token_auth.token == "token" + assert token_auth.server == "server" + assert token_auth.skip_tls == False + assert token_auth.ca_cert_path == None + + token_auth = TokenAuthentication(token="token", server="server", skip_tls=True) + assert token_auth.token == "token" + assert token_auth.server == "server" + assert token_auth.skip_tls == True + assert token_auth.ca_cert_path == None + + os.environ["CF_SDK_CA_CERT_PATH"] = "/etc/pki/tls/custom-certs/ca-bundle.crt" + token_auth = TokenAuthentication(token="token", server="server", skip_tls=False) + assert token_auth.token == "token" + assert token_auth.server == "server" + assert token_auth.skip_tls == False + assert token_auth.ca_cert_path == "/etc/pki/tls/custom-certs/ca-bundle.crt" + os.environ.pop("CF_SDK_CA_CERT_PATH") + + token_auth = TokenAuthentication( + token="token", + server="server", + skip_tls=False, + ca_cert_path=f"{parent}/tests/auth-test.crt", + ) + assert token_auth.token == "token" + assert token_auth.server == "server" + assert token_auth.skip_tls == False + assert token_auth.ca_cert_path == f"{parent}/tests/auth-test.crt" + + except Exception: + assert 0 == 1 + + +def 
test_token_auth_login_logout(mocker): + mocker.patch.object(client, "ApiClient") + + token_auth = TokenAuthentication( + token="testtoken", server="testserver:6443", skip_tls=False, ca_cert_path=None + ) + assert token_auth.login() == ("Logged into testserver:6443") + assert token_auth.logout() == ("Successfully logged out of testserver:6443") + + +def test_token_auth_login_tls(mocker): + mocker.patch.object(client, "ApiClient") + + token_auth = TokenAuthentication( + token="testtoken", server="testserver:6443", skip_tls=True, ca_cert_path=None + ) + assert token_auth.login() == ("Logged into testserver:6443") + token_auth = TokenAuthentication( + token="testtoken", server="testserver:6443", skip_tls=False, ca_cert_path=None + ) + assert token_auth.login() == ("Logged into testserver:6443") + token_auth = TokenAuthentication( + token="testtoken", + server="testserver:6443", + skip_tls=False, + ca_cert_path=f"{parent}/tests/auth-test.crt", + ) + assert token_auth.login() == ("Logged into testserver:6443") + + os.environ["CF_SDK_CA_CERT_PATH"] = f"{parent}/tests/auth-test.crt" + token_auth = TokenAuthentication( + token="testtoken", + server="testserver:6443", + skip_tls=False, + ) + assert token_auth.login() == ("Logged into testserver:6443") + + +def test_config_check_no_config_file(mocker): + mocker.patch("os.path.expanduser", return_value="/mock/home/directory") + mocker.patch("os.path.isfile", return_value=False) + mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.config_path", None) + mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.api_client", None) + + with pytest.raises(PermissionError): + config_check() + + +def test_config_check_with_incluster_config(mocker): + mocker.patch("os.path.expanduser", return_value="/mock/home/directory") + mocker.patch("os.path.isfile", return_value=False) + mocker.patch.dict(os.environ, {"KUBERNETES_PORT": "number"}) + mocker.patch("kubernetes.config.load_incluster_config", side_effect=None) + 
mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.config_path", None) + mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.api_client", None) + + result = config_check() + assert result == None + + +def test_config_check_with_existing_config_file(mocker): + mocker.patch("os.path.expanduser", return_value="/mock/home/directory") + mocker.patch("os.path.isfile", return_value=True) + mocker.patch("kubernetes.config.load_kube_config", side_effect=None) + mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.config_path", None) + mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.api_client", None) + + result = config_check() + assert result == None + + +def test_config_check_with_config_path_and_no_api_client(mocker): + mocker.patch( + "codeflare_sdk.common.kubernetes_cluster.auth.config_path", "/mock/config/path" + ) + mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.api_client", None) + result = config_check() + assert result == "/mock/config/path" + + +def test_load_kube_config(mocker): + mocker.patch.object(config, "load_kube_config") + kube_config_auth = KubeConfigFileAuthentication( + kube_config_path="/path/to/your/config" + ) + response = kube_config_auth.load_kube_config() + + assert ( + response + == "Loaded user config file at path %s" % kube_config_auth.kube_config_path + ) + + kube_config_auth = KubeConfigFileAuthentication(kube_config_path=None) + response = kube_config_auth.load_kube_config() + assert response == "Please specify a config file path" + + +def test_auth_coverage(): + abstract = Authentication() + abstract.login() + abstract.logout() diff --git a/src/codeflare_sdk/common/kueue/kueue_test.py b/src/codeflare_sdk/common/kueue/kueue_test.py new file mode 100644 index 000000000..a4e984c30 --- /dev/null +++ b/src/codeflare_sdk/common/kueue/kueue_test.py @@ -0,0 +1,137 @@ +# Copyright 2024 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in 
compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from ..utils.unit_test_support import get_local_queue, createClusterConfig +from unittest.mock import patch +from codeflare_sdk.ray.cluster.cluster import Cluster, ClusterConfiguration +import yaml +import os +import filecmp +from pathlib import Path + +parent = Path(__file__).resolve().parents[4] # project directory +aw_dir = os.path.expanduser("~/.codeflare/resources/") + + +def test_none_local_queue(mocker): + mocker.patch("kubernetes.client.CustomObjectsApi.list_namespaced_custom_object") + config = ClusterConfiguration(name="unit-test-aw-kueue", namespace="ns") + config.name = "unit-test-aw-kueue" + config.local_queue = None + + cluster = Cluster(config) + assert cluster.config.local_queue == None + + +def test_cluster_creation_no_aw_local_queue(mocker): + # With written resources + # Create Ray Cluster with local queue specified + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", + return_value={"spec": {"domain": "apps.cluster.awsroute.org"}}, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), + ) + config = createClusterConfig() + config.name = "unit-test-cluster-kueue" + config.write_to_file = True + config.local_queue = "local-queue-default" + cluster = Cluster(config) + assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster-kueue.yaml" + assert cluster.app_wrapper_name == "unit-test-cluster-kueue" + 
assert filecmp.cmp( + f"{aw_dir}unit-test-cluster-kueue.yaml", + f"{parent}/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml", + shallow=True, + ) + + # With resources loaded in memory, no Local Queue specified. + config = createClusterConfig() + config.name = "unit-test-cluster-kueue" + config.write_to_file = False + cluster = Cluster(config) + + test_rc = yaml.load(cluster.app_wrapper_yaml, Loader=yaml.FullLoader) + with open(f"{parent}/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml") as f: + expected_rc = yaml.load(f, Loader=yaml.FullLoader) + assert test_rc == expected_rc + + +def test_aw_creation_local_queue(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", + return_value={"spec": {"domain": "apps.cluster.awsroute.org"}}, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), + ) + config = createClusterConfig() + config.name = "unit-test-aw-kueue" + config.appwrapper = True + config.write_to_file = True + config.local_queue = "local-queue-default" + cluster = Cluster(config) + assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-aw-kueue.yaml" + assert cluster.app_wrapper_name == "unit-test-aw-kueue" + assert filecmp.cmp( + f"{aw_dir}unit-test-aw-kueue.yaml", + f"{parent}/tests/test_cluster_yamls/kueue/aw_kueue.yaml", + shallow=True, + ) + + # With resources loaded in memory, no Local Queue specified. 
+ config = createClusterConfig() + config.name = "unit-test-aw-kueue" + config.appwrapper = True + config.write_to_file = False + cluster = Cluster(config) + + test_rc = yaml.load(cluster.app_wrapper_yaml, Loader=yaml.FullLoader) + with open(f"{parent}/tests/test_cluster_yamls/kueue/aw_kueue.yaml") as f: + expected_rc = yaml.load(f, Loader=yaml.FullLoader) + assert test_rc == expected_rc + + +def test_get_local_queue_exists_fail(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", + return_value={"spec": {"domain": "apps.cluster.awsroute.org"}}, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), + ) + config = createClusterConfig() + config.name = "unit-test-aw-kueue" + config.appwrapper = True + config.write_to_file = True + config.local_queue = "local_queue_doesn't_exist" + try: + Cluster(config) + except ValueError as e: + assert ( + str(e) + == "local_queue provided does not exist or is not in this namespace. Please provide the correct local_queue name in Cluster Configuration" + ) + + +# Make sure to always keep this function last +def test_cleanup(): + os.remove(f"{aw_dir}unit-test-cluster-kueue.yaml") + os.remove(f"{aw_dir}unit-test-aw-kueue.yaml") diff --git a/src/codeflare_sdk/common/utils/generate_cert_test.py b/src/codeflare_sdk/common/utils/generate_cert_test.py new file mode 100644 index 000000000..b4439c201 --- /dev/null +++ b/src/codeflare_sdk/common/utils/generate_cert_test.py @@ -0,0 +1,114 @@ +# Copyright 2024 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 + +from cryptography.hazmat.primitives.serialization import ( + Encoding, + PublicFormat, + load_pem_private_key, +) +from cryptography.x509 import load_pem_x509_certificate +import os +from codeflare_sdk.common.utils.generate_cert import ( + export_env, + generate_ca_cert, + generate_tls_cert, +) +from kubernetes import client + + +def test_generate_ca_cert(): + """ + test the function codeflare_sdk.common.utils.generate_cert.generate_ca_cert generates the correct outputs + """ + key, certificate = generate_ca_cert() + cert = load_pem_x509_certificate(base64.b64decode(certificate)) + private_pub_key_bytes = ( + load_pem_private_key(base64.b64decode(key), password=None) + .public_key() + .public_bytes(Encoding.PEM, PublicFormat.SubjectPublicKeyInfo) + ) + cert_pub_key_bytes = cert.public_key().public_bytes( + Encoding.PEM, PublicFormat.SubjectPublicKeyInfo + ) + assert type(key) == str + assert type(certificate) == str + # Verify ca.cert is self signed + assert cert.verify_directly_issued_by(cert) == None + # Verify cert has the public key bytes from the private key + assert cert_pub_key_bytes == private_pub_key_bytes + + +def secret_ca_retreival(secret_name, namespace): + ca_private_key_bytes, ca_cert = generate_ca_cert() + data = {"ca.crt": ca_cert, "ca.key": ca_private_key_bytes} + assert secret_name == "ca-secret-cluster" + assert namespace == "namespace" + return client.models.V1Secret(data=data) + + +def test_generate_tls_cert(mocker): + """ + test the function codeflare_sdk.common.utils.generate_cert.generate_tls_cert generates the correct outputs + """ + 
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + mocker.patch( + "codeflare_sdk.common.utils.generate_cert.get_secret_name", + return_value="ca-secret-cluster", + ) + mocker.patch( + "kubernetes.client.CoreV1Api.read_namespaced_secret", + side_effect=secret_ca_retreival, + ) + + generate_tls_cert("cluster", "namespace") + assert os.path.exists("tls-cluster-namespace") + assert os.path.exists(os.path.join("tls-cluster-namespace", "ca.crt")) + assert os.path.exists(os.path.join("tls-cluster-namespace", "tls.crt")) + assert os.path.exists(os.path.join("tls-cluster-namespace", "tls.key")) + + # verify that the signed tls.crt is issued by the ca_cert (root cert) + with open(os.path.join("tls-cluster-namespace", "tls.crt"), "r") as f: + tls_cert = load_pem_x509_certificate(f.read().encode("utf-8")) + with open(os.path.join("tls-cluster-namespace", "ca.crt"), "r") as f: + root_cert = load_pem_x509_certificate(f.read().encode("utf-8")) + assert tls_cert.verify_directly_issued_by(root_cert) == None + + +def test_export_env(): + """ + test the function codeflare_sdk.common.utils.generate_cert.export_env generates the correct outputs + """ + tls_dir = "cluster" + ns = "namespace" + export_env(tls_dir, ns) + assert os.environ["RAY_USE_TLS"] == "1" + assert os.environ["RAY_TLS_SERVER_CERT"] == os.path.join( + os.getcwd(), f"tls-{tls_dir}-{ns}", "tls.crt" + ) + assert os.environ["RAY_TLS_SERVER_KEY"] == os.path.join( + os.getcwd(), f"tls-{tls_dir}-{ns}", "tls.key" + ) + assert os.environ["RAY_TLS_CA_CERT"] == os.path.join( + os.getcwd(), f"tls-{tls_dir}-{ns}", "ca.crt" + ) + + +# Make sure to always keep this function last +def test_cleanup(): + os.remove("tls-cluster-namespace/ca.crt") + os.remove("tls-cluster-namespace/tls.crt") + os.remove("tls-cluster-namespace/tls.key") + os.rmdir("tls-cluster-namespace") diff --git a/src/codeflare_sdk/common/utils/unit_test_support.py b/src/codeflare_sdk/common/utils/unit_test_support.py new file mode 
100644 index 000000000..61a16260c --- /dev/null +++ b/src/codeflare_sdk/common/utils/unit_test_support.py @@ -0,0 +1,383 @@ +# Copyright 2024 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from codeflare_sdk.ray.cluster.cluster import ( + Cluster, + ClusterConfiguration, +) +import os +import yaml +from pathlib import Path +from kubernetes import client + +parent = Path(__file__).resolve().parents[4] # project directory +aw_dir = os.path.expanduser("~/.codeflare/resources/") + + +def createClusterConfig(): + config = ClusterConfiguration( + name="unit-test-cluster", + namespace="ns", + num_workers=2, + worker_cpu_requests=3, + worker_cpu_limits=4, + worker_memory_requests=5, + worker_memory_limits=6, + appwrapper=True, + write_to_file=False, + ) + return config + + +def createClusterWithConfig(mocker): + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", + return_value={"spec": {"domain": "apps.cluster.awsroute.org"}}, + ) + cluster = Cluster(createClusterConfig()) + return cluster + + +def createClusterWrongType(): + config = ClusterConfiguration( + name="unit-test-cluster", + namespace="ns", + num_workers=2, + worker_cpu_requests=[], + worker_cpu_limits=4, + worker_memory_requests=5, + worker_memory_limits=6, + worker_extended_resource_requests={"nvidia.com/gpu": 7}, + appwrapper=True, + machine_types=[True, False], + 
image_pull_secrets=["unit-test-pull-secret"], + image="quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06", + write_to_file=True, + labels={1: 1}, + ) + return config + + +def get_package_and_version(package_name, requirements_file_path): + with open(requirements_file_path, "r") as file: + for line in file: + if line.strip().startswith(f"{package_name}=="): + return line.strip() + return None + + +def get_local_queue(group, version, namespace, plural): + assert group == "kueue.x-k8s.io" + assert version == "v1beta1" + assert namespace == "ns" + assert plural == "localqueues" + local_queues = { + "apiVersion": "kueue.x-k8s.io/v1beta1", + "items": [ + { + "apiVersion": "kueue.x-k8s.io/v1beta1", + "kind": "LocalQueue", + "metadata": { + "annotations": {"kueue.x-k8s.io/default-queue": "true"}, + "name": "local-queue-default", + "namespace": "ns", + }, + "spec": {"clusterQueue": "cluster-queue"}, + }, + { + "apiVersion": "kueue.x-k8s.io/v1beta1", + "kind": "LocalQueue", + "metadata": { + "name": "team-a-queue", + "namespace": "ns", + }, + "spec": {"clusterQueue": "team-a-queue"}, + }, + ], + "kind": "LocalQueueList", + "metadata": {"continue": "", "resourceVersion": "2266811"}, + } + return local_queues + + +def arg_check_aw_apply_effect(group, version, namespace, plural, body, *args): + assert group == "workload.codeflare.dev" + assert version == "v1beta2" + assert namespace == "ns" + assert plural == "appwrappers" + with open(f"{aw_dir}test.yaml") as f: + aw = yaml.load(f, Loader=yaml.FullLoader) + assert body == aw + assert args == tuple() + + +def arg_check_aw_del_effect(group, version, namespace, plural, name, *args): + assert group == "workload.codeflare.dev" + assert version == "v1beta2" + assert namespace == "ns" + assert plural == "appwrappers" + assert name == "test" + assert args == tuple() + + +def get_cluster_object(file_a, file_b): + with open(file_a) as f: + cluster_a = yaml.load(f, Loader=yaml.FullLoader) + with 
open(file_b) as f: + cluster_b = yaml.load(f, Loader=yaml.FullLoader) + + return cluster_a, cluster_b + + +def get_ray_obj(group, version, namespace, plural): + # To be used for mocking list_namespaced_custom_object for Ray Clusters + rc_a_path = f"{parent}/tests/test_cluster_yamls/support_clusters/test-rc-a.yaml" + rc_b_path = f"{parent}/tests/test_cluster_yamls/support_clusters/test-rc-b.yaml" + rc_a, rc_b = get_cluster_object(rc_a_path, rc_b_path) + + rc_list = {"items": [rc_a, rc_b]} + return rc_list + + +def get_ray_obj_with_status(group, version, namespace, plural): + # To be used for mocking list_namespaced_custom_object for Ray Clusters with statuses + rc_a_path = f"{parent}/tests/test_cluster_yamls/support_clusters/test-rc-a.yaml" + rc_b_path = f"{parent}/tests/test_cluster_yamls/support_clusters/test-rc-b.yaml" + rc_a, rc_b = get_cluster_object(rc_a_path, rc_b_path) + + rc_a.update( + { + "status": { + "desiredWorkerReplicas": 1, + "endpoints": { + "client": "10001", + "dashboard": "8265", + "gcs": "6379", + "metrics": "8080", + }, + "head": {"serviceIP": "172.30.179.88"}, + "lastUpdateTime": "2024-03-05T09:55:37Z", + "maxWorkerReplicas": 1, + "minWorkerReplicas": 1, + "observedGeneration": 1, + "state": "ready", + }, + } + ) + rc_b.update( + { + "status": { + "availableWorkerReplicas": 2, + "desiredWorkerReplicas": 1, + "endpoints": { + "client": "10001", + "dashboard": "8265", + "gcs": "6379", + }, + "lastUpdateTime": "2023-02-22T16:26:16Z", + "maxWorkerReplicas": 1, + "minWorkerReplicas": 1, + "state": "suspended", + } + } + ) + + rc_list = {"items": [rc_a, rc_b]} + return rc_list + + +def get_aw_obj(group, version, namespace, plural): + # To be used for mocking list_namespaced_custom_object for AppWrappers + aw_a_path = f"{parent}/tests/test_cluster_yamls/support_clusters/test-aw-a.yaml" + aw_b_path = f"{parent}/tests/test_cluster_yamls/support_clusters/test-aw-b.yaml" + aw_a, aw_b = get_cluster_object(aw_a_path, aw_b_path) + + aw_list = {"items": 
[aw_a, aw_b]} + return aw_list + + +def get_aw_obj_with_status(group, version, namespace, plural): + # To be used for mocking list_namespaced_custom_object for AppWrappers with statuses + aw_a_path = f"{parent}/tests/test_cluster_yamls/support_clusters/test-aw-a.yaml" + aw_b_path = f"{parent}/tests/test_cluster_yamls/support_clusters/test-aw-b.yaml" + aw_a, aw_b = get_cluster_object(aw_a_path, aw_b_path) + + aw_a.update( + { + "status": { + "phase": "Running", + }, + } + ) + aw_b.update( + { + "status": { + "phase": "Suspended", + }, + } + ) + + aw_list = {"items": [aw_a, aw_b]} + return aw_list + + +def get_named_aw(group, version, namespace, plural, name): + aws = get_aw_obj("workload.codeflare.dev", "v1beta2", "ns", "appwrappers") + return aws["items"][0] + + +def arg_check_del_effect(group, version, namespace, plural, name, *args): + assert namespace == "ns" + assert args == tuple() + if plural == "appwrappers": + assert group == "workload.codeflare.dev" + assert version == "v1beta2" + assert name == "unit-test-cluster" + elif plural == "rayclusters": + assert group == "ray.io" + assert version == "v1" + assert name == "unit-test-cluster-ray" + elif plural == "ingresses": + assert group == "networking.k8s.io" + assert version == "v1" + assert name == "ray-dashboard-unit-test-cluster-ray" + + +def arg_check_apply_effect(group, version, namespace, plural, body, *args): + assert namespace == "ns" + assert args == tuple() + if plural == "appwrappers": + assert group == "workload.codeflare.dev" + assert version == "v1beta2" + elif plural == "rayclusters": + assert group == "ray.io" + assert version == "v1" + elif plural == "ingresses": + assert group == "networking.k8s.io" + assert version == "v1" + elif plural == "routes": + assert group == "route.openshift.io" + assert version == "v1" + else: + assert 1 == 0 + + +def get_obj_none(group, version, namespace, plural): + return {"items": []} + + +def route_list_retrieval(group, version, namespace, plural): + assert 
group == "route.openshift.io" + assert version == "v1" + assert namespace == "ns" + assert plural == "routes" + return { + "kind": "RouteList", + "apiVersion": "route.openshift.io/v1", + "metadata": {"resourceVersion": "6072398"}, + "items": [ + { + "metadata": { + "name": "ray-dashboard-quicktest", + "namespace": "ns", + }, + "spec": { + "host": "ray-dashboard-quicktest-opendatahub.apps.cluster.awsroute.org", + "to": { + "kind": "Service", + "name": "quicktest-head-svc", + "weight": 100, + }, + "port": {"targetPort": "dashboard"}, + "tls": {"termination": "edge"}, + }, + }, + { + "metadata": { + "name": "rayclient-quicktest", + "namespace": "ns", + }, + "spec": { + "host": "rayclient-quicktest-opendatahub.apps.cluster.awsroute.org", + "to": { + "kind": "Service", + "name": "quicktest-head-svc", + "weight": 100, + }, + "port": {"targetPort": "client"}, + "tls": {"termination": "passthrough"}, + }, + }, + ], + } + + +def ingress_retrieval( + cluster_name="unit-test-cluster", client_ing: bool = False, annotations: dict = None +): + dashboard_ingress = mocked_ingress(8265, cluster_name, annotations) + if client_ing: + client_ingress = mocked_ingress( + 10001, cluster_name=cluster_name, annotations=annotations + ) + mock_ingress_list = client.V1IngressList( + items=[client_ingress, dashboard_ingress] + ) + else: + mock_ingress_list = client.V1IngressList(items=[dashboard_ingress]) + + return mock_ingress_list + + +def mocked_ingress(port, cluster_name="unit-test-cluster", annotations: dict = None): + labels = {"ingress-owner": cluster_name} + if port == 10001: + name = f"rayclient-{cluster_name}" + else: + name = f"ray-dashboard-{cluster_name}" + mock_ingress = client.V1Ingress( + metadata=client.V1ObjectMeta( + name=name, + annotations=annotations, + labels=labels, + owner_references=[ + client.V1OwnerReference( + api_version="v1", kind="Ingress", name=cluster_name, uid="unique-id" + ) + ], + ), + spec=client.V1IngressSpec( + rules=[ + client.V1IngressRule( + 
host=f"{name}-ns.apps.cluster.awsroute.org", + http=client.V1HTTPIngressRuleValue( + paths=[ + client.V1HTTPIngressPath( + path_type="Prefix", + path="/", + backend=client.V1IngressBackend( + service=client.V1IngressServiceBackend( + name="head-svc-test", + port=client.V1ServiceBackendPort(number=port), + ) + ), + ) + ] + ), + ) + ], + ), + ) + return mock_ingress diff --git a/src/codeflare_sdk/common/widgets/widgets_test.py b/src/codeflare_sdk/common/widgets/widgets_test.py new file mode 100644 index 000000000..e01b91933 --- /dev/null +++ b/src/codeflare_sdk/common/widgets/widgets_test.py @@ -0,0 +1,469 @@ +# Copyright 2024 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import codeflare_sdk.common.widgets.widgets as cf_widgets
import pandas as pd
from unittest.mock import MagicMock, patch
from ..utils.unit_test_support import get_local_queue, createClusterConfig
from codeflare_sdk.ray.cluster.cluster import Cluster
from codeflare_sdk.ray.cluster.status import (
    RayCluster,
    RayClusterStatus,
)
import pytest
from kubernetes import client


@patch.dict(
    "os.environ", {"JPY_SESSION_NAME": "example-test"}
)  # Mock Jupyter environment variable
def test_cluster_up_down_buttons(mocker):
    """Check that the up/down button handlers drive the Cluster object.

    The ipywidgets classes are patched so the handlers registered through
    ``on_click`` can be pulled out of the mocks and invoked directly; the
    test then asserts that ``up``, ``down`` and ``wait_ready`` were each
    called exactly once.
    """
    mocker.patch("kubernetes.client.ApisApi.get_api_versions")
    mocker.patch(
        "kubernetes.client.CustomObjectsApi.get_cluster_custom_object",
        return_value={"spec": {"domain": "apps.cluster.awsroute.org"}},
    )
    mocker.patch(
        "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
        return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
    )
    cluster = Cluster(createClusterConfig())

    with patch("ipywidgets.Button") as MockButton, patch(
        "ipywidgets.Checkbox"
    ) as MockCheckbox, patch("ipywidgets.Output"), patch("ipywidgets.HBox"), patch(
        "ipywidgets.VBox"
    ), patch.object(
        cluster, "up"
    ) as mock_up, patch.object(
        cluster, "down"
    ) as mock_down, patch.object(
        cluster, "wait_ready"
    ) as mock_wait_ready:
        # Create mock button & CheckBox instances
        mock_up_button = MagicMock()
        mock_down_button = MagicMock()
        mock_wait_ready_check_box = MagicMock()

        # Ensure the mock Button class returns the mock button instances in sequence
        MockCheckbox.side_effect = [mock_wait_ready_check_box]
        MockButton.side_effect = [mock_up_button, mock_down_button]

        # Call the method under test
        cf_widgets.cluster_up_down_buttons(cluster)

        # Simulate checkbox being checked
        mock_wait_ready_check_box.value = True

        # Simulate the button clicks by calling the handlers that were
        # registered through the mocked on_click calls
        mock_up_button.on_click.call_args[0][0](None)  # Simulate clicking "Cluster Up"
        mock_down_button.on_click.call_args[0][0](
            None
        )  # Simulate clicking "Cluster Down"

        # Check that `wait_ready`, `up` and `down` were each called once
        mock_wait_ready.assert_called_once()
        mock_up.assert_called_once()
        mock_down.assert_called_once()


@patch.dict("os.environ", {}, clear=True)  # Mock environment with no variables
def test_is_notebook_false():
    """is_notebook() is False when the environment is empty."""
    assert cf_widgets.is_notebook() is False


@patch.dict(
    "os.environ", {"JPY_SESSION_NAME": "example-test"}
)  # Mock Jupyter environment variable
def test_is_notebook_true():
    """is_notebook() is True when JPY_SESSION_NAME is set."""
    assert cf_widgets.is_notebook() is True


def test_view_clusters(mocker, capsys):
    """Exercise view_clusters outside a notebook, with no clusters, and with data."""
    # If is not a notebook environment, a warning should be raised
    with pytest.warns(
        UserWarning,
        match="view_clusters can only be used in a Jupyter Notebook environment.",
    ):
        result = cf_widgets.view_clusters("default")

    # Assert the function returns None when not in a notebook environment
    assert result is None

    # Prepare to run view_clusters when notebook environment is detected
    mocker.patch("codeflare_sdk.common.widgets.widgets.is_notebook", return_value=True)
    mock_get_current_namespace = mocker.patch(
        "codeflare_sdk.ray.cluster.cluster.get_current_namespace",
        return_value="default",
    )
    namespace = mock_get_current_namespace.return_value

    # Assert the function returns None when no clusters are found
    mock_fetch_cluster_data = mocker.patch(
        "codeflare_sdk.common.widgets.widgets._fetch_cluster_data",
        return_value=pd.DataFrame(),
    )
    result = cf_widgets.view_clusters()
    captured = capsys.readouterr()
    assert mock_fetch_cluster_data.return_value.empty
    assert "No clusters found in the default namespace." in captured.out
    assert result is None

    # Prepare to run view_clusters with a test DataFrame
    mock_fetch_cluster_data = mocker.patch(
        "codeflare_sdk.common.widgets.widgets._fetch_cluster_data",
        return_value=pd.DataFrame(
            {
                "Name": ["test-cluster"],
                "Namespace": ["default"],
                "Num Workers": ["1"],
                "Head GPUs": ["0"],
                "Worker GPUs": ["0"],
                "Head CPU Req~Lim": ["1~1"],
                "Head Memory Req~Lim": ["1Gi~1Gi"],
                "Worker CPU Req~Lim": ["1~1"],
                "Worker Memory Req~Lim": ["1Gi~1Gi"],
                "status": ['Ready ✓'],
            }
        ),
    )
    # Create a RayClusterManagerWidgets instance
    ray_cluster_manager_instance = cf_widgets.RayClusterManagerWidgets(
        ray_clusters_df=mock_fetch_cluster_data.return_value, namespace=namespace
    )
    # Patch the constructor of RayClusterManagerWidgets to return our initialized instance
    mock_constructor = mocker.patch(
        "codeflare_sdk.common.widgets.widgets.RayClusterManagerWidgets",
        return_value=ray_cluster_manager_instance,
    )

    # Use a spy to track calls to display_widgets without replacing it
    spy_display_widgets = mocker.spy(ray_cluster_manager_instance, "display_widgets")

    cf_widgets.view_clusters()

    mock_constructor.assert_called_once_with(
        ray_clusters_df=mock_fetch_cluster_data.return_value, namespace=namespace
    )

    spy_display_widgets.assert_called_once()


def test_delete_cluster(mocker, capsys):
    """Check which custom resource _delete_cluster removes, and its timeout error."""
    name = "test-cluster"
    namespace = "default"

    mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
    mocker.patch("kubernetes.client.ApisApi.get_api_versions")

    mock_ray_cluster = MagicMock()
    # A side_effect list is consumed one entry per call, so the order of the
    # entries below is tied to the order of the three scenarios that follow.
    mocker.patch(
        "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object",
        side_effect=[
            mock_ray_cluster,
            client.ApiException(status=404),
            client.ApiException(status=404),
            mock_ray_cluster,
        ],
    )

    # In this scenario, the RayCluster exists and the AppWrapper does not.
    mocker.patch(
        "codeflare_sdk.ray.cluster.cluster._check_aw_exists", return_value=False
    )
    mock_delete_rc = mocker.patch(
        "kubernetes.client.CustomObjectsApi.delete_namespaced_custom_object"
    )
    cf_widgets._delete_cluster(name, namespace)

    mock_delete_rc.assert_called_once_with(
        group="ray.io",
        version="v1",
        namespace=namespace,
        plural="rayclusters",
        name=name,
    )

    # In this scenario, the AppWrapper exists and the RayCluster does not
    mocker.patch(
        "codeflare_sdk.ray.cluster.cluster._check_aw_exists", return_value=True
    )
    mock_delete_aw = mocker.patch(
        "kubernetes.client.CustomObjectsApi.delete_namespaced_custom_object"
    )
    cf_widgets._delete_cluster(name, namespace)

    mock_delete_aw.assert_called_once_with(
        group="workload.codeflare.dev",
        version="v1beta2",
        namespace=namespace,
        plural="appwrappers",
        name=name,
    )

    # In this scenario, the deletion of the resource times out.
    with pytest.raises(
        TimeoutError, match=f"Timeout waiting for {name} to be deleted."
    ):
        cf_widgets._delete_cluster(name, namespace, 1)


def test_ray_cluster_manager_widgets_init(mocker, capsys):
    """Check RayClusterManagerWidgets construction and its button click handlers."""
    namespace = "default"
    mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
    mocker.patch(
        "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
        return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
    )
    test_ray_clusters_df = pd.DataFrame(
        {
            "Name": ["test-cluster-1", "test-cluster-2"],
            "Namespace": [namespace, namespace],
            "Num Workers": ["1", "2"],
            "Head GPUs": ["0", "0"],
            "Worker GPUs": ["0", "0"],
            "Head CPU Req~Lim": ["1~1", "1~1"],
            "Head Memory Req~Lim": ["1Gi~1Gi", "1Gi~1Gi"],
            "Worker CPU Req~Lim": ["1~1", "1~1"],
            "Worker Memory Req~Lim": ["1Gi~1Gi", "1Gi~1Gi"],
            "status": [
                'Ready ✓',
                'Ready ✓',
            ],
        }
    )
    mock_fetch_cluster_data = mocker.patch(
        "codeflare_sdk.common.widgets.widgets._fetch_cluster_data",
        return_value=test_ray_clusters_df,
    )
    mocker.patch(
        "codeflare_sdk.ray.cluster.cluster.get_current_namespace",
        return_value=namespace,
    )
    mock_delete_cluster = mocker.patch(
        "codeflare_sdk.common.widgets.widgets._delete_cluster"
    )

    # Mock the ipywidgets classes used by the manager
    mock_toggle_buttons = mocker.patch("ipywidgets.ToggleButtons")
    mock_button = mocker.patch("ipywidgets.Button")
    mock_output = mocker.patch("ipywidgets.Output")

    # Initialize the RayClusterManagerWidgets instance
    ray_cluster_manager_instance = cf_widgets.RayClusterManagerWidgets(
        ray_clusters_df=test_ray_clusters_df, namespace=namespace
    )

    # Assertions for DataFrame and attributes
    assert ray_cluster_manager_instance.ray_clusters_df.equals(
        test_ray_clusters_df
    ), "ray_clusters_df attribute does not match the input DataFrame"
    assert (
        ray_cluster_manager_instance.namespace == namespace
    ), f"Expected namespace to be '{namespace}', but got '{ray_cluster_manager_instance.namespace}'"
    assert (
        ray_cluster_manager_instance.classification_widget.options
        == test_ray_clusters_df["Name"].tolist()
    ), "classification_widget options do not match the input DataFrame"

    # Assertions for widgets
    mock_toggle_buttons.assert_called_once_with(
        options=test_ray_clusters_df["Name"].tolist(),
        value=test_ray_clusters_df["Name"].tolist()[0],
        description="Select an existing cluster:",
    )
    assert (
        ray_cluster_manager_instance.classification_widget
        == mock_toggle_buttons.return_value
    ), "classification_widget is not set correctly"
    assert (
        ray_cluster_manager_instance.delete_button == mock_button.return_value
    ), "delete_button is not set correctly"
    assert (
        ray_cluster_manager_instance.list_jobs_button == mock_button.return_value
    ), "list_jobs_button is not set correctly"
    assert (
        ray_cluster_manager_instance.ray_dashboard_button == mock_button.return_value
    ), "ray_dashboard_button is not set correctly"
    assert (
        ray_cluster_manager_instance.raycluster_data_output == mock_output.return_value
    ), "raycluster_data_output is not set correctly"
    assert (
        ray_cluster_manager_instance.user_output == mock_output.return_value
    ), "user_output is not set correctly"
    assert (
        ray_cluster_manager_instance.url_output == mock_output.return_value
    ), "url_output is not set correctly"

    ### Test button click events
    mock_delete_button = MagicMock()
    mock_list_jobs_button = MagicMock()
    mock_ray_dashboard_button = MagicMock()

    mock_javascript = mocker.patch("codeflare_sdk.common.widgets.widgets.Javascript")
    ray_cluster_manager_instance.url_output = MagicMock()

    mock_dashboard_uri = mocker.patch(
        "codeflare_sdk.ray.cluster.cluster.Cluster.cluster_dashboard_uri",
        return_value="https://ray-dashboard-test-cluster-1-ns.apps.cluster.awsroute.org",
    )

    # Simulate clicking the list jobs button
    ray_cluster_manager_instance.classification_widget.value = "test-cluster-1"
    ray_cluster_manager_instance._on_list_jobs_button_click(mock_list_jobs_button)

    captured = capsys.readouterr()
    assert (
        f"Opening Ray Jobs Dashboard for test-cluster-1 cluster:\n{mock_dashboard_uri.return_value}/#/jobs"
        in captured.out
    )
    mock_javascript.assert_called_with(
        f'window.open("{mock_dashboard_uri.return_value}/#/jobs", "_blank");'
    )

    # Simulate clicking the Ray dashboard button
    ray_cluster_manager_instance.classification_widget.value = "test-cluster-1"
    ray_cluster_manager_instance._on_ray_dashboard_button_click(
        mock_ray_dashboard_button
    )

    captured = capsys.readouterr()
    assert (
        f"Opening Ray Dashboard for test-cluster-1 cluster:\n{mock_dashboard_uri.return_value}"
        in captured.out
    )
    mock_javascript.assert_called_with(
        f'window.open("{mock_dashboard_uri.return_value}", "_blank");'
    )

    # Simulate clicking the delete button
    ray_cluster_manager_instance.classification_widget.value = "test-cluster-1"
    ray_cluster_manager_instance._on_delete_button_click(mock_delete_button)
    mock_delete_cluster.assert_called_with("test-cluster-1", namespace)

    # From here on the patched fetch returns an empty frame
    mock_fetch_cluster_data.return_value = pd.DataFrame()
    ray_cluster_manager_instance.classification_widget.value = "test-cluster-2"
    ray_cluster_manager_instance._on_delete_button_click(mock_delete_button)
    mock_delete_cluster.assert_called_with("test-cluster-2", namespace)

    # Assert on deletion that the dataframe is empty
    assert (
        ray_cluster_manager_instance.ray_clusters_df.empty
    ), "Expected DataFrame to be empty after deletion"

    captured = capsys.readouterr()
    assert (
        f"Cluster test-cluster-1 in the {namespace} namespace was deleted successfully."
        in captured.out
    )


def test_fetch_cluster_data(mocker):
    """Check the DataFrame _fetch_cluster_data builds from list_all_clusters."""
    # Return empty dataframe when no clusters are found
    mocker.patch("codeflare_sdk.ray.cluster.cluster.list_all_clusters", return_value=[])
    df = cf_widgets._fetch_cluster_data(namespace="default")
    assert df.empty

    # Create mock RayCluster objects
    mock_raycluster1 = MagicMock(spec=RayCluster)
    mock_raycluster1.name = "test-cluster-1"
    mock_raycluster1.namespace = "default"
    mock_raycluster1.num_workers = 1
    mock_raycluster1.head_extended_resources = {"nvidia.com/gpu": "1"}
    mock_raycluster1.worker_extended_resources = {"nvidia.com/gpu": "2"}
    mock_raycluster1.head_cpu_requests = "500m"
    mock_raycluster1.head_cpu_limits = "1000m"
    mock_raycluster1.head_mem_requests = "1Gi"
    mock_raycluster1.head_mem_limits = "2Gi"
    mock_raycluster1.worker_cpu_requests = "1000m"
    mock_raycluster1.worker_cpu_limits = "2000m"
    mock_raycluster1.worker_mem_requests = "2Gi"
    mock_raycluster1.worker_mem_limits = "4Gi"
    # The status is the real enum member; an earlier MagicMock placeholder
    # was overwritten before use and has been dropped.
    mock_raycluster1.status = RayClusterStatus.READY

    mock_raycluster2 = MagicMock(spec=RayCluster)
    mock_raycluster2.name = "test-cluster-2"
    mock_raycluster2.namespace = "default"
    mock_raycluster2.num_workers = 2
    mock_raycluster2.head_extended_resources = {}
    mock_raycluster2.worker_extended_resources = {}
    mock_raycluster2.head_cpu_requests = None
    mock_raycluster2.head_cpu_limits = None
    mock_raycluster2.head_mem_requests = None
    mock_raycluster2.head_mem_limits = None
    mock_raycluster2.worker_cpu_requests = None
    mock_raycluster2.worker_cpu_limits = None
    mock_raycluster2.worker_mem_requests = None
    mock_raycluster2.worker_mem_limits = None
    mock_raycluster2.status = RayClusterStatus.SUSPENDED

    with patch(
        "codeflare_sdk.ray.cluster.cluster.list_all_clusters",
        return_value=[mock_raycluster1, mock_raycluster2],
    ):
        # Call the function under test
        df = cf_widgets._fetch_cluster_data(namespace="default")

    # Expected DataFrame
    expected_data = {
        "Name": ["test-cluster-1", "test-cluster-2"],
        "Namespace": ["default", "default"],
        "Num Workers": [1, 2],
        "Head GPUs": ["nvidia.com/gpu: 1", "0"],
        "Worker GPUs": ["nvidia.com/gpu: 2", "0"],
        "Head CPU Req~Lim": ["500m~1000m", "0~0"],
        "Head Memory Req~Lim": ["1Gi~2Gi", "0~0"],
        "Worker CPU Req~Lim": ["1000m~2000m", "0~0"],
        "Worker Memory Req~Lim": ["2Gi~4Gi", "0~0"],
        "status": [
            'Ready ✓',
            'Suspended ❄️',
        ],
    }

    expected_df = pd.DataFrame(expected_data)

    # Assert that the DataFrame matches expected
    pd.testing.assert_frame_equal(
        df.reset_index(drop=True), expected_df.reset_index(drop=True)
    )


def test_format_status():
    """Check _format_status for every listed status and for an unknown value."""
    # Test each possible status
    test_cases = [
        (RayClusterStatus.READY, 'Ready ✓'),
        (
            RayClusterStatus.SUSPENDED,
            'Suspended ❄️',
        ),
        (RayClusterStatus.FAILED, 'Failed ✗'),
        (RayClusterStatus.UNHEALTHY, 'Unhealthy'),
        (RayClusterStatus.UNKNOWN, 'Unknown'),
    ]

    for status, expected_output in test_cases:
        assert (
            cf_widgets._format_status(status) == expected_output
        ), f"Failed for status: {status}"

    # Test an unrecognized status
    unrecognized_status = "NotAStatus"
    assert (
        cf_widgets._format_status(unrecognized_status) == "NotAStatus"
    ), "Failed for unrecognized status"


# ---------------------------------------------------------------------------
# Patch boundary preserved from the original diff (next file begins here):
# diff --git a/src/codeflare_sdk/ray/appwrapper/awload_test.py b/src/codeflare_sdk/ray/appwrapper/awload_test.py
# new file mode 100644
# index 000000000..6909394b2
# --- /dev/null
# +++ b/src/codeflare_sdk/ray/appwrapper/awload_test.py
# @@ -0,0 +1,88 @@
# ---------------------------------------------------------------------------
# Copyright 2024 IBM, Red Hat
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from codeflare_sdk.common.utils.unit_test_support import (
    arg_check_aw_apply_effect,
    arg_check_aw_del_effect,
)
from codeflare_sdk.ray.appwrapper import AWManager
from codeflare_sdk.ray.cluster import Cluster, ClusterConfiguration
import os
from pathlib import Path

import pytest

parent = Path(__file__).resolve().parents[4]  # project directory
aw_dir = os.path.expanduser("~/.codeflare/resources/")


def test_AWManager_creation(mocker):
    """Check AWManager construction from a valid, a missing and a malformed file.

    The valid file is ``test.yaml`` under ``aw_dir``; the Cluster is built
    with ``write_to_file=True`` to produce it. Later tests in this module
    reuse that file.
    """
    mocker.patch("kubernetes.client.ApisApi.get_api_versions")
    mocker.patch("kubernetes.client.CustomObjectsApi.list_namespaced_custom_object")
    # Create test.yaml
    Cluster(
        ClusterConfiguration(
            name="test",
            namespace="ns",
            write_to_file=True,
            appwrapper=True,
        )
    )

    testaw = AWManager(f"{aw_dir}test.yaml")
    assert testaw.name == "test"
    assert testaw.namespace == "ns"
    assert testaw.submitted == False

    # pytest.raises fails the test when nothing is raised; the previous
    # try/except form passed silently in that case.
    with pytest.raises(FileNotFoundError) as missing_file:
        AWManager("fake")
    assert type(missing_file.value) is FileNotFoundError
    assert str(missing_file.value) == "[Errno 2] No such file or directory: 'fake'"

    bad_yaml = f"{parent}/tests/test_cluster_yamls/appwrapper/test-case-bad.yaml"
    with pytest.raises(ValueError) as malformed_file:
        AWManager(bad_yaml)
    assert type(malformed_file.value) is ValueError
    assert (
        str(malformed_file.value)
        == f"{bad_yaml} is not a correctly formatted AppWrapper yaml"
    )


def test_AWManager_submit_remove(mocker, capsys):
    """Check the submitted flag and printed message across remove/submit/remove.

    Reads ``test.yaml`` from ``aw_dir``, which test_AWManager_creation writes,
    so this test depends on that one having run first.
    """
    mocker.patch("kubernetes.client.ApisApi.get_api_versions")
    testaw = AWManager(f"{aw_dir}test.yaml")

    # Removing before any submit only prints a notice
    testaw.remove()
    captured = capsys.readouterr()
    assert (
        captured.out
        == "AppWrapper not submitted by this manager yet, nothing to remove\n"
    )
    assert testaw.submitted == False

    mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
    mocker.patch(
        "kubernetes.client.CustomObjectsApi.create_namespaced_custom_object",
        side_effect=arg_check_aw_apply_effect,
    )
    mocker.patch(
        "kubernetes.client.CustomObjectsApi.delete_namespaced_custom_object",
        side_effect=arg_check_aw_del_effect,
    )
    testaw.submit()
    assert testaw.submitted == True
    testaw.remove()
    assert testaw.submitted == False


# Make sure to always keep this function last
def test_cleanup():
    """Delete the test.yaml file used by the tests above."""
    os.remove(f"{aw_dir}test.yaml")


# ---------------------------------------------------------------------------
# Patch boundary preserved from the original diff (next file begins here):
# diff --git a/src/codeflare_sdk/ray/appwrapper/status_test.py b/src/codeflare_sdk/ray/appwrapper/status_test.py
# new file mode 100644
# index 000000000..8c693767c
# --- /dev/null
# +++ b/src/codeflare_sdk/ray/appwrapper/status_test.py
# @@ -0,0 +1,104 @@
# ---------------------------------------------------------------------------
# Copyright 2024 IBM, Red Hat
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from codeflare_sdk.ray.cluster.cluster import (
    _app_wrapper_status,
    Cluster,
    ClusterConfiguration,
)
from codeflare_sdk.ray.appwrapper import AppWrapper, AppWrapperStatus
from codeflare_sdk.ray.cluster.status import CodeFlareClusterStatus
import os

aw_dir = os.path.expanduser("~/.codeflare/resources/")


def test_cluster_status(mocker):
    """Check how Cluster.status() maps AppWrapper states to CodeFlareClusterStatus.

    In every case checked here the ``ready`` flag is expected to be False.
    """
    mocker.patch("kubernetes.client.ApisApi.get_api_versions")
    mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
    mocker.patch(
        "codeflare_sdk.common.kueue.kueue.local_queue_exists",
        return_value="true",
    )
    fake_aw = AppWrapper("test", AppWrapperStatus.FAILED)

    cf = Cluster(
        ClusterConfiguration(
            name="test",
            namespace="ns",
            write_to_file=True,
            appwrapper=True,
            local_queue="local_default_queue",
        )
    )

    # Neither an AppWrapper nor a RayCluster is found
    mocker.patch(
        "codeflare_sdk.ray.cluster.cluster._app_wrapper_status", return_value=None
    )
    mocker.patch(
        "codeflare_sdk.ray.cluster.cluster._ray_cluster_status", return_value=None
    )
    status, ready = cf.status()
    assert status == CodeFlareClusterStatus.UNKNOWN
    assert ready == False

    # From here on the AppWrapper lookup returns fake_aw, whose status is
    # changed in place for each case.
    mocker.patch(
        "codeflare_sdk.ray.cluster.cluster._app_wrapper_status", return_value=fake_aw
    )
    expected_by_aw_status = [
        (AppWrapperStatus.FAILED, CodeFlareClusterStatus.FAILED),
        (AppWrapperStatus.SUSPENDED, CodeFlareClusterStatus.QUEUED),
        (AppWrapperStatus.RESUMING, CodeFlareClusterStatus.STARTING),
        (AppWrapperStatus.RESETTING, CodeFlareClusterStatus.STARTING),
        (AppWrapperStatus.RUNNING, CodeFlareClusterStatus.UNKNOWN),
    ]
    for aw_status, expected_status in expected_by_aw_status:
        fake_aw.status = aw_status
        status, ready = cf.status()
        assert status == expected_status, f"Failed for AppWrapper status: {aw_status}"
        assert ready == False, f"Failed for AppWrapper status: {aw_status}"


def aw_status_fields(group, version, namespace, plural, *args):
    """Side effect for list_namespaced_custom_object in test_aw_status.

    Asserts the AppWrapper lookup arguments and returns an empty item list.
    """
    assert group == "workload.codeflare.dev"
    assert version == "v1beta2"
    assert namespace == "test-ns"
    assert plural == "appwrappers"
    assert args == ()
    return {"items": []}


def test_aw_status(mocker):
    """_app_wrapper_status returns None when the API lists no AppWrappers."""
    mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
    mocker.patch(
        "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
        side_effect=aw_status_fields,
    )
    aw = _app_wrapper_status("test-aw", "test-ns")
    assert aw is None


# Make sure to always keep this function last
def test_cleanup():
    """Delete the test.yaml file under aw_dir."""
    os.remove(f"{aw_dir}test.yaml")


# ---------------------------------------------------------------------------
# Patch boundary preserved from the original diff (next file begins here):
# diff --git a/src/codeflare_sdk/ray/client/ray_jobs_test.py b/src/codeflare_sdk/ray/client/ray_jobs_test.py
# new file mode 100644
# index 000000000..cbb27aa7a
# --- /dev/null
# +++ b/src/codeflare_sdk/ray/client/ray_jobs_test.py
# @@ -0,0 +1,173 @@
# ---------------------------------------------------------------------------
# Copyright 2024 IBM, Red Hat
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path

from ray.job_submission import JobSubmissionClient
from codeflare_sdk.ray.client.ray_jobs import RayJobClient
from codeflare_sdk.common.utils.unit_test_support import get_package_and_version
import pytest

# Project directory, so file lookups do not depend on the working directory.
parent = Path(__file__).resolve().parents[4]


# rjc == RayJobClient
@pytest.fixture
def ray_job_client(mocker):
    """Return a RayJobClient whose JobSubmissionClient.__init__ is mocked out."""
    # Creating a fixture to instantiate RayJobClient with a mocked JobSubmissionClient
    mocker.patch.object(JobSubmissionClient, "__init__", return_value=None)
    return RayJobClient(
        "https://ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org"
    )


def test_rjc_submit_job(ray_job_client, mocker):
    """submit_job forwards its arguments (unset ones as None) and returns the id."""
    mocked_submit_job = mocker.patch.object(
        JobSubmissionClient, "submit_job", return_value="mocked_submission_id"
    )
    submission_id = ray_job_client.submit_job(entrypoint={"pip": ["numpy"]})

    mocked_submit_job.assert_called_once_with(
        entrypoint={"pip": ["numpy"]},
        job_id=None,
        runtime_env=None,
        metadata=None,
        submission_id=None,
        entrypoint_num_cpus=None,
        entrypoint_num_gpus=None,
        entrypoint_memory=None,
        entrypoint_resources=None,
    )

    assert submission_id == "mocked_submission_id"


def test_rjc_delete_job(ray_job_client, mocker):
    """delete_job returns a (bool, message) pair for both outcomes."""
    # Case return True
    mocked_delete_job_True = mocker.patch.object(
        JobSubmissionClient, "delete_job", return_value=True
    )
    result = ray_job_client.delete_job(job_id="mocked_job_id")

    mocked_delete_job_True.assert_called_once_with(job_id="mocked_job_id")
    assert result == (True, "Successfully deleted Job mocked_job_id")

    # Case return False
    mocked_delete_job_False = mocker.patch.object(
        JobSubmissionClient, "delete_job", return_value=False
    )
    result = ray_job_client.delete_job(job_id="mocked_job_id")

    mocked_delete_job_False.assert_called_once_with(job_id="mocked_job_id")
    assert result == (False, "Failed to delete Job mocked_job_id")


def test_rjc_stop_job(ray_job_client, mocker):
    """stop_job returns a (bool, message) pair for both outcomes."""
    # Case return True
    mocked_stop_job_True = mocker.patch.object(
        JobSubmissionClient, "stop_job", return_value=True
    )
    result = ray_job_client.stop_job(job_id="mocked_job_id")

    mocked_stop_job_True.assert_called_once_with(job_id="mocked_job_id")
    assert result == (True, "Successfully stopped Job mocked_job_id")

    # Case return False
    mocked_stop_job_False = mocker.patch.object(
        JobSubmissionClient, "stop_job", return_value=False
    )
    result = ray_job_client.stop_job(job_id="mocked_job_id")

    mocked_stop_job_False.assert_called_once_with(job_id="mocked_job_id")
    assert result == (
        False,
        "Failed to stop Job, mocked_job_id could have already completed.",
    )


def test_rjc_address(ray_job_client, mocker):
    """get_address returns what JobSubmissionClient.get_address returns."""
    mocked_rjc_address = mocker.patch.object(
        JobSubmissionClient,
        "get_address",
        return_value="https://ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org",
    )
    address = ray_job_client.get_address()

    mocked_rjc_address.assert_called_once()
    assert (
        address
        == "https://ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org"
    )


def test_rjc_get_job_logs(ray_job_client, mocker):
    """get_job_logs passes job_id through and returns the client's logs."""
    mocked_rjc_get_job_logs = mocker.patch.object(
        JobSubmissionClient, "get_job_logs", return_value="Logs"
    )
    logs = ray_job_client.get_job_logs(job_id="mocked_job_id")

    mocked_rjc_get_job_logs.assert_called_once_with(job_id="mocked_job_id")
    assert logs == "Logs"


def test_rjc_get_job_info(ray_job_client, mocker):
    """get_job_info passes job_id through and returns the client's value."""
    job_details_example = "JobDetails(type=, job_id=None, submission_id='mocked_submission_id', driver_info=None, status=, entrypoint='python test.py', message='Job has not started yet. It may be waiting for the runtime environment to be set up.', error_type=None, start_time=1701271760641, end_time=None, metadata={}, runtime_env={'working_dir': 'gcs://_ray_pkg_67de6f0e60d43b19.zip', 'pip': {'packages': ['numpy'], 'pip_check': False}, '_ray_commit': 'b4bba4717f5ba04ee25580fe8f88eed63ef0c5dc'}, driver_agent_http_address=None, driver_node_id=None)"
    mocked_rjc_get_job_info = mocker.patch.object(
        JobSubmissionClient, "get_job_info", return_value=job_details_example
    )
    job_details = ray_job_client.get_job_info(job_id="mocked_job_id")

    mocked_rjc_get_job_info.assert_called_once_with(job_id="mocked_job_id")
    assert job_details == job_details_example


def test_rjc_get_job_status(ray_job_client, mocker):
    """get_job_status passes job_id through and returns the client's value."""
    job_status_example = ""
    mocked_rjc_get_job_status = mocker.patch.object(
        JobSubmissionClient, "get_job_status", return_value=job_status_example
    )
    job_status = ray_job_client.get_job_status(job_id="mocked_job_id")

    mocked_rjc_get_job_status.assert_called_once_with(job_id="mocked_job_id")
    assert job_status == job_status_example


def test_rjc_tail_job_logs(ray_job_client, mocker):
    """tail_job_logs passes job_id through and returns the client's value."""
    logs_example = [
        "Job started...",
        "Processing input data...",
        "Finalizing results...",
        "Job completed successfully.",
    ]
    mocked_rjc_tail_job_logs = mocker.patch.object(
        JobSubmissionClient, "tail_job_logs", return_value=logs_example
    )
    job_tail_job_logs = ray_job_client.tail_job_logs(job_id="mocked_job_id")

    mocked_rjc_tail_job_logs.assert_called_once_with(job_id="mocked_job_id")
    assert job_tail_job_logs == logs_example


def test_rjc_list_jobs(ray_job_client, mocker):
    """list_jobs returns what JobSubmissionClient.list_jobs returns."""
    # Anchored at the project directory so the lookup does not depend on the
    # working directory pytest is started from.
    # NOTE(review): assumes get_package_and_version accepts any path string - confirm.
    requirements_path = f"{parent}/tests/e2e/mnist_pip_requirements.txt"
    pytorch_lightning = get_package_and_version("pytorch_lightning", requirements_path)
    torchmetrics = get_package_and_version("torchmetrics", requirements_path)
    torchvision = get_package_and_version("torchvision", requirements_path)
    jobs_list = [
        f"JobDetails(type=, job_id=None, submission_id='raysubmit_4k2NYS1YbRXYPZCM', driver_info=None, status=, entrypoint='python mnist.py', message='Job finished successfully.', error_type=None, start_time=1701352132585, end_time=1701352192002, metadata={{}}, runtime_env={{'working_dir': 'gcs://_ray_pkg_6200b93a110e8033.zip', 'pip': {{'packages': ['{pytorch_lightning}', 'ray_lightning', '{torchmetrics}', '{torchvision}'], 'pip_check': False}}, '_ray_commit': 'b4bba4717f5ba04ee25580fe8f88eed63ef0c5dc'}}, driver_agent_http_address='http://10.131.0.18:52365', driver_node_id='9fb515995f5fb13ad4db239ceea378333bebf0a2d45b6aa09d02e691')",
        f"JobDetails(type=, job_id=None, submission_id='raysubmit_iRuwU8vdkbUZZGvT', driver_info=None, status=, entrypoint='python mnist.py', message='Job was intentionally stopped.', error_type=None, start_time=1701353096163, end_time=1701353097733, metadata={{}}, runtime_env={{'working_dir': 'gcs://_ray_pkg_6200b93a110e8033.zip', 'pip': {{'packages': ['{pytorch_lightning}', 'ray_lightning', '{torchmetrics}', '{torchvision}'], 'pip_check': False}}, '_ray_commit': 'b4bba4717f5ba04ee25580fe8f88eed63ef0c5dc'}}, driver_agent_http_address='http://10.131.0.18:52365', driver_node_id='9fb515995f5fb13ad4db239ceea378333bebf0a2d45b6aa09d02e691')",
    ]
    mocked_rjc_list_jobs = mocker.patch.object(
        JobSubmissionClient, "list_jobs", return_value=jobs_list
    )
    job_list_jobs = ray_job_client.list_jobs()

    mocked_rjc_list_jobs.assert_called_once()
    assert job_list_jobs == jobs_list


# ---------------------------------------------------------------------------
# Patch boundary preserved from the original diff (next file begins here):
# diff --git a/src/codeflare_sdk/ray/cluster/cluster_test.py b/src/codeflare_sdk/ray/cluster/cluster_test.py
# new file mode 100644
# index 000000000..20438bbe3
# --- /dev/null
# +++ b/src/codeflare_sdk/ray/cluster/cluster_test.py
# @@ -0,0 +1,610 @@
# ---------------------------------------------------------------------------
# Copyright 2024 IBM, Red Hat
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from codeflare_sdk.ray.cluster.cluster import ( + Cluster, + ClusterConfiguration, + get_cluster, + list_all_queued, +) +from codeflare_sdk.common.utils.unit_test_support import ( + createClusterWithConfig, + arg_check_del_effect, + ingress_retrieval, + arg_check_apply_effect, + get_local_queue, + createClusterConfig, + route_list_retrieval, + get_ray_obj, + get_aw_obj, + get_named_aw, + get_obj_none, + get_ray_obj_with_status, + get_aw_obj_with_status, +) +from codeflare_sdk.ray.cluster.generate_yaml import ( + is_openshift_cluster, + is_kind_cluster, +) +from pathlib import Path +from unittest.mock import MagicMock +from kubernetes import client +import os + +parent = Path(__file__).resolve().parents[4] # project directory +expected_clusters_dir = f"{parent}/tests/test_cluster_yamls" +aw_dir = os.path.expanduser("~/.codeflare/resources/") + + +def test_cluster_up_down(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster._throw_for_no_raycluster") + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", + return_value={"spec": {"domain": ""}}, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.create_namespaced_custom_object", + side_effect=arg_check_apply_effect, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.delete_namespaced_custom_object", + side_effect=arg_check_del_effect, + ) + mocker.patch( + 
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object", + return_value={"items": []}, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), + ) + cluster = cluster = createClusterWithConfig(mocker) + cluster.up() + cluster.down() + + +def test_cluster_up_down_no_mcad(mocker): + mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster._throw_for_no_raycluster") + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.create_namespaced_custom_object", + side_effect=arg_check_apply_effect, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.delete_namespaced_custom_object", + side_effect=arg_check_del_effect, + ) + mocker.patch( + "kubernetes.client.CoreV1Api.create_namespaced_secret", + ) + mocker.patch( + "kubernetes.client.CoreV1Api.delete_namespaced_secret", + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_cluster_custom_object", + return_value={"items": []}, + ) + config = createClusterConfig() + config.name = "unit-test-cluster-ray" + config.appwrapper = False + cluster = Cluster(config) + cluster.up() + cluster.down() + + +def test_cluster_uris(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + mocker.patch( + "codeflare_sdk.ray.cluster.cluster._get_ingress_domain", + return_value="apps.cluster.awsroute.org", + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), + ) + cluster = cluster = 
createClusterWithConfig(mocker) + mocker.patch( + "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", + return_value=ingress_retrieval( + cluster_name="unit-test-cluster", + annotations={"route.openshift.io/termination": "passthrough"}, + ), + ) + assert ( + cluster.cluster_dashboard_uri() + == "https://ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org" + ) + mocker.patch( + "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", + return_value=ingress_retrieval(), + ) + assert cluster.cluster_uri() == "ray://unit-test-cluster-head-svc.ns.svc:10001" + assert ( + cluster.cluster_dashboard_uri() + == "http://ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org" + ) + cluster.config.name = "fake" + mocker.patch( + "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", + ) + assert ( + cluster.cluster_dashboard_uri() + == "Dashboard not available yet, have you run cluster.up()?" + ) + + +def test_ray_job_wrapping(mocker): + import ray + + def ray_addr(self, *args): + return self._address + + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), + ) + cluster = cluster = createClusterWithConfig(mocker) + mocker.patch( + "ray.job_submission.JobSubmissionClient._check_connection_and_version_with_url", + return_value="None", + ) + mock_res = mocker.patch.object( + ray.job_submission.JobSubmissionClient, "list_jobs", autospec=True + ) + mock_res.side_effect = ray_addr + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", + return_value={"spec": {"domain": ""}}, + ) + mocker.patch( + "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", + return_value=ingress_retrieval(), + ) + assert cluster.list_jobs() == cluster.cluster_dashboard_uri() + + mock_res = mocker.patch.object( + ray.job_submission.JobSubmissionClient, "get_job_status", 
autospec=True + ) + mock_res.side_effect = ray_addr + assert cluster.job_status("fake_id") == cluster.cluster_dashboard_uri() + + mock_res = mocker.patch.object( + ray.job_submission.JobSubmissionClient, "get_job_logs", autospec=True + ) + mock_res.side_effect = ray_addr + assert cluster.job_logs("fake_id") == cluster.cluster_dashboard_uri() + + +def test_local_client_url(mocker): + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", + return_value={"spec": {"domain": ""}}, + ) + mocker.patch( + "codeflare_sdk.ray.cluster.cluster._get_ingress_domain", + return_value="rayclient-unit-test-cluster-localinter-ns.apps.cluster.awsroute.org", + ) + mocker.patch( + "codeflare_sdk.ray.cluster.cluster.Cluster.create_app_wrapper", + return_value="unit-test-cluster-localinter.yaml", + ) + + cluster_config = ClusterConfiguration( + name="unit-test-cluster-localinter", + namespace="ns", + ) + cluster = Cluster(cluster_config) + assert ( + cluster.local_client_url() + == "ray://rayclient-unit-test-cluster-localinter-ns.apps.cluster.awsroute.org" + ) + + +""" +get_cluster tests +""" + + +def test_get_cluster_openshift(mocker): + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + # Mock the client.ApisApi function to return a mock object + mock_api = MagicMock() + mock_api.get_api_versions.return_value.groups = [ + MagicMock(versions=[MagicMock(group_version="route.openshift.io/v1")]) + ] + mocker.patch("kubernetes.client.ApisApi", return_value=mock_api) + mocker.patch( + "codeflare_sdk.common.kueue.kueue.local_queue_exists", + return_value="true", + ) + + assert is_openshift_cluster() + + def custom_side_effect(group, version, namespace, plural, **kwargs): + if plural == "routes": + return route_list_retrieval("route.openshift.io", "v1", "ns", "routes") + elif plural == "rayclusters": + return get_ray_obj("ray.io", "v1", "ns", "rayclusters") + elif plural == "appwrappers": + return get_aw_obj("workload.codeflare.dev", "v1beta2", 
"ns", "appwrappers") + elif plural == "localqueues": + return get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues") + + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", get_aw_obj + ) + + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + side_effect=custom_side_effect, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + return_value=get_named_aw, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + side_effect=route_list_retrieval("route.openshift.io", "v1", "ns", "routes")[ + "items" + ], + ) + mocker.patch( + "codeflare_sdk.common.kueue.kueue.local_queue_exists", + return_value="true", + ) + + cluster = get_cluster( + "test-cluster-a", "ns" + ) # see tests/test_cluster_yamls/support_clusters + cluster_config = cluster.config + + assert cluster_config.name == "test-cluster-a" and cluster_config.namespace == "ns" + assert cluster_config.head_cpu_requests == 2 and cluster_config.head_cpu_limits == 2 + assert ( + cluster_config.head_memory_requests == "8G" + and cluster_config.head_memory_limits == "8G" + ) + assert ( + cluster_config.worker_cpu_requests == 1 + and cluster_config.worker_cpu_limits == 1 + ) + assert ( + cluster_config.worker_memory_requests == "2G" + and cluster_config.worker_memory_limits == "2G" + ) + assert cluster_config.num_workers == 1 + assert cluster_config.write_to_file == False + assert cluster_config.local_queue == "local_default_queue" + + +def test_get_cluster(mocker): + # test get_cluster for Kind Clusters + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + side_effect=get_ray_obj, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + side_effect=get_named_aw, + ) + 
mocker.patch( + "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", + return_value=ingress_retrieval(cluster_name="quicktest", client_ing=True), + ) + mocker.patch( + "codeflare_sdk.common.kueue.kueue.local_queue_exists", + return_value="true", + ) + cluster = get_cluster( + "test-cluster-a" + ) # see tests/test_cluster_yamls/support_clusters + cluster_config = cluster.config + + assert cluster_config.name == "test-cluster-a" and cluster_config.namespace == "ns" + assert cluster_config.head_cpu_requests == 2 and cluster_config.head_cpu_limits == 2 + assert ( + cluster_config.head_memory_requests == "8G" + and cluster_config.head_memory_limits == "8G" + ) + assert ( + cluster_config.worker_cpu_requests == 1 + and cluster_config.worker_cpu_limits == 1 + ) + assert ( + cluster_config.worker_memory_requests == "2G" + and cluster_config.worker_memory_limits == "2G" + ) + assert cluster_config.num_workers == 1 + assert cluster_config.write_to_file == False + assert cluster_config.local_queue == "local_default_queue" + + +def test_wait_ready(mocker, capsys): + from codeflare_sdk.ray.cluster.status import CodeFlareClusterStatus + + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch( + "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", + return_value=ingress_retrieval(), + ) + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + mocker.patch( + "codeflare_sdk.ray.cluster.cluster._app_wrapper_status", return_value=None + ) + mocker.patch( + "codeflare_sdk.ray.cluster.cluster._ray_cluster_status", return_value=None + ) + mocker.patch( + "codeflare_sdk.common.kueue.kueue.local_queue_exists", + return_value="true", + ) + mocker.patch.object( + client.CustomObjectsApi, + "list_namespaced_custom_object", + return_value={ + "items": [ + { + "metadata": {"name": "ray-dashboard-test"}, + "spec": {"host": "mocked-host"}, + } + ] + }, + ) + mock_response = mocker.Mock() + mock_response.status_code = 200 + 
mocker.patch("requests.get", return_value=mock_response) + cf = Cluster( + ClusterConfiguration( + name="test", + namespace="ns", + write_to_file=False, + appwrapper=True, + local_queue="local-queue-default", + ) + ) + try: + cf.wait_ready(timeout=5) + assert 1 == 0 + except Exception as e: + assert type(e) == TimeoutError + + captured = capsys.readouterr() + assert ( + "WARNING: Current cluster status is unknown, have you run cluster.up yet?" + in captured.out + ) + mocker.patch( + "codeflare_sdk.ray.cluster.cluster.Cluster.status", + return_value=(True, CodeFlareClusterStatus.READY), + ) + cf.wait_ready() + captured = capsys.readouterr() + assert ( + captured.out + == "Waiting for requested resources to be set up...\nRequested cluster is up and running!\nDashboard is ready!\n" + ) + cf.wait_ready(dashboard_check=False) + captured = capsys.readouterr() + assert ( + captured.out + == "Waiting for requested resources to be set up...\nRequested cluster is up and running!\n" + ) + + +def test_list_queue_appwrappers(mocker, capsys): + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_obj_none( + "workload.codeflare.dev", "v1beta2", "ns", "appwrappers" + ), + ) + list_all_queued("ns", appwrapper=True) + captured = capsys.readouterr() + assert captured.out == ( + "╭──────────────────────────────────────────────────────────────────────────────╮\n" + "│ No resources found, have you run cluster.up() yet? 
│\n" + "╰──────────────────────────────────────────────────────────────────────────────╯\n" + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_aw_obj_with_status( + "workload.codeflare.dev", "v1beta2", "ns", "appwrappers" + ), + ) + list_all_queued("ns", appwrapper=True) + captured = capsys.readouterr() + print(captured.out) + assert captured.out == ( + "╭────────────────────────────────╮\n" + "│ 🚀 Cluster Queue Status 🚀 │\n" + "│ +----------------+-----------+ │\n" + "│ | Name | Status | │\n" + "│ +================+===========+ │\n" + "│ | test-cluster-a | running | │\n" + "│ | | | │\n" + "│ | test-cluster-b | suspended | │\n" + "│ | | | │\n" + "│ +----------------+-----------+ │\n" + "╰────────────────────────────────╯\n" + ) + + +def test_list_queue_rayclusters(mocker, capsys): + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + mock_api = MagicMock() + mock_api.get_api_versions.return_value.groups = [ + MagicMock(versions=[MagicMock(group_version="route.openshift.io/v1")]) + ] + mocker.patch("kubernetes.client.ApisApi", return_value=mock_api) + + assert is_openshift_cluster() == True + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_obj_none("ray.io", "v1", "ns", "rayclusters"), + ) + + list_all_queued("ns") + captured = capsys.readouterr() + assert captured.out == ( + "╭──────────────────────────────────────────────────────────────────────────────╮\n" + "│ No resources found, have you run cluster.up() yet? 
│\n" + "╰──────────────────────────────────────────────────────────────────────────────╯\n" + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_ray_obj_with_status("ray.io", "v1", "ns", "rayclusters"), + ) + + list_all_queued("ns") + captured = capsys.readouterr() + # print(captured.out) -> useful for updating the test + assert captured.out == ( + "╭────────────────────────────────╮\n" + "│ 🚀 Cluster Queue Status 🚀 │\n" + "│ +----------------+-----------+ │\n" + "│ | Name | Status | │\n" + "│ +================+===========+ │\n" + "│ | test-cluster-a | ready | │\n" + "│ | | | │\n" + "│ | test-rc-b | suspended | │\n" + "│ | | | │\n" + "│ +----------------+-----------+ │\n" + "╰────────────────────────────────╯\n" + ) + + +def test_list_clusters(mocker, capsys): + from codeflare_sdk.ray.cluster.cluster import list_all_clusters + + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + side_effect=get_obj_none, + ) + mocker.patch( + "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", + ) + list_all_clusters("ns") + captured = capsys.readouterr() + assert captured.out == ( + "╭──────────────────────────────────────────────────────────────────────────────╮\n" + "│ No resources found, have you run cluster.up() yet? 
│\n" + "╰──────────────────────────────────────────────────────────────────────────────╯\n" + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + side_effect=get_ray_obj, + ) + list_all_clusters("ns") + captured = capsys.readouterr() + # print(captured.out) -> useful for updating the test + assert captured.out == ( + " 🚀 CodeFlare Cluster Details 🚀 \n" + " \n" + " ╭──────────────────────────────────────────────────────────────────╮ \n" + " │ Name │ \n" + " │ test-cluster-a Inactive ❌ │ \n" + " │ │ \n" + " │ URI: ray://test-cluster-a-head-svc.ns.svc:10001 │ \n" + " │ │ \n" + " │ Dashboard🔗 │ \n" + " │ │ \n" + " │ Cluster Resources │ \n" + " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n" + " │ │ # Workers │ │ Memory CPU GPU │ │ \n" + " │ │ │ │ │ │ \n" + " │ │ 1 │ │ 2G~2G 1~1 0 │ │ \n" + " │ │ │ │ │ │ \n" + " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n" + " ╰──────────────────────────────────────────────────────────────────╯ \n" + "╭───────────────────────────────────────────────────────────────╮\n" + "│ Name │\n" + "│ test-rc-b Inactive ❌ │\n" + "│ │\n" + "│ URI: ray://test-rc-b-head-svc.ns.svc:10001 │\n" + "│ │\n" + "│ Dashboard🔗 │\n" + "│ │\n" + "│ Cluster Resources │\n" + "│ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │\n" + "│ │ # Workers │ │ Memory CPU GPU │ │\n" + "│ │ │ │ │ │\n" + "│ │ 1 │ │ 2G~2G 1~1 0 │ │\n" + "│ │ │ │ │ │\n" + "│ ╰─────────────╯ ╰──────────────────────────────────────╯ │\n" + "╰───────────────────────────────────────────────────────────────╯\n" + ) + + +def test_map_to_ray_cluster(mocker): + from codeflare_sdk.ray.cluster.cluster import _map_to_ray_cluster + + mocker.patch("kubernetes.config.load_kube_config") + + mocker.patch( + "codeflare_sdk.ray.cluster.cluster.is_openshift_cluster", return_value=True + ) + + mock_api_client = mocker.MagicMock(spec=client.ApiClient) + mocker.patch( + "codeflare_sdk.common.kubernetes_cluster.auth.get_api_client", + 
return_value=mock_api_client, + ) + + mock_routes = { + "items": [ + { + "apiVersion": "route.openshift.io/v1", + "kind": "Route", + "metadata": { + "name": "ray-dashboard-test-cluster-a", + "namespace": "ns", + }, + "spec": {"host": "ray-dashboard-test-cluster-a"}, + }, + ] + } + + def custom_side_effect(group, version, namespace, plural, **kwargs): + if plural == "routes": + return mock_routes + elif plural == "rayclusters": + return get_ray_obj("ray.io", "v1", "ns", "rayclusters") + + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + side_effect=custom_side_effect, + ) + + rc = get_ray_obj("ray.io", "v1", "ns", "rayclusters")["items"][0] + rc_name = rc["metadata"]["name"] + rc_dashboard = f"http://ray-dashboard-{rc_name}" + + result = _map_to_ray_cluster(rc) + + assert result is not None + assert result.dashboard == rc_dashboard diff --git a/src/codeflare_sdk/ray/cluster/config_test.py b/src/codeflare_sdk/ray/cluster/config_test.py new file mode 100644 index 000000000..7afe141c6 --- /dev/null +++ b/src/codeflare_sdk/ray/cluster/config_test.py @@ -0,0 +1,164 @@ +# Copyright 2024 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from codeflare_sdk.common.utils.unit_test_support import createClusterWrongType +from codeflare_sdk.ray.cluster.cluster import ClusterConfiguration, Cluster +from pathlib import Path +from unittest.mock import patch +import filecmp +import pytest +import yaml +import os + +parent = Path(__file__).resolve().parents[4] # project directory +expected_clusters_dir = f"{parent}/tests/test_cluster_yamls" +aw_dir = os.path.expanduser("~/.codeflare/resources/") + + +def test_default_cluster_creation(mocker): + # Create a Ray Cluster using the default config variables + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch("kubernetes.client.CustomObjectsApi.list_namespaced_custom_object") + + cluster = Cluster( + ClusterConfiguration( + name="default-cluster", + namespace="ns", + ) + ) + + test_rc = yaml.load(cluster.app_wrapper_yaml, Loader=yaml.FullLoader) + with open(f"{expected_clusters_dir}/ray/default-ray-cluster.yaml") as f: + expected_rc = yaml.load(f, Loader=yaml.FullLoader) + assert test_rc == expected_rc + + +def test_default_appwrapper_creation(mocker): + # Create an AppWrapper using the default config variables + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch("kubernetes.client.CustomObjectsApi.list_namespaced_custom_object") + + cluster = Cluster( + ClusterConfiguration(name="default-appwrapper", namespace="ns", appwrapper=True) + ) + + test_aw = yaml.load(cluster.app_wrapper_yaml, Loader=yaml.FullLoader) + with open(f"{expected_clusters_dir}/ray/default-appwrapper.yaml") as f: + expected_aw = yaml.load(f, Loader=yaml.FullLoader) + assert test_aw == expected_aw + + +@patch.dict("os.environ", {"NB_PREFIX": "test-prefix"}) +def test_config_creation_all_parameters(mocker): + from codeflare_sdk.ray.cluster.config import DEFAULT_RESOURCE_MAPPING + + mocker.patch( + "codeflare_sdk.common.kueue.kueue.local_queue_exists", + return_value="true", + ) + extended_resource_mapping = DEFAULT_RESOURCE_MAPPING + 
extended_resource_mapping.update({"example.com/gpu": "GPU"}) + + config = ClusterConfiguration( + name="test-all-params", + namespace="ns", + head_info=["test1", "test2"], + head_cpu_requests=4, + head_cpu_limits=8, + head_memory_requests=12, + head_memory_limits=16, + head_extended_resource_requests={"nvidia.com/gpu": 1}, + machine_types={"gpu.small", "gpu.large"}, + worker_cpu_requests=4, + worker_cpu_limits=8, + num_workers=10, + worker_memory_requests=12, + worker_memory_limits=16, + template=f"{parent}/src/codeflare_sdk/ray/templates/base-template.yaml", + appwrapper=False, + envs={"key1": "value1", "key2": "value2"}, + image="example/ray:tag", + image_pull_secrets=["secret1", "secret2"], + write_to_file=True, + verify_tls=True, + labels={"key1": "value1", "key2": "value2"}, + worker_extended_resource_requests={"nvidia.com/gpu": 1}, + extended_resource_mapping=extended_resource_mapping, + overwrite_default_resource_mapping=True, + local_queue="local-queue-default", + ) + Cluster(config) + + assert config.name == "test-all-params" and config.namespace == "ns" + assert config.head_info == ["test1", "test2"] + assert config.head_cpu_requests == 4 + assert config.head_cpu_limits == 8 + assert config.head_memory_requests == "12G" + assert config.head_memory_limits == "16G" + assert config.head_extended_resource_requests == {"nvidia.com/gpu": 1} + assert config.machine_types == {"gpu.small", "gpu.large"} + assert config.worker_cpu_requests == 4 + assert config.worker_cpu_limits == 8 + assert config.num_workers == 10 + assert config.worker_memory_requests == "12G" + assert config.worker_memory_limits == "16G" + assert ( + config.template + == f"{parent}/src/codeflare_sdk/ray/templates/base-template.yaml" + ) + assert config.appwrapper == False + assert config.envs == {"key1": "value1", "key2": "value2"} + assert config.image == "example/ray:tag" + assert config.image_pull_secrets == ["secret1", "secret2"] + assert config.write_to_file == True + assert 
config.verify_tls == True + assert config.labels == {"key1": "value1", "key2": "value2"} + assert config.worker_extended_resource_requests == {"nvidia.com/gpu": 1} + assert config.extended_resource_mapping == extended_resource_mapping + assert config.overwrite_default_resource_mapping == True + assert config.local_queue == "local-queue-default" + + assert filecmp.cmp( + f"{aw_dir}test-all-params.yaml", + f"{expected_clusters_dir}/ray/unit-test-all-params.yaml", + shallow=True, + ) + + +def test_config_creation_wrong_type(): + with pytest.raises(TypeError): + createClusterWrongType() + + +def test_cluster_config_deprecation_conversion(mocker): + config = ClusterConfiguration( + name="test", + num_gpus=2, + head_gpus=1, + min_memory=3, + max_memory=4, + min_cpus=1, + max_cpus=2, + ) + assert config.worker_extended_resource_requests == {"nvidia.com/gpu": 2} + assert config.head_extended_resource_requests == {"nvidia.com/gpu": 1} + assert config.worker_memory_requests == "3G" + assert config.worker_memory_limits == "4G" + assert config.worker_cpu_requests == 1 + assert config.worker_cpu_limits == 2 + + +# Make sure to always keep this function last +def test_cleanup(): + os.remove(f"{aw_dir}test-all-params.yaml") diff --git a/src/codeflare_sdk/ray/cluster/generate_yaml_test.py b/src/codeflare_sdk/ray/cluster/generate_yaml_test.py new file mode 100644 index 000000000..68c6aa89b --- /dev/null +++ b/src/codeflare_sdk/ray/cluster/generate_yaml_test.py @@ -0,0 +1,34 @@ +# Copyright 2024 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from codeflare_sdk.ray.cluster.generate_yaml import gen_names +import uuid + + +def test_gen_names_with_name(mocker): + mocker.patch.object( + uuid, "uuid4", return_value=uuid.UUID("00000000-0000-0000-0000-000000000001") + ) + name = "myname" + appwrapper_name, cluster_name = gen_names(name) + assert appwrapper_name == name + assert cluster_name == name + + +def test_gen_names_without_name(mocker): + mocker.patch.object( + uuid, "uuid4", return_value=uuid.UUID("00000000-0000-0000-0000-000000000001") + ) + appwrapper_name, cluster_name = gen_names(None) + assert appwrapper_name.startswith("appwrapper-") + assert cluster_name.startswith("cluster-") diff --git a/src/codeflare_sdk/ray/cluster/pretty_print_test.py b/src/codeflare_sdk/ray/cluster/pretty_print_test.py new file mode 100644 index 000000000..b0da42011 --- /dev/null +++ b/src/codeflare_sdk/ray/cluster/pretty_print_test.py @@ -0,0 +1,208 @@ +# Copyright 2024 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from codeflare_sdk.ray.cluster.pretty_print import ( + print_app_wrappers_status, + print_cluster_status, + print_clusters, + print_no_resources_found, +) +from codeflare_sdk.ray.appwrapper.status import AppWrapperStatus, AppWrapper +from codeflare_sdk.ray.cluster.status import ( + RayCluster, + RayClusterStatus, + CodeFlareClusterStatus, +) +from codeflare_sdk.ray.cluster.cluster import ( + Cluster, + ClusterConfiguration, + _copy_to_ray, +) + + +def test_print_no_resources(capsys): + try: + print_no_resources_found() + except Exception: + assert 1 == 0 + captured = capsys.readouterr() + assert captured.out == ( + "╭──────────────────────────────────────────────────────────────────────────────╮\n" + "│ No resources found, have you run cluster.up() yet? │\n" + "╰──────────────────────────────────────────────────────────────────────────────╯\n" + ) + + +def test_print_appwrappers(capsys): + aw1 = AppWrapper( + name="awtest1", + status=AppWrapperStatus.SUSPENDED, + ) + aw2 = AppWrapper( + name="awtest2", + status=AppWrapperStatus.RUNNING, + ) + try: + print_app_wrappers_status([aw1, aw2]) + except Exception: + assert 1 == 0 + captured = capsys.readouterr() + assert captured.out == ( + "╭─────────────────────────╮\n" + "│ 🚀 Cluster Queue │\n" + "│ Status 🚀 │\n" + "│ +---------+-----------+ │\n" + "│ | Name | Status | │\n" + "│ +=========+===========+ │\n" + "│ | awtest1 | suspended | │\n" + "│ | | | │\n" + "│ | awtest2 | running | │\n" + "│ | | | │\n" + "│ +---------+-----------+ │\n" + "╰─────────────────────────╯\n" + ) + + +def test_ray_details(mocker, capsys): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + ray1 = RayCluster( + name="raytest1", + status=RayClusterStatus.READY, + num_workers=1, + worker_mem_requests="2G", + worker_mem_limits="2G", + worker_cpu_requests=1, + worker_cpu_limits=1, + namespace="ns", + dashboard="fake-uri", + head_cpu_requests=2, + head_cpu_limits=2, + head_mem_requests=8, + head_mem_limits=8, + ) + mocker.patch( + 
"codeflare_sdk.ray.cluster.cluster.Cluster.status", + return_value=(False, CodeFlareClusterStatus.UNKNOWN), + ) + mocker.patch( + "codeflare_sdk.ray.cluster.cluster.Cluster.cluster_dashboard_uri", + return_value="", + ) + mocker.patch( + "codeflare_sdk.common.kueue.kueue.local_queue_exists", + return_value="true", + ) + cf = Cluster( + ClusterConfiguration( + name="raytest2", + namespace="ns", + appwrapper=True, + local_queue="local_default_queue", + ) + ) + captured = capsys.readouterr() + ray2 = _copy_to_ray(cf) + details = cf.details() + assert details == ray2 + assert ray2.name == "raytest2" + assert ray1.namespace == ray2.namespace + assert ray1.num_workers == ray2.num_workers + assert ray1.worker_mem_requests == ray2.worker_mem_requests + assert ray1.worker_mem_limits == ray2.worker_mem_limits + assert ray1.worker_cpu_requests == ray2.worker_cpu_requests + assert ray1.worker_cpu_limits == ray2.worker_cpu_limits + assert ray1.worker_extended_resources == ray2.worker_extended_resources + try: + print_clusters([ray1, ray2]) + print_cluster_status(ray1) + print_cluster_status(ray2) + except Exception: + assert 0 == 1 + captured = capsys.readouterr() + assert captured.out == ( + " 🚀 CodeFlare Cluster Details 🚀 \n" + " \n" + " ╭───────────────────────────────────────────────────────────────╮ \n" + " │ Name │ \n" + " │ raytest2 Inactive ❌ │ \n" + " │ │ \n" + " │ URI: ray://raytest2-head-svc.ns.svc:10001 │ \n" + " │ │ \n" + " │ Dashboard🔗 │ \n" + " │ │ \n" + " │ Cluster Resources │ \n" + " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n" + " │ │ # Workers │ │ Memory CPU GPU │ │ \n" + " │ │ │ │ │ │ \n" + " │ │ 1 │ │ 2G~2G 1~1 0 │ │ \n" + " │ │ │ │ │ │ \n" + " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n" + " ╰───────────────────────────────────────────────────────────────╯ \n" + " 🚀 CodeFlare Cluster Details 🚀 \n" + " \n" + " ╭───────────────────────────────────────────────────────────────╮ \n" + " │ Name │ \n" + " │ raytest1 Active 
✅ │ \n" + " │ │ \n" + " │ URI: ray://raytest1-head-svc.ns.svc:10001 │ \n" + " │ │ \n" + " │ Dashboard🔗 │ \n" + " │ │ \n" + " │ Cluster Resources │ \n" + " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n" + " │ │ # Workers │ │ Memory CPU GPU │ │ \n" + " │ │ │ │ │ │ \n" + " │ │ 1 │ │ 2G~2G 1~1 0 │ │ \n" + " │ │ │ │ │ │ \n" + " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n" + " ╰───────────────────────────────────────────────────────────────╯ \n" + "╭───────────────────────────────────────────────────────────────╮\n" + "│ Name │\n" + "│ raytest2 Inactive ❌ │\n" + "│ │\n" + "│ URI: ray://raytest2-head-svc.ns.svc:10001 │\n" + "│ │\n" + "│ Dashboard🔗 │\n" + "│ │\n" + "│ Cluster Resources │\n" + "│ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │\n" + "│ │ # Workers │ │ Memory CPU GPU │ │\n" + "│ │ │ │ │ │\n" + "│ │ 1 │ │ 2G~2G 1~1 0 │ │\n" + "│ │ │ │ │ │\n" + "│ ╰─────────────╯ ╰──────────────────────────────────────╯ │\n" + "╰───────────────────────────────────────────────────────────────╯\n" + " 🚀 CodeFlare Cluster Status 🚀 \n" + " \n" + " ╭──────────────────────────────────────────────────────────╮ \n" + " │ Name │ \n" + " │ raytest1 Active ✅ │ \n" + " │ │ \n" + " │ URI: ray://raytest1-head-svc.ns.svc:10001 │ \n" + " │ │ \n" + " │ Dashboard🔗 │ \n" + " │ │ \n" + " ╰──────────────────────────────────────────────────────────╯ \n" + " 🚀 CodeFlare Cluster Status 🚀 \n" + " \n" + " ╭────────────────────────────────────────────────────────────╮ \n" + " │ Name │ \n" + " │ raytest2 Inactive ❌ │ \n" + " │ │ \n" + " │ URI: ray://raytest2-head-svc.ns.svc:10001 │ \n" + " │ │ \n" + " │ Dashboard🔗 │ \n" + " │ │ \n" + " ╰────────────────────────────────────────────────────────────╯ \n" + ) diff --git a/src/codeflare_sdk/ray/cluster/status_test.py b/src/codeflare_sdk/ray/cluster/status_test.py new file mode 100644 index 000000000..146d21901 --- /dev/null +++ b/src/codeflare_sdk/ray/cluster/status_test.py @@ -0,0 +1,114 @@ +# Copyright 2024 
IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from codeflare_sdk.ray.cluster.cluster import ( + Cluster, + ClusterConfiguration, + _ray_cluster_status, +) +from codeflare_sdk.ray.cluster.status import ( + CodeFlareClusterStatus, + RayClusterStatus, + RayCluster, +) +import os + +aw_dir = os.path.expanduser("~/.codeflare/resources/") + + +def test_cluster_status(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + mocker.patch( + "codeflare_sdk.common.kueue.kueue.local_queue_exists", + return_value="true", + ) + + fake_ray = RayCluster( + name="test", + status=RayClusterStatus.UNKNOWN, + num_workers=1, + worker_mem_requests=2, + worker_mem_limits=2, + worker_cpu_requests=1, + worker_cpu_limits=1, + namespace="ns", + dashboard="fake-uri", + head_cpu_requests=2, + head_cpu_limits=2, + head_mem_requests=8, + head_mem_limits=8, + ) + cf = Cluster( + ClusterConfiguration( + name="test", + namespace="ns", + write_to_file=True, + appwrapper=False, + local_queue="local_default_queue", + ) + ) + mocker.patch( + "codeflare_sdk.ray.cluster.cluster._ray_cluster_status", return_value=None + ) + status, ready = cf.status() + assert status == CodeFlareClusterStatus.UNKNOWN + assert ready == False + + mocker.patch( + "codeflare_sdk.ray.cluster.cluster._ray_cluster_status", return_value=fake_ray + ) + + status, ready = cf.status() + assert status == 
CodeFlareClusterStatus.STARTING + assert ready == False + + fake_ray.status = RayClusterStatus.FAILED + status, ready = cf.status() + assert status == CodeFlareClusterStatus.FAILED + assert ready == False + + fake_ray.status = RayClusterStatus.UNHEALTHY + status, ready = cf.status() + assert status == CodeFlareClusterStatus.FAILED + assert ready == False + + fake_ray.status = RayClusterStatus.READY + status, ready = cf.status() + assert status == CodeFlareClusterStatus.READY + assert ready == True + + +def rc_status_fields(group, version, namespace, plural, *args): + assert group == "ray.io" + assert version == "v1" + assert namespace == "test-ns" + assert plural == "rayclusters" + assert args == tuple() + return {"items": []} + + +def test_rc_status(mocker): + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + side_effect=rc_status_fields, + ) + rc = _ray_cluster_status("test-rc", "test-ns") + assert rc == None + + +# Make sure to always keep this function last +def test_cleanup(): + os.remove(f"{aw_dir}test.yaml") diff --git a/tests/demo_test.py b/tests/demo_test.py deleted file mode 100644 index b54530580..000000000 --- a/tests/demo_test.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright 2022 IBM, Red Hat -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -These were the old tests used during initial demo building, and they will soon be fully deprecated. 
-""" - -from codeflare_sdk.ray.cluster.cluster import ( - list_all_clusters, - list_all_queued, - _app_wrapper_status, -) -from codeflare_sdk.ray.cluster.cluster import Cluster, ClusterConfiguration - -import time - -# FIXME - These tests currently assume OC logged in, and not self-contained unit/funcitonal tests - - -def test_cluster_up(): - cluster = Cluster(ClusterConfiguration(name="raycluster-autoscaler")) - cluster.up() - time.sleep(15) - - -def test_list_clusters(): - clusters = list_all_clusters() - - -def test_cluster_status(): - cluster = Cluster(ClusterConfiguration(name="raycluster-autoscaler")) - cluster.status() - - -def test_app_wrapper_status(): - print(_app_wrapper_status("raycluster-autoscaler")) - - -def test_cluster_down(): - cluster = Cluster(ClusterConfiguration(name="raycluster-autoscaler")) - cluster.down() - - -def test_no_resources_found(): - from codeflare_sdk.ray.cluster import pretty_print - - pretty_print.print_no_resources_found() - - -def test_list_app_wrappers(): - app_wrappers = list_all_queued() diff --git a/tests/func_test.py b/tests/func_test.py deleted file mode 100644 index 6b5799c39..000000000 --- a/tests/func_test.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2022 IBM, Red Hat -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from pathlib import Path -import sys - -parent = Path(__file__).resolve().parents[1] -sys.path.append(str(parent) + "/src") - -# COMING SOON! 
diff --git a/tests/test-case-bad.yaml b/tests/test_cluster_yamls/appwrapper/test-case-bad.yaml similarity index 100% rename from tests/test-case-bad.yaml rename to tests/test_cluster_yamls/appwrapper/test-case-bad.yaml diff --git a/tests/test-case.yaml b/tests/test_cluster_yamls/kueue/aw_kueue.yaml similarity index 93% rename from tests/test-case.yaml rename to tests/test_cluster_yamls/kueue/aw_kueue.yaml index c03422cf8..2c6d868ac 100644 --- a/tests/test-case.yaml +++ b/tests/test_cluster_yamls/kueue/aw_kueue.yaml @@ -3,7 +3,7 @@ kind: AppWrapper metadata: labels: kueue.x-k8s.io/queue-name: local-queue-default - name: unit-test-cluster + name: unit-test-aw-kueue namespace: ns spec: components: @@ -13,7 +13,7 @@ spec: metadata: labels: controller-tools.k8s.io: '1.0' - name: unit-test-cluster + name: unit-test-aw-kueue namespace: ns spec: autoscalerOptions: @@ -76,8 +76,7 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt - imagePullSecrets: - - name: unit-test-pull-secret + imagePullSecrets: [] volumes: - configMap: items: @@ -95,12 +94,12 @@ spec: name: odh-ca-cert rayVersion: 2.35.0 workerGroupSpecs: - - groupName: small-group-unit-test-cluster + - groupName: small-group-unit-test-aw-kueue maxReplicas: 2 minReplicas: 2 rayStartParams: block: 'true' - num-gpus: '7' + num-gpus: '0' resources: '"{}"' replicas: 2 template: @@ -124,11 +123,9 @@ spec: limits: cpu: 4 memory: 6G - nvidia.com/gpu: 7 requests: cpu: 3 memory: 5G - nvidia.com/gpu: 7 volumeMounts: - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert @@ -142,8 +139,7 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt - imagePullSecrets: - - name: unit-test-pull-secret + imagePullSecrets: [] volumes: - configMap: items: diff --git a/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml b/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml new file mode 100644 index 
000000000..0c4efb29a --- /dev/null +++ b/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml @@ -0,0 +1,157 @@ +apiVersion: workload.codeflare.dev/v1beta2 +kind: AppWrapper +metadata: + labels: + kueue.x-k8s.io/queue-name: local-queue-default + name: unit-test-cluster-kueue + namespace: ns +spec: + components: + - template: + apiVersion: ray.io/v1 + kind: RayCluster + metadata: + labels: + controller-tools.k8s.io: '1.0' + name: unit-test-cluster-kueue + namespace: ns + spec: + autoscalerOptions: + idleTimeoutSeconds: 60 + imagePullPolicy: Always + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 500m + memory: 512Mi + upscalingMode: Default + enableInTreeAutoscaling: false + headGroupSpec: + enableIngress: false + rayStartParams: + block: 'true' + dashboard-host: 0.0.0.0 + num-gpus: '0' + resources: '"{}"' + serviceType: ClusterIP + template: + spec: + containers: + - image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: ray-head + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: 2 + memory: 8G + requests: + cpu: 2 + memory: 8G + volumeMounts: + - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + imagePullSecrets: [] + volumes: + - configMap: + items: + - key: ca-bundle.crt + path: odh-trusted-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-trusted-ca-cert + - configMap: + items: + - 
key: odh-ca-bundle.crt + path: odh-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-ca-cert + rayVersion: 2.35.0 + workerGroupSpecs: + - groupName: small-group-unit-test-cluster-kueue + maxReplicas: 2 + minReplicas: 2 + rayStartParams: + block: 'true' + num-gpus: '0' + resources: '"{}"' + replicas: 2 + template: + metadata: + annotations: + key: value + labels: + key: value + spec: + containers: + - image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: machine-learning + resources: + limits: + cpu: 4 + memory: 6G + requests: + cpu: 3 + memory: 5G + volumeMounts: + - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + imagePullSecrets: [] + volumes: + - configMap: + items: + - key: ca-bundle.crt + path: odh-trusted-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-trusted-ca-cert + - configMap: + items: + - key: odh-ca-bundle.crt + path: odh-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-ca-cert diff --git a/tests/test_cluster_yamls/ray/default-appwrapper.yaml b/tests/test_cluster_yamls/ray/default-appwrapper.yaml new file mode 100644 index 000000000..60152c1e7 --- /dev/null +++ b/tests/test_cluster_yamls/ray/default-appwrapper.yaml @@ -0,0 +1,155 @@ +apiVersion: workload.codeflare.dev/v1beta2 +kind: AppWrapper +metadata: + name: default-appwrapper + namespace: ns +spec: + components: + - template: + apiVersion: ray.io/v1 + kind: RayCluster + metadata: + labels: + controller-tools.k8s.io: 
'1.0' + name: default-appwrapper + namespace: ns + spec: + autoscalerOptions: + idleTimeoutSeconds: 60 + imagePullPolicy: Always + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 500m + memory: 512Mi + upscalingMode: Default + enableInTreeAutoscaling: false + headGroupSpec: + enableIngress: false + rayStartParams: + block: 'true' + dashboard-host: 0.0.0.0 + num-gpus: '0' + resources: '"{}"' + serviceType: ClusterIP + template: + spec: + containers: + - image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: ray-head + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: 2 + memory: 8G + requests: + cpu: 2 + memory: 8G + volumeMounts: + - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + imagePullSecrets: [] + volumes: + - configMap: + items: + - key: ca-bundle.crt + path: odh-trusted-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-trusted-ca-cert + - configMap: + items: + - key: odh-ca-bundle.crt + path: odh-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-ca-cert + rayVersion: 2.35.0 + workerGroupSpecs: + - groupName: small-group-default-appwrapper + maxReplicas: 1 + minReplicas: 1 + rayStartParams: + block: 'true' + num-gpus: '0' + resources: '"{}"' + replicas: 1 + template: + metadata: + annotations: + key: value + labels: + key: value + spec: + containers: + - 
image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: machine-learning + resources: + limits: + cpu: 1 + memory: 2G + requests: + cpu: 1 + memory: 2G + volumeMounts: + - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + imagePullSecrets: [] + volumes: + - configMap: + items: + - key: ca-bundle.crt + path: odh-trusted-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-trusted-ca-cert + - configMap: + items: + - key: odh-ca-bundle.crt + path: odh-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-ca-cert diff --git a/tests/test-case-no-kueue-no-aw.yaml b/tests/test_cluster_yamls/ray/default-ray-cluster.yaml similarity index 88% rename from tests/test-case-no-kueue-no-aw.yaml rename to tests/test_cluster_yamls/ray/default-ray-cluster.yaml index ea90a275a..7a3329b6d 100644 --- a/tests/test-case-no-kueue-no-aw.yaml +++ b/tests/test_cluster_yamls/ray/default-ray-cluster.yaml @@ -1,11 +1,9 @@ apiVersion: ray.io/v1 kind: RayCluster metadata: - annotations: - app.kubernetes.io/managed-by: test-prefix labels: controller-tools.k8s.io: '1.0' - name: unit-test-no-kueue + name: default-cluster namespace: ns spec: autoscalerOptions: @@ -68,8 +66,7 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt - imagePullSecrets: - - name: unit-test-pull-secret + imagePullSecrets: [] volumes: - configMap: items: @@ -87,14 +84,14 @@ spec: name: odh-ca-cert rayVersion: 2.35.0 workerGroupSpecs: - 
- groupName: small-group-unit-test-no-kueue - maxReplicas: 2 - minReplicas: 2 + - groupName: small-group-default-cluster + maxReplicas: 1 + minReplicas: 1 rayStartParams: block: 'true' - num-gpus: '7' + num-gpus: '0' resources: '"{}"' - replicas: 2 + replicas: 1 template: metadata: annotations: @@ -114,13 +111,11 @@ spec: name: machine-learning resources: limits: - cpu: 4 - memory: 6G - nvidia.com/gpu: 7 + cpu: 1 + memory: 2G requests: - cpu: 3 - memory: 5G - nvidia.com/gpu: 7 + cpu: 1 + memory: 2G volumeMounts: - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert @@ -134,8 +129,7 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt - imagePullSecrets: - - name: unit-test-pull-secret + imagePullSecrets: [] volumes: - configMap: items: diff --git a/tests/test-case-custom-image.yaml b/tests/test_cluster_yamls/ray/unit-test-all-params.yaml similarity index 83% rename from tests/test-case-custom-image.yaml rename to tests/test_cluster_yamls/ray/unit-test-all-params.yaml index d7e525076..eda7270f6 100644 --- a/tests/test-case-custom-image.yaml +++ b/tests/test_cluster_yamls/ray/unit-test-all-params.yaml @@ -5,10 +5,10 @@ metadata: app.kubernetes.io/managed-by: test-prefix labels: controller-tools.k8s.io: '1.0' + key1: value1 + key2: value2 kueue.x-k8s.io/queue-name: local-queue-default - testlabel: test - testlabel2: test - name: unit-test-cluster-custom-image + name: test-all-params namespace: ns spec: autoscalerOptions: @@ -28,13 +28,16 @@ spec: rayStartParams: block: 'true' dashboard-host: 0.0.0.0 - num-gpus: '0' + num-gpus: '1' resources: '"{}"' serviceType: ClusterIP template: spec: containers: - - image: quay.io/project-codeflare/ray:2.20.0-py39-cu118 + - env: &id001 + key1: value1 + key2: value2 + image: example/ray:tag imagePullPolicy: Always lifecycle: preStop: @@ -53,11 +56,13 @@ spec: name: client resources: limits: - cpu: 2 - memory: 8G + cpu: 8 + memory: 16G + nvidia.com/gpu: 1 
requests: - cpu: 2 - memory: 8G + cpu: 4 + memory: 12G + nvidia.com/gpu: 1 volumeMounts: - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert @@ -72,7 +77,8 @@ spec: name: odh-ca-cert subPath: odh-ca-bundle.crt imagePullSecrets: - - name: unit-test-pull-secret + - name: secret1 + - name: secret2 volumes: - configMap: items: @@ -90,14 +96,14 @@ spec: name: odh-ca-cert rayVersion: 2.35.0 workerGroupSpecs: - - groupName: small-group-unit-test-cluster-custom-image - maxReplicas: 2 - minReplicas: 2 + - groupName: small-group-test-all-params + maxReplicas: 10 + minReplicas: 10 rayStartParams: block: 'true' - num-gpus: '7' + num-gpus: '1' resources: '"{}"' - replicas: 2 + replicas: 10 template: metadata: annotations: @@ -106,7 +112,8 @@ spec: key: value spec: containers: - - image: quay.io/project-codeflare/ray:2.20.0-py39-cu118 + - env: *id001 + image: example/ray:tag lifecycle: preStop: exec: @@ -117,13 +124,13 @@ spec: name: machine-learning resources: limits: - cpu: 4 - memory: 6G - nvidia.com/gpu: 7 + cpu: 8 + memory: 16G + nvidia.com/gpu: 1 requests: - cpu: 3 - memory: 5G - nvidia.com/gpu: 7 + cpu: 4 + memory: 12G + nvidia.com/gpu: 1 volumeMounts: - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert @@ -138,7 +145,8 @@ spec: name: odh-ca-cert subPath: odh-ca-bundle.crt imagePullSecrets: - - name: unit-test-pull-secret + - name: secret1 + - name: secret2 volumes: - configMap: items: diff --git a/tests/test-default-appwrapper.yaml b/tests/test_cluster_yamls/support_clusters/test-aw-a.yaml similarity index 95% rename from tests/test-default-appwrapper.yaml rename to tests/test_cluster_yamls/support_clusters/test-aw-a.yaml index 0780a46e1..9b8a647f6 100644 --- a/tests/test-default-appwrapper.yaml +++ b/tests/test_cluster_yamls/support_clusters/test-aw-a.yaml @@ -2,9 +2,9 @@ apiVersion: workload.codeflare.dev/v1beta2 kind: AppWrapper metadata: labels: - kueue.x-k8s.io/queue-name: local-queue-default - name: 
unit-test-default-cluster - namespace: opendatahub + kueue.x-k8s.io/queue-name: local_default_queue + name: test-cluster-a + namespace: ns spec: components: - template: @@ -13,8 +13,8 @@ spec: metadata: labels: controller-tools.k8s.io: '1.0' - name: unit-test-default-cluster - namespace: opendatahub + name: test-cluster-a + namespace: ns spec: autoscalerOptions: idleTimeoutSeconds: 60 @@ -38,7 +38,6 @@ spec: serviceType: ClusterIP template: spec: - imagePullSecrets: [] containers: - image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 imagePullPolicy: Always @@ -77,6 +76,7 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt + imagePullSecrets: [] volumes: - configMap: items: @@ -94,7 +94,7 @@ spec: name: odh-ca-cert rayVersion: 2.35.0 workerGroupSpecs: - - groupName: small-group-unit-test-default-cluster + - groupName: small-group-test-cluster-a maxReplicas: 1 minReplicas: 1 rayStartParams: @@ -109,7 +109,6 @@ spec: labels: key: value spec: - imagePullSecrets: [] containers: - image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 lifecycle: @@ -140,6 +139,7 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt + imagePullSecrets: [] volumes: - configMap: items: diff --git a/tests/test_cluster_yamls/support_clusters/test-aw-b.yaml b/tests/test_cluster_yamls/support_clusters/test-aw-b.yaml new file mode 100644 index 000000000..763eb5c2a --- /dev/null +++ b/tests/test_cluster_yamls/support_clusters/test-aw-b.yaml @@ -0,0 +1,157 @@ +apiVersion: workload.codeflare.dev/v1beta2 +kind: AppWrapper +metadata: + labels: + kueue.x-k8s.io/queue-name: local_default_queue + name: test-cluster-b + namespace: ns +spec: + components: + - template: + apiVersion: ray.io/v1 + kind: RayCluster + metadata: + labels: + controller-tools.k8s.io: '1.0' + name: test-cluster-b + namespace: ns + spec: + 
autoscalerOptions: + idleTimeoutSeconds: 60 + imagePullPolicy: Always + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 500m + memory: 512Mi + upscalingMode: Default + enableInTreeAutoscaling: false + headGroupSpec: + enableIngress: false + rayStartParams: + block: 'true' + dashboard-host: 0.0.0.0 + num-gpus: '0' + resources: '"{}"' + serviceType: ClusterIP + template: + spec: + containers: + - image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: ray-head + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: 2 + memory: 8G + requests: + cpu: 2 + memory: 8G + volumeMounts: + - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + imagePullSecrets: [] + volumes: + - configMap: + items: + - key: ca-bundle.crt + path: odh-trusted-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-trusted-ca-cert + - configMap: + items: + - key: odh-ca-bundle.crt + path: odh-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-ca-cert + rayVersion: 2.35.0 + workerGroupSpecs: + - groupName: small-group-test-cluster-b + maxReplicas: 1 + minReplicas: 1 + rayStartParams: + block: 'true' + num-gpus: '0' + resources: '"{}"' + replicas: 1 + template: + metadata: + annotations: + key: value + labels: + key: value + spec: + containers: + - image: 
quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: machine-learning + resources: + limits: + cpu: 1 + memory: 2G + requests: + cpu: 1 + memory: 2G + volumeMounts: + - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + imagePullSecrets: [] + volumes: + - configMap: + items: + - key: ca-bundle.crt + path: odh-trusted-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-trusted-ca-cert + - configMap: + items: + - key: odh-ca-bundle.crt + path: odh-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-ca-cert diff --git a/tests/test-case-no-mcad.yamls b/tests/test_cluster_yamls/support_clusters/test-rc-a.yaml similarity index 86% rename from tests/test-case-no-mcad.yamls rename to tests/test_cluster_yamls/support_clusters/test-rc-a.yaml index 36ce8e262..f12ffde00 100644 --- a/tests/test-case-no-mcad.yamls +++ b/tests/test_cluster_yamls/support_clusters/test-rc-a.yaml @@ -1,14 +1,10 @@ apiVersion: ray.io/v1 kind: RayCluster metadata: - annotations: - app.kubernetes.io/managed-by: test-prefix labels: controller-tools.k8s.io: '1.0' - kueue.x-k8s.io/queue-name: local-queue-default - testlabel: test - testlabel2: test - name: unit-test-cluster-ray + kueue.x-k8s.io/queue-name: local_default_queue + name: test-cluster-a namespace: ns spec: autoscalerOptions: @@ -71,8 +67,7 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt - imagePullSecrets: - - name: unit-test-pull-secret + 
imagePullSecrets: [] volumes: - configMap: items: @@ -90,14 +85,14 @@ spec: name: odh-ca-cert rayVersion: 2.35.0 workerGroupSpecs: - - groupName: small-group-unit-test-cluster-ray - maxReplicas: 2 - minReplicas: 2 + - groupName: small-group-test-cluster-a + maxReplicas: 1 + minReplicas: 1 rayStartParams: block: 'true' - num-gpus: '7' + num-gpus: '0' resources: '"{}"' - replicas: 2 + replicas: 1 template: metadata: annotations: @@ -117,13 +112,11 @@ spec: name: machine-learning resources: limits: - cpu: 4 - memory: 6G - nvidia.com/gpu: 7 + cpu: 1 + memory: 2G requests: - cpu: 3 - memory: 5G - nvidia.com/gpu: 7 + cpu: 1 + memory: 2G volumeMounts: - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert @@ -137,8 +130,7 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt - imagePullSecrets: - - name: unit-test-pull-secret + imagePullSecrets: [] volumes: - configMap: items: diff --git a/tests/test_cluster_yamls/support_clusters/test-rc-b.yaml b/tests/test_cluster_yamls/support_clusters/test-rc-b.yaml new file mode 100644 index 000000000..1d41e365f --- /dev/null +++ b/tests/test_cluster_yamls/support_clusters/test-rc-b.yaml @@ -0,0 +1,148 @@ +apiVersion: ray.io/v1 +kind: RayCluster +metadata: + labels: + controller-tools.k8s.io: '1.0' + kueue.x-k8s.io/queue-name: local_default_queue + name: test-rc-b + namespace: ns +spec: + autoscalerOptions: + idleTimeoutSeconds: 60 + imagePullPolicy: Always + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 500m + memory: 512Mi + upscalingMode: Default + enableInTreeAutoscaling: false + headGroupSpec: + enableIngress: false + rayStartParams: + block: 'true' + dashboard-host: 0.0.0.0 + num-gpus: '0' + resources: '"{}"' + serviceType: ClusterIP + template: + spec: + containers: + - image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: 
+ - /bin/sh + - -c + - ray stop + name: ray-head + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: 2 + memory: 8G + requests: + cpu: 2 + memory: 8G + volumeMounts: + - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + imagePullSecrets: [] + volumes: + - configMap: + items: + - key: ca-bundle.crt + path: odh-trusted-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-trusted-ca-cert + - configMap: + items: + - key: odh-ca-bundle.crt + path: odh-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-ca-cert + rayVersion: 2.35.0 + workerGroupSpecs: + - groupName: small-group-test-rc-b + maxReplicas: 1 + minReplicas: 1 + rayStartParams: + block: 'true' + num-gpus: '0' + resources: '"{}"' + replicas: 1 + template: + metadata: + annotations: + key: value + labels: + key: value + spec: + containers: + - image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: machine-learning + resources: + limits: + cpu: 1 + memory: 2G + requests: + cpu: 1 + memory: 2G + volumeMounts: + - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + - mountPath: 
/etc/ssl/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + imagePullSecrets: [] + volumes: + - configMap: + items: + - key: ca-bundle.crt + path: odh-trusted-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-trusted-ca-cert + - configMap: + items: + - key: odh-ca-bundle.crt + path: odh-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-ca-cert diff --git a/tests/unit_test.py b/tests/unit_test.py deleted file mode 100644 index 1f11643bd..000000000 --- a/tests/unit_test.py +++ /dev/null @@ -1,3350 +0,0 @@ -# Copyright 2022 IBM, Red Hat -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import filecmp -import os -import re -import sys -import uuid -from pathlib import Path - -parent = Path(__file__).resolve().parents[1] -aw_dir = os.path.expanduser("~/.codeflare/resources/") -sys.path.append(str(parent) + "/src") - -from unittest.mock import MagicMock, patch - -import openshift -import pandas as pd -import pytest -import ray -import yaml -from kubernetes import client, config -from pytest_mock import MockerFixture -from ray.job_submission import JobSubmissionClient - -import codeflare_sdk.common.widgets.widgets as cf_widgets -from codeflare_sdk.common.kubernetes_cluster import ( - Authentication, - KubeConfigFileAuthentication, - TokenAuthentication, - config_check, -) -from codeflare_sdk.common.utils.generate_cert import ( - export_env, - generate_ca_cert, - generate_tls_cert, -) -from codeflare_sdk.ray.appwrapper.awload import AWManager -from codeflare_sdk.ray.appwrapper.status import AppWrapper, AppWrapperStatus -from codeflare_sdk.ray.client.ray_jobs import RayJobClient -from codeflare_sdk.ray.cluster.cluster import ( - Cluster, - ClusterConfiguration, - _app_wrapper_status, - _copy_to_ray, - _map_to_ray_cluster, - _ray_cluster_status, - get_cluster, - list_all_clusters, - list_all_queued, -) -from codeflare_sdk.ray.cluster.generate_yaml import gen_names, is_openshift_cluster -from codeflare_sdk.ray.cluster.pretty_print import ( - print_app_wrappers_status, - print_cluster_status, - print_clusters, - print_no_resources_found, -) -from codeflare_sdk.ray.cluster.status import ( - CodeFlareClusterStatus, - RayCluster, - RayClusterStatus, -) -from tests.unit_test_support import ( - createClusterConfig, - createClusterWithConfig, - createClusterWrongType, - get_package_and_version, -) - -# For mocking openshift client results -fake_res = openshift.Result("fake") - - -def mock_routes_api(mocker): - mocker.patch.object( - "_route_api_getter", - return_value=MagicMock( - resources=MagicMock( - get=MagicMock( - return_value=MagicMock( - 
create=MagicMock(), - replace=MagicMock(), - delete=MagicMock(), - ) - ) - ) - ), - ) - - -def arg_side_effect(*args): - fake_res.high_level_operation = args - return fake_res - - -def att_side_effect(self): - return self.high_level_operation - - -def test_token_auth_creation(): - try: - token_auth = TokenAuthentication(token="token", server="server") - assert token_auth.token == "token" - assert token_auth.server == "server" - assert token_auth.skip_tls == False - assert token_auth.ca_cert_path == None - - token_auth = TokenAuthentication(token="token", server="server", skip_tls=True) - assert token_auth.token == "token" - assert token_auth.server == "server" - assert token_auth.skip_tls == True - assert token_auth.ca_cert_path == None - - os.environ["CF_SDK_CA_CERT_PATH"] = "/etc/pki/tls/custom-certs/ca-bundle.crt" - token_auth = TokenAuthentication(token="token", server="server", skip_tls=False) - assert token_auth.token == "token" - assert token_auth.server == "server" - assert token_auth.skip_tls == False - assert token_auth.ca_cert_path == "/etc/pki/tls/custom-certs/ca-bundle.crt" - os.environ.pop("CF_SDK_CA_CERT_PATH") - - token_auth = TokenAuthentication( - token="token", - server="server", - skip_tls=False, - ca_cert_path=f"{parent}/tests/auth-test.crt", - ) - assert token_auth.token == "token" - assert token_auth.server == "server" - assert token_auth.skip_tls == False - assert token_auth.ca_cert_path == f"{parent}/tests/auth-test.crt" - - except Exception: - assert 0 == 1 - - -def test_token_auth_login_logout(mocker): - mocker.patch.object(client, "ApiClient") - - token_auth = TokenAuthentication( - token="testtoken", server="testserver:6443", skip_tls=False, ca_cert_path=None - ) - assert token_auth.login() == ("Logged into testserver:6443") - assert token_auth.logout() == ("Successfully logged out of testserver:6443") - - -def test_token_auth_login_tls(mocker): - mocker.patch.object(client, "ApiClient") - - token_auth = TokenAuthentication( - 
token="testtoken", server="testserver:6443", skip_tls=True, ca_cert_path=None - ) - assert token_auth.login() == ("Logged into testserver:6443") - token_auth = TokenAuthentication( - token="testtoken", server="testserver:6443", skip_tls=False, ca_cert_path=None - ) - assert token_auth.login() == ("Logged into testserver:6443") - token_auth = TokenAuthentication( - token="testtoken", - server="testserver:6443", - skip_tls=False, - ca_cert_path=f"{parent}/tests/auth-test.crt", - ) - assert token_auth.login() == ("Logged into testserver:6443") - - os.environ["CF_SDK_CA_CERT_PATH"] = f"{parent}/tests/auth-test.crt" - token_auth = TokenAuthentication( - token="testtoken", - server="testserver:6443", - skip_tls=False, - ) - assert token_auth.login() == ("Logged into testserver:6443") - - -def test_config_check_no_config_file(mocker): - mocker.patch("os.path.expanduser", return_value="/mock/home/directory") - mocker.patch("os.path.isfile", return_value=False) - mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.config_path", None) - mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.api_client", None) - - with pytest.raises(PermissionError): - config_check() - - -def test_config_check_with_incluster_config(mocker): - mocker.patch("os.path.expanduser", return_value="/mock/home/directory") - mocker.patch("os.path.isfile", return_value=False) - mocker.patch.dict(os.environ, {"KUBERNETES_PORT": "number"}) - mocker.patch("kubernetes.config.load_incluster_config", side_effect=None) - mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.config_path", None) - mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.api_client", None) - - result = config_check() - assert result == None - - -def test_config_check_with_existing_config_file(mocker): - mocker.patch("os.path.expanduser", return_value="/mock/home/directory") - mocker.patch("os.path.isfile", return_value=True) - mocker.patch("kubernetes.config.load_kube_config", side_effect=None) - 
mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.config_path", None) - mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.api_client", None) - - result = config_check() - assert result == None - - -def test_config_check_with_config_path_and_no_api_client(mocker): - mocker.patch( - "codeflare_sdk.common.kubernetes_cluster.auth.config_path", "/mock/config/path" - ) - mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.api_client", None) - result = config_check() - assert result == "/mock/config/path" - - -def test_load_kube_config(mocker): - mocker.patch.object(config, "load_kube_config") - kube_config_auth = KubeConfigFileAuthentication( - kube_config_path="/path/to/your/config" - ) - response = kube_config_auth.load_kube_config() - - assert ( - response - == "Loaded user config file at path %s" % kube_config_auth.kube_config_path - ) - - kube_config_auth = KubeConfigFileAuthentication(kube_config_path=None) - response = kube_config_auth.load_kube_config() - assert response == "Please specify a config file path" - - -def test_auth_coverage(): - abstract = Authentication() - abstract.login() - abstract.logout() - - -def test_config_creation(): - config = createClusterConfig() - - assert config.name == "unit-test-cluster" and config.namespace == "ns" - assert config.num_workers == 2 - assert config.worker_cpu_requests == 3 and config.worker_cpu_limits == 4 - assert config.worker_memory_requests == "5G" and config.worker_memory_limits == "6G" - assert config.worker_extended_resource_requests == {"nvidia.com/gpu": 7} - assert ( - config.template - == f"{parent}/src/codeflare_sdk/ray/templates/base-template.yaml" - ) - assert config.machine_types == ["cpu.small", "gpu.large"] - assert config.image_pull_secrets == ["unit-test-pull-secret"] - assert config.appwrapper == True - - -def test_config_creation_wrong_type(): - with pytest.raises(TypeError): - createClusterWrongType() - - -def test_cluster_creation(mocker): - # Create AppWrapper containing 
a Ray Cluster with no local queue specified - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), - ) - cluster = createClusterWithConfig(mocker) - assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster.yaml" - assert cluster.app_wrapper_name == "unit-test-cluster" - assert filecmp.cmp( - f"{aw_dir}unit-test-cluster.yaml", - f"{parent}/tests/test-case.yaml", - shallow=True, - ) - - -@patch.dict("os.environ", {"NB_PREFIX": "test-prefix"}) -def test_cluster_no_kueue_no_aw(mocker): - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": "apps.cluster.awsroute.org"}}, - ) - mocker.patch("kubernetes.client.CustomObjectsApi.list_namespaced_custom_object") - config = createClusterConfig() - config.appwrapper = False - config.name = "unit-test-no-kueue" - config.write_to_file = True - cluster = Cluster(config) - assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-no-kueue.yaml" - assert cluster.config.local_queue == None - assert filecmp.cmp( - f"{aw_dir}unit-test-no-kueue.yaml", - f"{parent}/tests/test-case-no-kueue-no-aw.yaml", - shallow=True, - ) - - -def get_local_queue(group, version, namespace, plural): - assert group == "kueue.x-k8s.io" - assert version == "v1beta1" - assert namespace == "ns" - assert plural == "localqueues" - local_queues = { - "apiVersion": "kueue.x-k8s.io/v1beta1", - "items": [ - { - "apiVersion": "kueue.x-k8s.io/v1beta1", - "kind": "LocalQueue", - "metadata": { - "annotations": {"kueue.x-k8s.io/default-queue": "true"}, - "name": "local-queue-default", - "namespace": "ns", - }, - "spec": {"clusterQueue": "cluster-queue"}, - }, - { - "apiVersion": "kueue.x-k8s.io/v1beta1", - "kind": "LocalQueue", - "metadata": { - "name": "team-a-queue", - 
"namespace": "ns", - }, - "spec": {"clusterQueue": "team-a-queue"}, - }, - ], - "kind": "LocalQueueList", - "metadata": {"continue": "", "resourceVersion": "2266811"}, - } - return local_queues - - -@patch.dict("os.environ", {"NB_PREFIX": "test-prefix"}) -def test_cluster_creation_no_mcad(mocker): - # Create Ray Cluster with no local queue specified - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": "apps.cluster.awsroute.org"}}, - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), - ) - - config = createClusterConfig() - config.name = "unit-test-cluster-ray" - config.write_to_file = True - config.labels = {"testlabel": "test", "testlabel2": "test"} - config.appwrapper = False - cluster = Cluster(config) - - assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster-ray.yaml" - assert cluster.app_wrapper_name == "unit-test-cluster-ray" - assert filecmp.cmp( - f"{aw_dir}unit-test-cluster-ray.yaml", - f"{parent}/tests/test-case-no-mcad.yamls", - shallow=True, - ) - - -@patch.dict("os.environ", {"NB_PREFIX": "test-prefix"}) -def test_cluster_creation_no_mcad_local_queue(mocker): - # With written resources - # Create Ray Cluster with local queue specified - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": "apps.cluster.awsroute.org"}}, - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), - ) - config = createClusterConfig() - config.name = "unit-test-cluster-ray" - config.appwrapper = False - config.write_to_file = True - config.local_queue = "local-queue-default" - config.labels = 
{"testlabel": "test", "testlabel2": "test"} - cluster = Cluster(config) - assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster-ray.yaml" - assert cluster.app_wrapper_name == "unit-test-cluster-ray" - assert filecmp.cmp( - f"{aw_dir}unit-test-cluster-ray.yaml", - f"{parent}/tests/test-case-no-mcad.yamls", - shallow=True, - ) - # With resources loaded in memory - config = ClusterConfiguration( - name="unit-test-cluster-ray", - namespace="ns", - num_workers=2, - worker_cpu_requests=3, - worker_cpu_limits=4, - worker_memory_requests=5, - worker_memory_limits=6, - worker_extended_resource_requests={"nvidia.com/gpu": 7}, - machine_types=["cpu.small", "gpu.large"], - image_pull_secrets=["unit-test-pull-secret"], - write_to_file=True, - appwrapper=False, - local_queue="local-queue-default", - labels={"testlabel": "test", "testlabel2": "test"}, - ) - cluster = Cluster(config) - assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster-ray.yaml" - assert cluster.app_wrapper_name == "unit-test-cluster-ray" - assert filecmp.cmp( - f"{aw_dir}unit-test-cluster-ray.yaml", - f"{parent}/tests/test-case-no-mcad.yamls", - shallow=True, - ) - - -def test_default_cluster_creation(mocker): - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch( - "codeflare_sdk.ray.cluster.cluster.get_current_namespace", - return_value="opendatahub", - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), - ) - default_config = ClusterConfiguration( - name="unit-test-default-cluster", - appwrapper=True, - ) - cluster = Cluster(default_config) - test_aw = yaml.load(cluster.app_wrapper_yaml, Loader=yaml.FullLoader) - - with open( - f"{parent}/tests/test-default-appwrapper.yaml", - ) as f: - default_aw = yaml.load(f, Loader=yaml.FullLoader) - assert test_aw == default_aw - - assert cluster.app_wrapper_name == "unit-test-default-cluster" - assert 
cluster.config.namespace == "opendatahub" - - -@patch.dict("os.environ", {"NB_PREFIX": "test-prefix"}) -def test_cluster_creation_with_custom_image(mocker): - # With written resources - # Create Ray Cluster with local queue specified - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": "apps.cluster.awsroute.org"}}, - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), - ) - config = createClusterConfig() - config.name = "unit-test-cluster-custom-image" - config.appwrapper = False - config.image = "quay.io/project-codeflare/ray:2.20.0-py39-cu118" - config.local_queue = "local-queue-default" - config.labels = {"testlabel": "test", "testlabel2": "test"} - cluster = Cluster(config) - assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster-custom-image.yaml" - assert cluster.app_wrapper_name == "unit-test-cluster-custom-image" - assert filecmp.cmp( - f"{aw_dir}unit-test-cluster-custom-image.yaml", - f"{parent}/tests/test-case-custom-image.yaml", - shallow=True, - ) - # With resources loaded in memory - config = ClusterConfiguration( - name="unit-test-cluster-custom-image", - namespace="ns", - num_workers=2, - worker_cpu_requests=3, - worker_cpu_limits=4, - worker_memory_requests=5, - worker_memory_limits=6, - worker_extended_resource_requests={"nvidia.com/gpu": 7}, - machine_types=["cpu.small", "gpu.large"], - image_pull_secrets=["unit-test-pull-secret"], - image="quay.io/project-codeflare/ray:2.20.0-py39-cu118", - write_to_file=True, - appwrapper=False, - local_queue="local-queue-default", - labels={"testlabel": "test", "testlabel2": "test"}, - ) - cluster = Cluster(config) - assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster-custom-image.yaml" - assert cluster.app_wrapper_name == 
"unit-test-cluster-custom-image" - assert filecmp.cmp( - f"{aw_dir}unit-test-cluster-custom-image.yaml", - f"{parent}/tests/test-case-custom-image.yaml", - shallow=True, - ) - - -def test_gen_names_with_name(mocker): - mocker.patch.object( - uuid, "uuid4", return_value=uuid.UUID("00000000-0000-0000-0000-000000000001") - ) - name = "myname" - appwrapper_name, cluster_name = gen_names(name) - assert appwrapper_name == name - assert cluster_name == name - - -def test_gen_names_without_name(mocker): - mocker.patch.object( - uuid, "uuid4", return_value=uuid.UUID("00000000-0000-0000-0000-000000000001") - ) - appwrapper_name, cluster_name = gen_names(None) - assert appwrapper_name.startswith("appwrapper-") - assert cluster_name.startswith("cluster-") - - -def arg_check_apply_effect(group, version, namespace, plural, body, *args): - assert namespace == "ns" - assert args == tuple() - if plural == "appwrappers": - assert group == "workload.codeflare.dev" - assert version == "v1beta2" - with open(f"{aw_dir}unit-test-cluster.yaml") as f: - aw = yaml.load(f, Loader=yaml.FullLoader) - assert body == aw - elif plural == "rayclusters": - assert group == "ray.io" - assert version == "v1" - with open(f"{aw_dir}unit-test-cluster-ray.yaml") as f: - yamls = yaml.load_all(f, Loader=yaml.FullLoader) - for resource in yamls: - if resource["kind"] == "RayCluster": - assert body == resource - elif plural == "ingresses": - assert group == "networking.k8s.io" - assert version == "v1" - with open(f"{aw_dir}unit-test-cluster-ray.yaml") as f: - yamls = yaml.load_all(f, Loader=yaml.FullLoader) - for resource in yamls: - if resource["kind"] == "Ingress": - assert body == resource - elif plural == "routes": - assert group == "route.openshift.io" - assert version == "v1" - with open(f"{aw_dir}unit-test-cluster-ray.yaml") as f: - yamls = yaml.load_all(f, Loader=yaml.FullLoader) - for resource in yamls: - if resource["kind"] == "Ingress": - assert body == resource - else: - assert 1 == 0 - - -def 
arg_check_del_effect(group, version, namespace, plural, name, *args): - assert namespace == "ns" - assert args == tuple() - if plural == "appwrappers": - assert group == "workload.codeflare.dev" - assert version == "v1beta2" - assert name == "unit-test-cluster" - elif plural == "rayclusters": - assert group == "ray.io" - assert version == "v1" - assert name == "unit-test-cluster-ray" - elif plural == "ingresses": - assert group == "networking.k8s.io" - assert version == "v1" - assert name == "ray-dashboard-unit-test-cluster-ray" - - -def test_cluster_up_down(mocker): - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster._throw_for_no_raycluster") - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": ""}}, - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.create_namespaced_custom_object", - side_effect=arg_check_apply_effect, - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.delete_namespaced_custom_object", - side_effect=arg_check_del_effect, - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_cluster_custom_object", - return_value={"items": []}, - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), - ) - cluster = cluster = createClusterWithConfig(mocker) - cluster.up() - cluster.down() - - -def test_cluster_up_down_no_mcad(mocker): - mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster._throw_for_no_raycluster") - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), - ) - 
mocker.patch( - "kubernetes.client.CustomObjectsApi.create_namespaced_custom_object", - side_effect=arg_check_apply_effect, - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.delete_namespaced_custom_object", - side_effect=arg_check_del_effect, - ) - mocker.patch( - "kubernetes.client.CoreV1Api.create_namespaced_secret", - ) - mocker.patch( - "kubernetes.client.CoreV1Api.delete_namespaced_secret", - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_cluster_custom_object", - return_value={"items": []}, - ) - config = createClusterConfig() - config.name = "unit-test-cluster-ray" - config.appwrapper = False - cluster = Cluster(config) - cluster.up() - cluster.down() - - -def arg_check_list_effect(group, version, plural, name, *args): - assert group == "config.openshift.io" - assert version == "v1" - assert plural == "ingresses" - assert name == "cluster" - assert args == tuple() - return {"spec": {"domain": "test"}} - - -""" We need to fix get_current_namespace in order to reuse this test. 
-def test_get_ingress_domain(mocker): - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - side_effect=arg_check_list_effect, - ) - domain = _get_ingress_domain() - assert domain == "test" -""" - - -def aw_status_fields(group, version, namespace, plural, *args): - assert group == "workload.codeflare.dev" - assert version == "v1beta2" - assert namespace == "test-ns" - assert plural == "appwrappers" - assert args == tuple() - return {"items": []} - - -def test_aw_status(mocker): - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - side_effect=aw_status_fields, - ) - aw = _app_wrapper_status("test-aw", "test-ns") - assert aw == None - - -def rc_status_fields(group, version, namespace, plural, *args): - assert group == "ray.io" - assert version == "v1" - assert namespace == "test-ns" - assert plural == "rayclusters" - assert args == tuple() - return {"items": []} - - -def test_rc_status(mocker): - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - side_effect=rc_status_fields, - ) - rc = _ray_cluster_status("test-rc", "test-ns") - assert rc == None - - -def test_cluster_uris(mocker): - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "codeflare_sdk.ray.cluster.cluster._get_ingress_domain", - return_value="apps.cluster.awsroute.org", - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), - ) - cluster = cluster = createClusterWithConfig(mocker) - mocker.patch( - "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", - 
return_value=ingress_retrieval( - cluster_name="unit-test-cluster", - annotations={"route.openshift.io/termination": "passthrough"}, - ), - ) - assert ( - cluster.cluster_dashboard_uri() - == "https://ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org" - ) - mocker.patch( - "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", - return_value=ingress_retrieval(), - ) - assert cluster.cluster_uri() == "ray://unit-test-cluster-head-svc.ns.svc:10001" - assert ( - cluster.cluster_dashboard_uri() - == "http://ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org" - ) - cluster.config.name = "fake" - mocker.patch( - "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", - ) - assert ( - cluster.cluster_dashboard_uri() - == "Dashboard not available yet, have you run cluster.up()?" - ) - - -def test_local_client_url(mocker): - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": ""}}, - ) - mocker.patch( - "codeflare_sdk.ray.cluster.cluster._get_ingress_domain", - return_value="rayclient-unit-test-cluster-localinter-ns.apps.cluster.awsroute.org", - ) - mocker.patch( - "codeflare_sdk.ray.cluster.cluster.Cluster.create_app_wrapper", - return_value="unit-test-cluster-localinter.yaml", - ) - - cluster_config = ClusterConfiguration( - name="unit-test-cluster-localinter", - namespace="ns", - write_to_file=True, - ) - cluster = Cluster(cluster_config) - assert ( - cluster.local_client_url() - == "ray://rayclient-unit-test-cluster-localinter-ns.apps.cluster.awsroute.org" - ) - - -def ray_addr(self, *args): - return self._address - - -def mocked_ingress(port, cluster_name="unit-test-cluster", annotations: dict = None): - labels = {"ingress-owner": cluster_name} - if port == 10001: - name = f"rayclient-{cluster_name}" - else: - name = f"ray-dashboard-{cluster_name}" - mock_ingress = client.V1Ingress( - metadata=client.V1ObjectMeta( - name=name, - annotations=annotations, - labels=labels, - 
owner_references=[ - client.V1OwnerReference( - api_version="v1", kind="Ingress", name=cluster_name, uid="unique-id" - ) - ], - ), - spec=client.V1IngressSpec( - rules=[ - client.V1IngressRule( - host=f"{name}-ns.apps.cluster.awsroute.org", - http=client.V1HTTPIngressRuleValue( - paths=[ - client.V1HTTPIngressPath( - path_type="Prefix", - path="/", - backend=client.V1IngressBackend( - service=client.V1IngressServiceBackend( - name="head-svc-test", - port=client.V1ServiceBackendPort(number=port), - ) - ), - ) - ] - ), - ) - ], - ), - ) - return mock_ingress - - -def ingress_retrieval( - cluster_name="unit-test-cluster", client_ing: bool = False, annotations: dict = None -): - dashboard_ingress = mocked_ingress(8265, cluster_name, annotations) - if client_ing: - client_ingress = mocked_ingress( - 10001, cluster_name=cluster_name, annotations=annotations - ) - mock_ingress_list = client.V1IngressList( - items=[client_ingress, dashboard_ingress] - ) - else: - mock_ingress_list = client.V1IngressList(items=[dashboard_ingress]) - - return mock_ingress_list - - -def test_ray_job_wrapping(mocker): - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), - ) - cluster = cluster = createClusterWithConfig(mocker) - mocker.patch( - "ray.job_submission.JobSubmissionClient._check_connection_and_version_with_url", - return_value="None", - ) - mock_res = mocker.patch.object( - ray.job_submission.JobSubmissionClient, "list_jobs", autospec=True - ) - mock_res.side_effect = ray_addr - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": ""}}, - ) - mocker.patch( - "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", - return_value=ingress_retrieval(), - ) - assert cluster.list_jobs() == cluster.cluster_dashboard_uri() - - mock_res = 
mocker.patch.object( - ray.job_submission.JobSubmissionClient, "get_job_status", autospec=True - ) - mock_res.side_effect = ray_addr - assert cluster.job_status("fake_id") == cluster.cluster_dashboard_uri() - - mock_res = mocker.patch.object( - ray.job_submission.JobSubmissionClient, "get_job_logs", autospec=True - ) - mock_res.side_effect = ray_addr - assert cluster.job_logs("fake_id") == cluster.cluster_dashboard_uri() - - -def test_print_no_resources(capsys): - try: - print_no_resources_found() - except Exception: - assert 1 == 0 - captured = capsys.readouterr() - assert captured.out == ( - "╭──────────────────────────────────────────────────────────────────────────────╮\n" - "│ No resources found, have you run cluster.up() yet? │\n" - "╰──────────────────────────────────────────────────────────────────────────────╯\n" - ) - - -def test_print_no_cluster(capsys): - try: - print_cluster_status(None) - except Exception: - assert 1 == 0 - captured = capsys.readouterr() - assert captured.out == ( - "╭──────────────────────────────────────────────────────────────────────────────╮\n" - "│ No resources found, have you run cluster.up() yet? 
│\n" - "╰──────────────────────────────────────────────────────────────────────────────╯\n" - ) - - -def test_print_appwrappers(capsys): - aw1 = AppWrapper( - name="awtest1", - status=AppWrapperStatus.SUSPENDED, - ) - aw2 = AppWrapper( - name="awtest2", - status=AppWrapperStatus.RUNNING, - ) - try: - print_app_wrappers_status([aw1, aw2]) - except Exception: - assert 1 == 0 - captured = capsys.readouterr() - assert captured.out == ( - "╭─────────────────────────╮\n" - "│ 🚀 Cluster Queue │\n" - "│ Status 🚀 │\n" - "│ +---------+-----------+ │\n" - "│ | Name | Status | │\n" - "│ +=========+===========+ │\n" - "│ | awtest1 | suspended | │\n" - "│ | | | │\n" - "│ | awtest2 | running | │\n" - "│ | | | │\n" - "│ +---------+-----------+ │\n" - "╰─────────────────────────╯\n" - ) - - -def test_ray_details(mocker, capsys): - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - ray1 = RayCluster( - name="raytest1", - status=RayClusterStatus.READY, - num_workers=1, - worker_mem_requests="2G", - worker_mem_limits="2G", - worker_cpu_requests=1, - worker_cpu_limits=1, - namespace="ns", - dashboard="fake-uri", - head_cpu_requests=2, - head_cpu_limits=2, - head_mem_requests=8, - head_mem_limits=8, - ) - mocker.patch( - "codeflare_sdk.ray.cluster.cluster.Cluster.status", - return_value=(False, CodeFlareClusterStatus.UNKNOWN), - ) - mocker.patch( - "codeflare_sdk.ray.cluster.cluster.Cluster.cluster_dashboard_uri", - return_value="", - ) - mocker.patch( - "codeflare_sdk.common.kueue.kueue.local_queue_exists", - return_value="true", - ) - cf = Cluster( - ClusterConfiguration( - name="raytest2", - namespace="ns", - write_to_file=True, - appwrapper=True, - local_queue="local_default_queue", - ) - ) - captured = capsys.readouterr() - ray2 = _copy_to_ray(cf) - details = cf.details() - assert details == ray2 - assert ray2.name == "raytest2" - assert ray1.namespace == ray2.namespace - assert ray1.num_workers == ray2.num_workers - assert ray1.worker_mem_requests == 
ray2.worker_mem_requests - assert ray1.worker_mem_limits == ray2.worker_mem_limits - assert ray1.worker_cpu_requests == ray2.worker_cpu_requests - assert ray1.worker_cpu_limits == ray2.worker_cpu_limits - assert ray1.worker_extended_resources == ray2.worker_extended_resources - try: - print_clusters([ray1, ray2]) - print_cluster_status(ray1) - print_cluster_status(ray2) - except Exception: - assert 0 == 1 - captured = capsys.readouterr() - assert captured.out == ( - " 🚀 CodeFlare Cluster Details 🚀 \n" - " \n" - " ╭───────────────────────────────────────────────────────────────╮ \n" - " │ Name │ \n" - " │ raytest2 Inactive ❌ │ \n" - " │ │ \n" - " │ URI: ray://raytest2-head-svc.ns.svc:10001 │ \n" - " │ │ \n" - " │ Dashboard🔗 │ \n" - " │ │ \n" - " │ Cluster Resources │ \n" - " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n" - " │ │ # Workers │ │ Memory CPU GPU │ │ \n" - " │ │ │ │ │ │ \n" - " │ │ 1 │ │ 2G~2G 1~1 0 │ │ \n" - " │ │ │ │ │ │ \n" - " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n" - " ╰───────────────────────────────────────────────────────────────╯ \n" - " 🚀 CodeFlare Cluster Details 🚀 \n" - " \n" - " ╭───────────────────────────────────────────────────────────────╮ \n" - " │ Name │ \n" - " │ raytest1 Active ✅ │ \n" - " │ │ \n" - " │ URI: ray://raytest1-head-svc.ns.svc:10001 │ \n" - " │ │ \n" - " │ Dashboard🔗 │ \n" - " │ │ \n" - " │ Cluster Resources │ \n" - " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n" - " │ │ # Workers │ │ Memory CPU GPU │ │ \n" - " │ │ │ │ │ │ \n" - " │ │ 1 │ │ 2G~2G 1~1 0 │ │ \n" - " │ │ │ │ │ │ \n" - " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n" - " ╰───────────────────────────────────────────────────────────────╯ \n" - "╭───────────────────────────────────────────────────────────────╮\n" - "│ Name │\n" - "│ raytest2 Inactive ❌ │\n" - "│ │\n" - "│ URI: ray://raytest2-head-svc.ns.svc:10001 │\n" - "│ │\n" - "│ Dashboard🔗 │\n" - "│ │\n" - "│ Cluster Resources 
│\n" - "│ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │\n" - "│ │ # Workers │ │ Memory CPU GPU │ │\n" - "│ │ │ │ │ │\n" - "│ │ 1 │ │ 2G~2G 1~1 0 │ │\n" - "│ │ │ │ │ │\n" - "│ ╰─────────────╯ ╰──────────────────────────────────────╯ │\n" - "╰───────────────────────────────────────────────────────────────╯\n" - " 🚀 CodeFlare Cluster Status 🚀 \n" - " \n" - " ╭──────────────────────────────────────────────────────────╮ \n" - " │ Name │ \n" - " │ raytest1 Active ✅ │ \n" - " │ │ \n" - " │ URI: ray://raytest1-head-svc.ns.svc:10001 │ \n" - " │ │ \n" - " │ Dashboard🔗 │ \n" - " │ │ \n" - " ╰──────────────────────────────────────────────────────────╯ \n" - " 🚀 CodeFlare Cluster Status 🚀 \n" - " \n" - " ╭────────────────────────────────────────────────────────────╮ \n" - " │ Name │ \n" - " │ raytest2 Inactive ❌ │ \n" - " │ │ \n" - " │ URI: ray://raytest2-head-svc.ns.svc:10001 │ \n" - " │ │ \n" - " │ Dashboard🔗 │ \n" - " │ │ \n" - " ╰────────────────────────────────────────────────────────────╯ \n" - ) - - -def act_side_effect_list(self): - print([self]) - self.out = str(self.high_level_operation) - return [self] - - -def get_obj_none(group, version, namespace, plural): - return {"items": []} - - -def get_ray_obj(group, version, namespace, plural, cls=None): - api_obj = { - "items": [ - { - "apiVersion": "ray.io/v1", - "kind": "RayCluster", - "metadata": { - "creationTimestamp": "2024-03-05T09:55:37Z", - "generation": 1, - "labels": { - "controller-tools.k8s.io": "1.0", - "resourceName": "quicktest", - "orderedinstance": "m4.xlarge_g4dn.xlarge", - "kueue.x-k8s.io/queue-name": "team-a-queue", - }, - "name": "quicktest", - "namespace": "ns", - "ownerReferences": [ - { - "apiVersion": "workload.codeflare.dev/v1beta2", - "blockOwnerDeletion": True, - "controller": True, - "kind": "AppWrapper", - "name": "quicktest", - "uid": "a29b1a7a-0992-4860-a8d5-a689a751a3e8", - } - ], - "resourceVersion": "5305674", - "uid": "820d065d-bf0c-4675-b951-d32ea496020e", - }, - "spec": { 
- "autoscalerOptions": { - "idleTimeoutSeconds": 60, - "imagePullPolicy": "Always", - "resources": { - "limits": {"cpu": "500m", "memory": "512Mi"}, - "requests": {"cpu": "500m", "memory": "512Mi"}, - }, - "upscalingMode": "Default", - }, - "enableInTreeAutoscaling": False, - "headGroupSpec": { - "rayStartParams": { - "block": "true", - "dashboard-host": "0.0.0.0", - "num-gpus": "0", - }, - "serviceType": "ClusterIP", - "template": { - "metadata": {}, - "spec": { - "containers": [ - { - "env": [ - { - "name": "MY_POD_IP", - "valueFrom": { - "fieldRef": { - "fieldPath": "status.podIP" - } - }, - }, - {"name": "RAY_USE_TLS", "value": "0"}, - { - "name": "RAY_TLS_SERVER_CERT", - "value": "/home/ray/workspace/tls/server.crt", - }, - { - "name": "RAY_TLS_SERVER_KEY", - "value": "/home/ray/workspace/tls/server.key", - }, - { - "name": "RAY_TLS_CA_CERT", - "value": "/home/ray/workspace/tls/ca.crt", - }, - ], - "image": "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103", - "imagePullPolicy": "Always", - "lifecycle": { - "preStop": { - "exec": { - "command": [ - "/bin/sh", - "-c", - "ray stop", - ] - } - } - }, - "name": "ray-head", - "ports": [ - { - "containerPort": 6379, - "name": "gcs", - "protocol": "TCP", - }, - { - "containerPort": 8265, - "name": "dashboard", - "protocol": "TCP", - }, - { - "containerPort": 10001, - "name": "client", - "protocol": "TCP", - }, - ], - "resources": { - "limits": { - "cpu": 2, - "memory": "8G", - }, - "requests": { - "cpu": 2, - "memory": "8G", - }, - }, - "volumeMounts": [ - { - "mountPath": "/etc/pki/tls/certs/odh-trusted-ca-bundle.crt", - "name": "odh-trusted-ca-cert", - "subPath": "odh-trusted-ca-bundle.crt", - }, - { - "mountPath": "/etc/ssl/certs/odh-trusted-ca-bundle.crt", - "name": "odh-trusted-ca-cert", - "subPath": "odh-trusted-ca-bundle.crt", - }, - { - "mountPath": "/etc/pki/tls/certs/odh-ca-bundle.crt", - "name": "odh-ca-cert", - "subPath": "odh-ca-bundle.crt", - }, - { - "mountPath": 
"/etc/ssl/certs/odh-ca-bundle.crt", - "name": "odh-ca-cert", - "subPath": "odh-ca-bundle.crt", - }, - ], - } - ], - "volumes": [ - { - "configMap": { - "items": [ - { - "key": "ca-bundle.crt", - "path": "odh-trusted-ca-bundle.crt", - } - ], - "name": "odh-trusted-ca-bundle", - "optional": True, - }, - "name": "odh-trusted-ca-cert", - }, - { - "configMap": { - "items": [ - { - "key": "odh-ca-bundle.crt", - "path": "odh-ca-bundle.crt", - } - ], - "name": "odh-trusted-ca-bundle", - "optional": True, - }, - "name": "odh-ca-cert", - }, - ], - }, - }, - }, - "rayVersion": "2.35.0", - "workerGroupSpecs": [ - { - "groupName": "small-group-quicktest", - "maxReplicas": 1, - "minReplicas": 1, - "rayStartParams": { - "block": "true", - "num-gpus": "0", - }, - "replicas": 1, - "scaleStrategy": {}, - "template": { - "metadata": { - "annotations": {"key": "value"}, - "labels": {"key": "value"}, - }, - "spec": { - "containers": [ - { - "env": [ - { - "name": "MY_POD_IP", - "valueFrom": { - "fieldRef": { - "fieldPath": "status.podIP" - } - }, - }, - {"name": "RAY_USE_TLS", "value": "0"}, - { - "name": "RAY_TLS_SERVER_CERT", - "value": "/home/ray/workspace/tls/server.crt", - }, - { - "name": "RAY_TLS_SERVER_KEY", - "value": "/home/ray/workspace/tls/server.key", - }, - { - "name": "RAY_TLS_CA_CERT", - "value": "/home/ray/workspace/tls/ca.crt", - }, - ], - "image": "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103", - "lifecycle": { - "preStop": { - "exec": { - "command": [ - "/bin/sh", - "-c", - "ray stop", - ] - } - } - }, - "name": "machine-learning", - "resources": { - "limits": { - "cpu": 1, - "memory": "2G", - }, - "requests": { - "cpu": 1, - "memory": "2G", - }, - }, - "volumeMounts": [ - { - "mountPath": "/etc/pki/tls/certs/odh-trusted-ca-bundle.crt", - "name": "odh-trusted-ca-cert", - "subPath": "odh-trusted-ca-bundle.crt", - }, - { - "mountPath": "/etc/ssl/certs/odh-trusted-ca-bundle.crt", - "name": "odh-trusted-ca-cert", - "subPath": 
"odh-trusted-ca-bundle.crt", - }, - { - "mountPath": "/etc/pki/tls/certs/odh-ca-bundle.crt", - "name": "odh-ca-cert", - "subPath": "odh-ca-bundle.crt", - }, - { - "mountPath": "/etc/ssl/certs/odh-ca-bundle.crt", - "name": "odh-ca-cert", - "subPath": "odh-ca-bundle.crt", - }, - ], - } - ], - "volumes": [ - { - "configMap": { - "items": [ - { - "key": "ca-bundle.crt", - "path": "odh-trusted-ca-bundle.crt", - } - ], - "name": "odh-trusted-ca-bundle", - "optional": True, - }, - "name": "odh-trusted-ca-cert", - }, - { - "configMap": { - "items": [ - { - "key": "odh-ca-bundle.crt", - "path": "odh-ca-bundle.crt", - } - ], - "name": "odh-trusted-ca-bundle", - "optional": True, - }, - "name": "odh-ca-cert", - }, - ], - }, - }, - } - ], - }, - "status": { - "desiredWorkerReplicas": 1, - "endpoints": { - "client": "10001", - "dashboard": "8265", - "gcs": "6379", - "metrics": "8080", - }, - "head": {"serviceIP": "172.30.179.88"}, - "lastUpdateTime": "2024-03-05T09:55:37Z", - "maxWorkerReplicas": 1, - "minWorkerReplicas": 1, - "observedGeneration": 1, - "state": "ready", - }, - }, - { - "apiVersion": "ray.io/v1", - "kind": "RayCluster", - "metadata": { - "creationTimestamp": "2023-02-22T16:26:07Z", - "generation": 1, - "labels": { - "controller-tools.k8s.io": "1.0", - "resourceName": "quicktest2", - "orderedinstance": "m4.xlarge_g4dn.xlarge", - }, - "name": "quicktest2", - "namespace": "ns", - "ownerReferences": [ - { - "apiVersion": "workload.codeflare.dev/v1beta2", - "blockOwnerDeletion": True, - "controller": True, - "kind": "AppWrapper", - "name": "quicktest2", - "uid": "6334fc1b-471e-4876-8e7b-0b2277679235", - } - ], - "resourceVersion": "9482407", - "uid": "44d45d1f-26c8-43e7-841f-831dbd8c1285", - }, - "spec": { - "autoscalerOptions": { - "idleTimeoutSeconds": 60, - "imagePullPolicy": "Always", - "resources": { - "limits": {"cpu": "500m", "memory": "512Mi"}, - "requests": {"cpu": "500m", "memory": "512Mi"}, - }, - "upscalingMode": "Default", - }, - 
"enableInTreeAutoscaling": False, - "headGroupSpec": { - "rayStartParams": { - "block": "true", - "dashboard-host": "0.0.0.0", - "num-gpus": "0", - }, - "serviceType": "ClusterIP", - "template": { - "spec": { - "containers": [ - { - "image": "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103", - "imagePullPolicy": "Always", - "lifecycle": { - "preStop": { - "exec": { - "command": [ - "/bin/sh", - "-c", - "ray stop", - ] - } - } - }, - "name": "ray-head", - "ports": [ - { - "containerPort": 6379, - "name": "gcs", - "protocol": "TCP", - }, - { - "containerPort": 8265, - "name": "dashboard", - "protocol": "TCP", - }, - { - "containerPort": 10001, - "name": "client", - "protocol": "TCP", - }, - ], - "resources": { - "limits": { - "cpu": 2, - "memory": "8G", - }, - "requests": { - "cpu": 2, - "memory": "8G", - }, - }, - } - ] - } - }, - }, - "rayVersion": "2.35.0", - "workerGroupSpecs": [ - { - "groupName": "small-group-quicktest2", - "maxReplicas": 1, - "minReplicas": 1, - "rayStartParams": { - "block": "true", - "num-gpus": "0", - }, - "replicas": 1, - "template": { - "metadata": { - "annotations": {"key": "value"}, - "labels": {"key": "value"}, - }, - "spec": { - "containers": [ - { - "env": [ - { - "name": "MY_POD_IP", - "valueFrom": { - "fieldRef": { - "fieldPath": "status.podIP" - } - }, - } - ], - "image": "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103", - "lifecycle": { - "preStop": { - "exec": { - "command": [ - "/bin/sh", - "-c", - "ray stop", - ] - } - } - }, - "name": "machine-learning", - "resources": { - "limits": { - "cpu": 1, - "memory": "2G", - }, - "requests": { - "cpu": 1, - "memory": "2G", - }, - }, - } - ], - }, - }, - } - ], - }, - "status": { - "availableWorkerReplicas": 2, - "desiredWorkerReplicas": 1, - "endpoints": { - "client": "10001", - "dashboard": "8265", - "gcs": "6379", - }, - "lastUpdateTime": "2023-02-22T16:26:16Z", - "maxWorkerReplicas": 1, - 
"minWorkerReplicas": 1, - "state": "suspended", - }, - }, - ] - } - return api_obj - - -def get_named_aw(group, version, namespace, plural, name): - aws = get_aw_obj("workload.codeflare.dev", "v1beta2", "ns", "appwrappers") - return aws["items"][0] - - -def get_aw_obj(group, version, namespace, plural): - api_obj1 = { - "items": [ - { - "apiVersion": "workload.codeflare.dev/v1beta2", - "kind": "AppWrapper", - "metadata": { - "name": "quicktest1", - "namespace": "ns", - }, - "spec": { - "components": [ - { - "template": { - "apiVersion": "ray.io/v1", - "kind": "RayCluster", - "metadata": { - "labels": { - "controller-tools.k8s.io": "1.0", - }, - "name": "quicktest1", - "namespace": "ns", - }, - "spec": { - "autoscalerOptions": { - "idleTimeoutSeconds": 60, - "imagePullPolicy": "Always", - "resources": { - "limits": { - "cpu": "500m", - "memory": "512Mi", - }, - "requests": { - "cpu": "500m", - "memory": "512Mi", - }, - }, - "upscalingMode": "Default", - }, - "enableInTreeAutoscaling": False, - "headGroupSpec": { - "rayStartParams": { - "block": "true", - "dashboard-host": "0.0.0.0", - "num-gpus": "0", - }, - "serviceType": "ClusterIP", - "template": { - "spec": { - "containers": [ - { - "image": "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103", - "imagePullPolicy": "Always", - "lifecycle": { - "preStop": { - "exec": { - "command": [ - "/bin/sh", - "-c", - "ray stop", - ] - } - } - }, - "name": "ray-head", - "ports": [ - { - "containerPort": 6379, - "name": "gcs", - }, - { - "containerPort": 8265, - "name": "dashboard", - }, - { - "containerPort": 10001, - "name": "client", - }, - ], - "resources": { - "limits": { - "cpu": 2, - "memory": "8G", - }, - "requests": { - "cpu": 2, - "memory": "8G", - }, - }, - } - ] - } - }, - }, - "rayVersion": "1.12.0", - "workerGroupSpecs": [ - { - "groupName": "small-group-quicktest", - "maxReplicas": 1, - "minReplicas": 1, - "rayStartParams": { - "block": "true", - "num-gpus": "0", - }, - 
"replicas": 1, - "template": { - "metadata": { - "annotations": {"key": "value"}, - "labels": {"key": "value"}, - }, - "spec": { - "containers": [ - { - "env": [ - { - "name": "MY_POD_IP", - "valueFrom": { - "fieldRef": { - "fieldPath": "status.podIP" - } - }, - } - ], - "image": "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103", - "lifecycle": { - "preStop": { - "exec": { - "command": [ - "/bin/sh", - "-c", - "ray stop", - ] - } - } - }, - "name": "machine-learning", - "resources": { - "limits": { - "cpu": 1, - "memory": "2G", - }, - "requests": { - "cpu": 1, - "memory": "2G", - }, - }, - } - ], - }, - }, - } - ], - }, - }, - }, - { - "template": { - "apiVersion": "networking.k8s.io/v1", - "kind": "Ingress", - "metadata": { - "labels": { - "ingress-owner": "appwrapper-name", - }, - "name": "ray-dashboard-quicktest", - "namespace": "default", - }, - "spec": { - "ingressClassName": "nginx", - "rules": [ - { - "http": { - "paths": { - "backend": { - "service": { - "name": "quicktest-head-svc", - "port": {"number": 8265}, - }, - }, - "pathType": "Prefix", - "path": "/", - }, - }, - "host": "quicktest.awsroute.com", - } - ], - }, - }, - }, - ], - }, - "status": { - "phase": "Running", - }, - }, - { - "apiVersion": "workload.codeflare.dev/v1beta2", - "kind": "AppWrapper", - "metadata": { - "name": "quicktest2", - "namespace": "ns", - }, - "spec": { - "components": [ - { - "template": { - "apiVersion": "ray.io/v1", - "kind": "RayCluster", - "metadata": { - "labels": { - "controller-tools.k8s.io": "1.0", - }, - "name": "quicktest2", - "namespace": "ns", - }, - "spec": { - "autoscalerOptions": { - "idleTimeoutSeconds": 60, - "imagePullPolicy": "Always", - "resources": { - "limits": { - "cpu": "500m", - "memory": "512Mi", - }, - "requests": { - "cpu": "500m", - "memory": "512Mi", - }, - }, - "upscalingMode": "Default", - }, - "enableInTreeAutoscaling": False, - "headGroupSpec": { - "rayStartParams": { - "block": "true", - 
"dashboard-host": "0.0.0.0", - "num-gpus": "0", - }, - "serviceType": "ClusterIP", - "template": { - "spec": { - "containers": [ - { - "image": "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103", - "imagePullPolicy": "Always", - "lifecycle": { - "preStop": { - "exec": { - "command": [ - "/bin/sh", - "-c", - "ray stop", - ] - } - } - }, - "name": "ray-head", - "ports": [ - { - "containerPort": 6379, - "name": "gcs", - }, - { - "containerPort": 8265, - "name": "dashboard", - }, - { - "containerPort": 10001, - "name": "client", - }, - ], - "resources": { - "limits": { - "cpu": 2, - "memory": "8G", - }, - "requests": { - "cpu": 2, - "memory": "8G", - }, - }, - } - ] - } - }, - }, - "rayVersion": "2.35.0", - "workerGroupSpecs": [ - { - "groupName": "small-group-quicktest", - "maxReplicas": 1, - "minReplicas": 1, - "rayStartParams": { - "block": "true", - "num-gpus": "0", - }, - "replicas": 1, - "template": { - "metadata": { - "annotations": {"key": "value"}, - "labels": {"key": "value"}, - }, - "spec": { - "containers": [ - { - "env": [ - { - "name": "MY_POD_IP", - "valueFrom": { - "fieldRef": { - "fieldPath": "status.podIP" - } - }, - } - ], - "image": "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103", - "lifecycle": { - "preStop": { - "exec": { - "command": [ - "/bin/sh", - "-c", - "ray stop", - ] - } - } - }, - "name": "machine-learning", - "resources": { - "limits": { - "cpu": 1, - "memory": "2G", - }, - "requests": { - "cpu": 1, - "memory": "2G", - }, - }, - } - ], - }, - }, - } - ], - }, - }, - }, - { - "template": { - "apiVersion": "route.openshift.io/v1", - "kind": "Route", - "metadata": { - "labels": { - "odh-ray-cluster-service": "quicktest-head-svc" - }, - "name": "ray-dashboard-quicktest", - "namespace": "default", - }, - "spec": { - "port": {"targetPort": "dashboard"}, - "to": { - "kind": "Service", - "name": "quicktest-head-svc", - }, - }, - }, - }, - ], - }, - "status": { - 
"phase": "Suspended", - }, - }, - ] - } - return api_obj1 - - -def route_list_retrieval(group, version, namespace, plural): - assert group == "route.openshift.io" - assert version == "v1" - assert namespace == "ns" - assert plural == "routes" - return { - "kind": "RouteList", - "apiVersion": "route.openshift.io/v1", - "metadata": {"resourceVersion": "6072398"}, - "items": [ - { - "metadata": { - "name": "ray-dashboard-quicktest", - "namespace": "ns", - }, - "spec": { - "host": "ray-dashboard-quicktest-opendatahub.apps.cluster.awsroute.org", - "to": { - "kind": "Service", - "name": "quicktest-head-svc", - "weight": 100, - }, - "port": {"targetPort": "dashboard"}, - "tls": {"termination": "edge"}, - }, - }, - { - "metadata": { - "name": "rayclient-quicktest", - "namespace": "ns", - }, - "spec": { - "host": "rayclient-quicktest-opendatahub.apps.cluster.awsroute.org", - "to": { - "kind": "Service", - "name": "quicktest-head-svc", - "weight": 100, - }, - "port": {"targetPort": "client"}, - "tls": {"termination": "passthrough"}, - }, - }, - ], - } - - -def test_get_cluster_openshift(mocker): - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - # Mock the client.ApisApi function to return a mock object - mock_api = MagicMock() - mock_api.get_api_versions.return_value.groups = [ - MagicMock(versions=[MagicMock(group_version="route.openshift.io/v1")]) - ] - mocker.patch("kubernetes.client.ApisApi", return_value=mock_api) - mocker.patch( - "codeflare_sdk.common.kueue.kueue.local_queue_exists", - return_value="true", - ) - - assert is_openshift_cluster() - - def custom_side_effect(group, version, namespace, plural, **kwargs): - if plural == "routes": - return route_list_retrieval("route.openshift.io", "v1", "ns", "routes") - elif plural == "rayclusters": - return get_ray_obj("ray.io", "v1", "ns", "rayclusters") - elif plural == "appwrappers": - return get_aw_obj("workload.codeflare.dev", "v1beta2", "ns", "appwrappers") - elif plural == "localqueues": 
- return get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues") - - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", get_aw_obj - ) - - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - side_effect=custom_side_effect, - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", - return_value=get_named_aw, - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", - side_effect=route_list_retrieval("route.openshift.io", "v1", "ns", "routes")[ - "items" - ], - ) - mocker.patch( - "codeflare_sdk.common.kueue.kueue.local_queue_exists", - return_value="true", - ) - - cluster = get_cluster("quicktest") - cluster_config = cluster.config - assert cluster_config.name == "quicktest" and cluster_config.namespace == "ns" - assert ( - "m4.xlarge" in cluster_config.machine_types - and "g4dn.xlarge" in cluster_config.machine_types - ) - assert ( - cluster_config.worker_cpu_requests == 1 - and cluster_config.worker_cpu_limits == 1 - ) - assert ( - cluster_config.worker_memory_requests == "2G" - and cluster_config.worker_memory_limits == "2G" - ) - assert cluster_config.worker_extended_resource_requests == {} - assert ( - cluster_config.image - == "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103" - ) - assert cluster_config.num_workers == 1 - - -def test_get_cluster(mocker): - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - side_effect=get_ray_obj, - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", - side_effect=get_named_aw, - ) - mocker.patch( - "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", - return_value=ingress_retrieval(cluster_name="quicktest", client_ing=True), - ) - 
mocker.patch( - "codeflare_sdk.common.kueue.kueue.local_queue_exists", - return_value="true", - ) - cluster = get_cluster("quicktest") - cluster_config = cluster.config - assert cluster_config.name == "quicktest" and cluster_config.namespace == "ns" - assert ( - "m4.xlarge" in cluster_config.machine_types - and "g4dn.xlarge" in cluster_config.machine_types - ) - assert ( - cluster_config.worker_cpu_requests == 1 - and cluster_config.worker_cpu_limits == 1 - ) - assert ( - cluster_config.worker_memory_requests == "2G" - and cluster_config.worker_memory_limits == "2G" - ) - assert cluster_config.worker_extended_resource_requests == {} - assert ( - cluster_config.image - == "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103" - ) - assert cluster_config.num_workers == 1 - - -def test_get_cluster_no_mcad(mocker): - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - side_effect=get_ray_obj, - ) - mocker.patch( - "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", - return_value=ingress_retrieval(cluster_name="quicktest", client_ing=True), - ) - mocker.patch( - "codeflare_sdk.common.kueue.kueue.local_queue_exists", - return_value="true", - ) - cluster = get_cluster("quicktest") - cluster_config = cluster.config - assert cluster_config.name == "quicktest" and cluster_config.namespace == "ns" - assert ( - "m4.xlarge" in cluster_config.machine_types - and "g4dn.xlarge" in cluster_config.machine_types - ) - assert ( - cluster_config.worker_cpu_requests == 1 - and cluster_config.worker_cpu_limits == 1 - ) - assert ( - cluster_config.worker_memory_requests == "2G" - and cluster_config.worker_memory_limits == "2G" - ) - assert cluster_config.worker_extended_resource_requests == {} - assert ( - cluster_config.image - == 
"ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103" - ) - assert cluster_config.num_workers == 1 - assert cluster_config.local_queue == "team-a-queue" - - -def route_retrieval(group, version, namespace, plural, name): - assert group == "route.openshift.io" - assert version == "v1" - assert namespace == "ns" - assert plural == "routes" - assert name == "ray-dashboard-unit-test-cluster" - return { - "items": [ - { - "metadata": {"name": "ray-dashboard-unit-test-cluster"}, - "spec": { - "host": "ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org" - }, - } - ] - } - - -def test_map_to_ray_cluster(mocker): - mocker.patch("kubernetes.config.load_kube_config") - - mocker.patch( - "codeflare_sdk.ray.cluster.cluster.is_openshift_cluster", return_value=True - ) - - mock_api_client = mocker.MagicMock(spec=client.ApiClient) - mocker.patch( - "codeflare_sdk.common.kubernetes_cluster.auth.get_api_client", - return_value=mock_api_client, - ) - - mock_routes = { - "items": [ - { - "apiVersion": "route.openshift.io/v1", - "kind": "Route", - "metadata": { - "name": "ray-dashboard-quicktest", - "namespace": "ns", - }, - "spec": {"host": "ray-dashboard-quicktest"}, - }, - ] - } - - def custom_side_effect(group, version, namespace, plural, **kwargs): - if plural == "routes": - return mock_routes - elif plural == "rayclusters": - return get_ray_obj("ray.io", "v1", "ns", "rayclusters") - - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - side_effect=custom_side_effect, - ) - - rc = get_ray_obj("ray.io", "v1", "ns", "rayclusters")["items"][0] - rc_name = rc["metadata"]["name"] - rc_dashboard = f"http://ray-dashboard-{rc_name}" - - result = _map_to_ray_cluster(rc) - - assert result is not None - assert result.dashboard == rc_dashboard - - -def test_list_clusters(mocker, capsys): - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - 
mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - side_effect=get_obj_none, - ) - mocker.patch( - "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", - ) - list_all_clusters("ns") - captured = capsys.readouterr() - assert captured.out == ( - "╭──────────────────────────────────────────────────────────────────────────────╮\n" - "│ No resources found, have you run cluster.up() yet? │\n" - "╰──────────────────────────────────────────────────────────────────────────────╯\n" - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - side_effect=get_ray_obj, - ) - list_all_clusters("ns") - captured = capsys.readouterr() - assert captured.out == ( - " 🚀 CodeFlare Cluster Details 🚀 \n" - " \n" - " ╭───────────────────────────────────────────────────────────────╮ \n" - " │ Name │ \n" - " │ quicktest Active ✅ │ \n" - " │ │ \n" - " │ URI: ray://quicktest-head-svc.ns.svc:10001 │ \n" - " │ │ \n" - " │ Dashboard🔗 │ \n" - " │ │ \n" - " │ Cluster Resources │ \n" - " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n" - " │ │ # Workers │ │ Memory CPU GPU │ │ \n" - " │ │ │ │ │ │ \n" - " │ │ 1 │ │ 2G~2G 1~1 0 │ │ \n" - " │ │ │ │ │ │ \n" - " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n" - " ╰───────────────────────────────────────────────────────────────╯ \n" - "╭───────────────────────────────────────────────────────────────╮\n" - "│ Name │\n" - "│ quicktest2 Inactive ❌ │\n" - "│ │\n" - "│ URI: ray://quicktest2-head-svc.ns.svc:10001 │\n" - "│ │\n" - "│ Dashboard🔗 │\n" - "│ │\n" - "│ Cluster Resources │\n" - "│ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │\n" - "│ │ # Workers │ │ Memory CPU GPU │ │\n" - "│ │ │ │ │ │\n" - "│ │ 1 │ │ 2G~2G 1~1 0 │ │\n" - "│ │ │ │ │ │\n" - "│ ╰─────────────╯ ╰──────────────────────────────────────╯ │\n" - "╰───────────────────────────────────────────────────────────────╯\n" 
- ) - - -def test_list_queue(mocker, capsys): - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - side_effect=get_obj_none, - ) - list_all_queued("ns", appwrapper=True) - captured = capsys.readouterr() - assert captured.out == ( - "╭──────────────────────────────────────────────────────────────────────────────╮\n" - "│ No resources found, have you run cluster.up() yet? │\n" - "╰──────────────────────────────────────────────────────────────────────────────╯\n" - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - side_effect=get_aw_obj, - ) - list_all_queued("ns", appwrapper=True) - captured = capsys.readouterr() - assert captured.out == ( - "╭────────────────────────────╮\n" - "│ 🚀 Cluster Queue Status │\n" - "│ 🚀 │\n" - "│ +------------+-----------+ │\n" - "│ | Name | Status | │\n" - "│ +============+===========+ │\n" - "│ | quicktest1 | running | │\n" - "│ | | | │\n" - "│ | quicktest2 | suspended | │\n" - "│ | | | │\n" - "│ +------------+-----------+ │\n" - "╰────────────────────────────╯\n" - ) - - -def test_list_queue_rayclusters(mocker, capsys): - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mock_api = MagicMock() - mock_api.get_api_versions.return_value.groups = [ - MagicMock(versions=[MagicMock(group_version="route.openshift.io/v1")]) - ] - mocker.patch("kubernetes.client.ApisApi", return_value=mock_api) - - assert is_openshift_cluster() == True - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - side_effect=get_obj_none, - ) - list_all_queued("ns") - captured = capsys.readouterr() - assert captured.out == ( - "╭──────────────────────────────────────────────────────────────────────────────╮\n" - "│ No resources found, have you run cluster.up() yet? 
│\n" - "╰──────────────────────────────────────────────────────────────────────────────╯\n" - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - side_effect=get_ray_obj, - ) - list_all_queued("ns") - captured = capsys.readouterr() - print(captured.out) - assert captured.out == ( - "╭────────────────────────────╮\n" - "│ 🚀 Cluster Queue Status │\n" - "│ 🚀 │\n" - "│ +------------+-----------+ │\n" - "│ | Name | Status | │\n" - "│ +============+===========+ │\n" - "│ | quicktest | ready | │\n" - "│ | | | │\n" - "│ | quicktest2 | suspended | │\n" - "│ | | | │\n" - "│ +------------+-----------+ │\n" - "╰────────────────────────────╯\n" - ) - - -def test_cluster_status(mocker): - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "codeflare_sdk.common.kueue.kueue.local_queue_exists", - return_value="true", - ) - fake_aw = AppWrapper("test", AppWrapperStatus.FAILED) - fake_ray = RayCluster( - name="test", - status=RayClusterStatus.UNKNOWN, - num_workers=1, - worker_mem_requests=2, - worker_mem_limits=2, - worker_cpu_requests=1, - worker_cpu_limits=1, - namespace="ns", - dashboard="fake-uri", - head_cpu_requests=2, - head_cpu_limits=2, - head_mem_requests=8, - head_mem_limits=8, - ) - cf = Cluster( - ClusterConfiguration( - name="test", - namespace="ns", - write_to_file=True, - appwrapper=True, - local_queue="local_default_queue", - ) - ) - mocker.patch( - "codeflare_sdk.ray.cluster.cluster._app_wrapper_status", return_value=None - ) - mocker.patch( - "codeflare_sdk.ray.cluster.cluster._ray_cluster_status", return_value=None - ) - status, ready = cf.status() - assert status == CodeFlareClusterStatus.UNKNOWN - assert ready == False - - mocker.patch( - "codeflare_sdk.ray.cluster.cluster._app_wrapper_status", return_value=fake_aw - ) - status, ready = cf.status() - assert status == CodeFlareClusterStatus.FAILED - assert ready == False - 
- fake_aw.status = AppWrapperStatus.SUSPENDED - status, ready = cf.status() - assert status == CodeFlareClusterStatus.QUEUED - assert ready == False - - fake_aw.status = AppWrapperStatus.RESUMING - status, ready = cf.status() - assert status == CodeFlareClusterStatus.STARTING - assert ready == False - - fake_aw.status = AppWrapperStatus.RESETTING - status, ready = cf.status() - assert status == CodeFlareClusterStatus.STARTING - assert ready == False - - fake_aw.status = AppWrapperStatus.RUNNING - status, ready = cf.status() - assert status == CodeFlareClusterStatus.UNKNOWN - assert ready == False - - mocker.patch( - "codeflare_sdk.ray.cluster.cluster._ray_cluster_status", return_value=fake_ray - ) - - status, ready = cf.status() - assert status == CodeFlareClusterStatus.STARTING - assert ready == False - - fake_ray.status = RayClusterStatus.FAILED - status, ready = cf.status() - assert status == CodeFlareClusterStatus.FAILED - assert ready == False - - fake_ray.status = RayClusterStatus.UNHEALTHY - status, ready = cf.status() - assert status == CodeFlareClusterStatus.FAILED - assert ready == False - - fake_ray.status = RayClusterStatus.READY - status, ready = cf.status() - assert status == CodeFlareClusterStatus.READY - assert ready == True - - -def test_wait_ready(mocker, capsys): - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch( - "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", - return_value=ingress_retrieval(), - ) - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "codeflare_sdk.ray.cluster.cluster._app_wrapper_status", return_value=None - ) - mocker.patch( - "codeflare_sdk.ray.cluster.cluster._ray_cluster_status", return_value=None - ) - mocker.patch( - "codeflare_sdk.common.kueue.kueue.local_queue_exists", - return_value="true", - ) - mocker.patch.object( - client.CustomObjectsApi, - "list_namespaced_custom_object", - return_value={ - "items": [ - { - "metadata": {"name": 
"ray-dashboard-test"}, - "spec": {"host": "mocked-host"}, - } - ] - }, - ) - mock_response = mocker.Mock() - mock_response.status_code = 200 - mocker.patch("requests.get", return_value=mock_response) - cf = Cluster( - ClusterConfiguration( - name="test", - namespace="ns", - write_to_file=True, - appwrapper=True, - local_queue="local-queue-default", - ) - ) - try: - cf.wait_ready(timeout=5) - assert 1 == 0 - except Exception as e: - assert type(e) == TimeoutError - - captured = capsys.readouterr() - assert ( - "WARNING: Current cluster status is unknown, have you run cluster.up yet?" - in captured.out - ) - mocker.patch( - "codeflare_sdk.ray.cluster.cluster.Cluster.status", - return_value=(True, CodeFlareClusterStatus.READY), - ) - cf.wait_ready() - captured = capsys.readouterr() - assert ( - captured.out - == "Waiting for requested resources to be set up...\nRequested cluster is up and running!\nDashboard is ready!\n" - ) - cf.wait_ready(dashboard_check=False) - captured = capsys.readouterr() - assert ( - captured.out - == "Waiting for requested resources to be set up...\nRequested cluster is up and running!\n" - ) - - -def arg_check_side_effect(*args): - assert args[0] == "fake-app-handle" - - -def parse_j(cmd): - pattern = r"--nnodes\s+\d+\s+--nproc_per_node\s+\d+" - match = re.search(pattern, cmd) - if match: - substring = match.group(0) - else: - return None - args = substring.split() - worker = args[1] - gpu = args[3] - return f"{worker}x{gpu}" - - -def test_AWManager_creation(mocker): - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - testaw = AWManager(f"{aw_dir}test.yaml") - assert testaw.name == "test" - assert testaw.namespace == "ns" - assert testaw.submitted == False - try: - testaw = AWManager("fake") - except Exception as e: - assert type(e) == FileNotFoundError - assert str(e) == "[Errno 2] No such file or directory: 'fake'" - try: - testaw = AWManager("tests/test-case-bad.yaml") - except Exception as e: - assert type(e) == ValueError - 
assert ( - str(e) - == "tests/test-case-bad.yaml is not a correctly formatted AppWrapper yaml" - ) - - -def arg_check_aw_apply_effect(group, version, namespace, plural, body, *args): - assert group == "workload.codeflare.dev" - assert version == "v1beta2" - assert namespace == "ns" - assert plural == "appwrappers" - with open(f"{aw_dir}test.yaml") as f: - aw = yaml.load(f, Loader=yaml.FullLoader) - assert body == aw - assert args == tuple() - - -def arg_check_aw_del_effect(group, version, namespace, plural, name, *args): - assert group == "workload.codeflare.dev" - assert version == "v1beta2" - assert namespace == "ns" - assert plural == "appwrappers" - assert name == "test" - assert args == tuple() - - -def test_AWManager_submit_remove(mocker, capsys): - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - testaw = AWManager(f"{aw_dir}test.yaml") - testaw.remove() - captured = capsys.readouterr() - assert ( - captured.out - == "AppWrapper not submitted by this manager yet, nothing to remove\n" - ) - assert testaw.submitted == False - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "kubernetes.client.CustomObjectsApi.create_namespaced_custom_object", - side_effect=arg_check_aw_apply_effect, - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.delete_namespaced_custom_object", - side_effect=arg_check_aw_del_effect, - ) - testaw.submit() - assert testaw.submitted == True - testaw.remove() - assert testaw.submitted == False - - -import base64 - -from cryptography.hazmat.primitives.serialization import ( - Encoding, - PublicFormat, - load_pem_private_key, -) -from cryptography.x509 import load_pem_x509_certificate - - -def test_generate_ca_cert(): - """ - test the function codeflare_sdk.common.utils.generate_ca_cert generates the correct outputs - """ - key, certificate = generate_ca_cert() - cert = load_pem_x509_certificate(base64.b64decode(certificate)) - private_pub_key_bytes = ( - 
load_pem_private_key(base64.b64decode(key), password=None) - .public_key() - .public_bytes(Encoding.PEM, PublicFormat.SubjectPublicKeyInfo) - ) - cert_pub_key_bytes = cert.public_key().public_bytes( - Encoding.PEM, PublicFormat.SubjectPublicKeyInfo - ) - assert type(key) == str - assert type(certificate) == str - # Veirfy ca.cert is self signed - assert cert.verify_directly_issued_by(cert) == None - # Verify cert has the public key bytes from the private key - assert cert_pub_key_bytes == private_pub_key_bytes - - -def secret_ca_retreival(secret_name, namespace): - ca_private_key_bytes, ca_cert = generate_ca_cert() - data = {"ca.crt": ca_cert, "ca.key": ca_private_key_bytes} - assert secret_name == "ca-secret-cluster" - assert namespace == "namespace" - return client.models.V1Secret(data=data) - - -def test_generate_tls_cert(mocker): - """ - test the function codeflare_sdk.common.utils.generate_ca_cert generates the correct outputs - """ - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "codeflare_sdk.common.utils.generate_cert.get_secret_name", - return_value="ca-secret-cluster", - ) - mocker.patch( - "kubernetes.client.CoreV1Api.read_namespaced_secret", - side_effect=secret_ca_retreival, - ) - - generate_tls_cert("cluster", "namespace") - assert os.path.exists("tls-cluster-namespace") - assert os.path.exists(os.path.join("tls-cluster-namespace", "ca.crt")) - assert os.path.exists(os.path.join("tls-cluster-namespace", "tls.crt")) - assert os.path.exists(os.path.join("tls-cluster-namespace", "tls.key")) - - # verify the that the signed tls.crt is issued by the ca_cert (root cert) - with open(os.path.join("tls-cluster-namespace", "tls.crt"), "r") as f: - tls_cert = load_pem_x509_certificate(f.read().encode("utf-8")) - with open(os.path.join("tls-cluster-namespace", "ca.crt"), "r") as f: - root_cert = load_pem_x509_certificate(f.read().encode("utf-8")) - assert tls_cert.verify_directly_issued_by(root_cert) == None - - -def 
test_export_env(): - """ - test the function codeflare_sdk.common.utils.generate_ca_cert.export_ev generates the correct outputs - """ - tls_dir = "cluster" - ns = "namespace" - export_env(tls_dir, ns) - assert os.environ["RAY_USE_TLS"] == "1" - assert os.environ["RAY_TLS_SERVER_CERT"] == os.path.join( - os.getcwd(), f"tls-{tls_dir}-{ns}", "tls.crt" - ) - assert os.environ["RAY_TLS_SERVER_KEY"] == os.path.join( - os.getcwd(), f"tls-{tls_dir}-{ns}", "tls.key" - ) - assert os.environ["RAY_TLS_CA_CERT"] == os.path.join( - os.getcwd(), f"tls-{tls_dir}-{ns}", "ca.crt" - ) - - -def test_cluster_throw_for_no_raycluster(mocker: MockerFixture): - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch( - "codeflare_sdk.ray.cluster.cluster.get_current_namespace", - return_value="opendatahub", - ) - mocker.patch( - "codeflare_sdk.common.kueue.kueue.get_default_kueue_name", - return_value="default", - ) - mocker.patch( - "codeflare_sdk.common.kueue.kueue.local_queue_exists", - return_value="true", - ) - - def throw_if_getting_raycluster(group, version, namespace, plural): - if plural == "rayclusters": - raise client.ApiException(status=404) - return - - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - side_effect=throw_if_getting_raycluster, - ) - cluster = Cluster( - ClusterConfiguration( - "test_cluster", - write_to_file=False, - ) - ) - with pytest.raises(RuntimeError): - cluster.up() - - -""" -Ray Jobs tests -""" - - -# rjc == RayJobClient -@pytest.fixture -def ray_job_client(mocker): - # Creating a fixture to instantiate RayJobClient with a mocked JobSubmissionClient - mocker.patch.object(JobSubmissionClient, "__init__", return_value=None) - return RayJobClient( - "https://ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org" - ) - - -def test_rjc_submit_job(ray_job_client, mocker): - mocked_submit_job = mocker.patch.object( - JobSubmissionClient, "submit_job", return_value="mocked_submission_id" - ) - 
submission_id = ray_job_client.submit_job(entrypoint={"pip": ["numpy"]}) - - mocked_submit_job.assert_called_once_with( - entrypoint={"pip": ["numpy"]}, - job_id=None, - runtime_env=None, - metadata=None, - submission_id=None, - entrypoint_num_cpus=None, - entrypoint_num_gpus=None, - entrypoint_memory=None, - entrypoint_resources=None, - ) - - assert submission_id == "mocked_submission_id" - - -def test_rjc_delete_job(ray_job_client, mocker): - # Case return True - mocked_delete_job_True = mocker.patch.object( - JobSubmissionClient, "delete_job", return_value=True - ) - result = ray_job_client.delete_job(job_id="mocked_job_id") - - mocked_delete_job_True.assert_called_once_with(job_id="mocked_job_id") - assert result == (True, "Successfully deleted Job mocked_job_id") - - # Case return False - mocked_delete_job_False = mocker.patch.object( - JobSubmissionClient, "delete_job", return_value=(False) - ) - result = ray_job_client.delete_job(job_id="mocked_job_id") - - mocked_delete_job_False.assert_called_once_with(job_id="mocked_job_id") - assert result == (False, "Failed to delete Job mocked_job_id") - - -def test_rjc_stop_job(ray_job_client, mocker): - # Case return True - mocked_stop_job_True = mocker.patch.object( - JobSubmissionClient, "stop_job", return_value=(True) - ) - result = ray_job_client.stop_job(job_id="mocked_job_id") - - mocked_stop_job_True.assert_called_once_with(job_id="mocked_job_id") - assert result == (True, "Successfully stopped Job mocked_job_id") - - # Case return False - mocked_stop_job_False = mocker.patch.object( - JobSubmissionClient, "stop_job", return_value=(False) - ) - result = ray_job_client.stop_job(job_id="mocked_job_id") - - mocked_stop_job_False.assert_called_once_with(job_id="mocked_job_id") - assert result == ( - False, - "Failed to stop Job, mocked_job_id could have already completed.", - ) - - -def test_rjc_address(ray_job_client, mocker): - mocked_rjc_address = mocker.patch.object( - JobSubmissionClient, - "get_address", - 
return_value="https://ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org", - ) - address = ray_job_client.get_address() - - mocked_rjc_address.assert_called_once() - assert ( - address - == "https://ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org" - ) - - -def test_rjc_get_job_logs(ray_job_client, mocker): - mocked_rjc_get_job_logs = mocker.patch.object( - JobSubmissionClient, "get_job_logs", return_value="Logs" - ) - logs = ray_job_client.get_job_logs(job_id="mocked_job_id") - - mocked_rjc_get_job_logs.assert_called_once_with(job_id="mocked_job_id") - assert logs == "Logs" - - -def test_rjc_get_job_info(ray_job_client, mocker): - job_details_example = "JobDetails(type=, job_id=None, submission_id='mocked_submission_id', driver_info=None, status=, entrypoint='python test.py', message='Job has not started yet. It may be waiting for the runtime environment to be set up.', error_type=None, start_time=1701271760641, end_time=None, metadata={}, runtime_env={'working_dir': 'gcs://_ray_pkg_67de6f0e60d43b19.zip', 'pip': {'packages': ['numpy'], 'pip_check': False}, '_ray_commit': 'b4bba4717f5ba04ee25580fe8f88eed63ef0c5dc'}, driver_agent_http_address=None, driver_node_id=None)" - mocked_rjc_get_job_info = mocker.patch.object( - JobSubmissionClient, "get_job_info", return_value=job_details_example - ) - job_details = ray_job_client.get_job_info(job_id="mocked_job_id") - - mocked_rjc_get_job_info.assert_called_once_with(job_id="mocked_job_id") - assert job_details == job_details_example - - -def test_rjc_get_job_status(ray_job_client, mocker): - job_status_example = "" - mocked_rjc_get_job_status = mocker.patch.object( - JobSubmissionClient, "get_job_status", return_value=job_status_example - ) - job_status = ray_job_client.get_job_status(job_id="mocked_job_id") - - mocked_rjc_get_job_status.assert_called_once_with(job_id="mocked_job_id") - assert job_status == job_status_example - - -def test_rjc_tail_job_logs(ray_job_client, mocker): - logs_example = [ 
- "Job started...", - "Processing input data...", - "Finalizing results...", - "Job completed successfully.", - ] - mocked_rjc_tail_job_logs = mocker.patch.object( - JobSubmissionClient, "tail_job_logs", return_value=logs_example - ) - job_tail_job_logs = ray_job_client.tail_job_logs(job_id="mocked_job_id") - - mocked_rjc_tail_job_logs.assert_called_once_with(job_id="mocked_job_id") - assert job_tail_job_logs == logs_example - - -def test_rjc_list_jobs(ray_job_client, mocker): - requirements_path = "tests/e2e/mnist_pip_requirements.txt" - pytorch_lightning = get_package_and_version("pytorch_lightning", requirements_path) - torchmetrics = get_package_and_version("torchmetrics", requirements_path) - torchvision = get_package_and_version("torchvision", requirements_path) - jobs_list = [ - f"JobDetails(type=, job_id=None, submission_id='raysubmit_4k2NYS1YbRXYPZCM', driver_info=None, status=, entrypoint='python mnist.py', message='Job finished successfully.', error_type=None, start_time=1701352132585, end_time=1701352192002, metadata={{}}, runtime_env={{'working_dir': 'gcs://_ray_pkg_6200b93a110e8033.zip', 'pip': {{'packages': ['{pytorch_lightning}', 'ray_lightning', '{torchmetrics}', '{torchvision}'], 'pip_check': False}}, '_ray_commit': 'b4bba4717f5ba04ee25580fe8f88eed63ef0c5dc'}}, driver_agent_http_address='http://10.131.0.18:52365', driver_node_id='9fb515995f5fb13ad4db239ceea378333bebf0a2d45b6aa09d02e691')", - f"JobDetails(type=, job_id=None, submission_id='raysubmit_iRuwU8vdkbUZZGvT', driver_info=None, status=, entrypoint='python mnist.py', message='Job was intentionally stopped.', error_type=None, start_time=1701353096163, end_time=1701353097733, metadata={{}}, runtime_env={{'working_dir': 'gcs://_ray_pkg_6200b93a110e8033.zip', 'pip': {{'packages': ['{pytorch_lightning}', 'ray_lightning', '{torchmetrics}', '{torchvision}'], 'pip_check': False}}, '_ray_commit': 'b4bba4717f5ba04ee25580fe8f88eed63ef0c5dc'}}, driver_agent_http_address='http://10.131.0.18:52365', 
driver_node_id='9fb515995f5fb13ad4db239ceea378333bebf0a2d45b6aa09d02e691')", - ] - mocked_rjc_list_jobs = mocker.patch.object( - JobSubmissionClient, "list_jobs", return_value=jobs_list - ) - job_list_jobs = ray_job_client.list_jobs() - - mocked_rjc_list_jobs.assert_called_once() - assert job_list_jobs == jobs_list - - -def test_cluster_config_deprecation_conversion(mocker): - config = ClusterConfiguration( - name="test", - num_gpus=2, - head_gpus=1, - min_memory=3, - max_memory=4, - min_cpus=1, - max_cpus=2, - ) - assert config.worker_extended_resource_requests == {"nvidia.com/gpu": 2} - assert config.head_extended_resource_requests == {"nvidia.com/gpu": 1} - assert config.worker_memory_requests == "3G" - assert config.worker_memory_limits == "4G" - assert config.worker_cpu_requests == 1 - assert config.worker_cpu_limits == 2 - - -""" - Ipywidgets tests -""" - - -@patch.dict( - "os.environ", {"JPY_SESSION_NAME": "example-test"} -) # Mock Jupyter environment variable -def test_cluster_up_down_buttons(mocker): - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": "apps.cluster.awsroute.org"}}, - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), - ) - cluster = Cluster(createClusterConfig()) - - with patch("ipywidgets.Button") as MockButton, patch( - "ipywidgets.Checkbox" - ) as MockCheckbox, patch("ipywidgets.Output"), patch("ipywidgets.HBox"), patch( - "ipywidgets.VBox" - ), patch.object( - cluster, "up" - ) as mock_up, patch.object( - cluster, "down" - ) as mock_down, patch.object( - cluster, "wait_ready" - ) as mock_wait_ready: - # Create mock button & CheckBox instances - mock_up_button = MagicMock() - mock_down_button = MagicMock() - mock_wait_ready_check_box = MagicMock() - - # Ensure the mock Button class returns the 
mock button instances in sequence - MockCheckbox.side_effect = [mock_wait_ready_check_box] - MockButton.side_effect = [mock_up_button, mock_down_button] - - # Call the method under test - cf_widgets.cluster_up_down_buttons(cluster) - - # Simulate checkbox being checked or unchecked - mock_wait_ready_check_box.value = True # Simulate checkbox being checked - - # Simulate the button clicks by calling the mock on_click handlers - mock_up_button.on_click.call_args[0][0](None) # Simulate clicking "Cluster Up" - mock_down_button.on_click.call_args[0][0]( - None - ) # Simulate clicking "Cluster Down" - - # Check if the `up` and `down` methods were called - mock_wait_ready.assert_called_once() - mock_up.assert_called_once() - mock_down.assert_called_once() - - -@patch.dict("os.environ", {}, clear=True) # Mock environment with no variables -def test_is_notebook_false(): - assert cf_widgets.is_notebook() is False - - -@patch.dict( - "os.environ", {"JPY_SESSION_NAME": "example-test"} -) # Mock Jupyter environment variable -def test_is_notebook_true(): - assert cf_widgets.is_notebook() is True - - -def test_view_clusters(mocker, capsys): - # If is not a notebook environment, a warning should be raised - with pytest.warns( - UserWarning, - match="view_clusters can only be used in a Jupyter Notebook environment.", - ): - result = cf_widgets.view_clusters("default") - - # Assert the function returns None when not in a notebook environment - assert result is None - - # Prepare to run view_clusters when notebook environment is detected - mocker.patch("codeflare_sdk.common.widgets.widgets.is_notebook", return_value=True) - mock_get_current_namespace = mocker.patch( - "codeflare_sdk.ray.cluster.cluster.get_current_namespace", - return_value="default", - ) - namespace = mock_get_current_namespace.return_value - - # Assert the function returns None when no clusters are found - mock_fetch_cluster_data = mocker.patch( - "codeflare_sdk.common.widgets.widgets._fetch_cluster_data", - 
return_value=pd.DataFrame(), - ) - result = cf_widgets.view_clusters() - captured = capsys.readouterr() - assert mock_fetch_cluster_data.return_value.empty - assert "No clusters found in the default namespace." in captured.out - assert result is None - - # Prepare to run view_clusters with a test DataFrame - mock_fetch_cluster_data = mocker.patch( - "codeflare_sdk.common.widgets.widgets._fetch_cluster_data", - return_value=pd.DataFrame( - { - "Name": ["test-cluster"], - "Namespace": ["default"], - "Num Workers": ["1"], - "Head GPUs": ["0"], - "Worker GPUs": ["0"], - "Head CPU Req~Lim": ["1~1"], - "Head Memory Req~Lim": ["1Gi~1Gi"], - "Worker CPU Req~Lim": ["1~1"], - "Worker Memory Req~Lim": ["1Gi~1Gi"], - "status": ['Ready ✓'], - } - ), - ) - # Create a RayClusterManagerWidgets instance - ray_cluster_manager_instance = cf_widgets.RayClusterManagerWidgets( - ray_clusters_df=mock_fetch_cluster_data.return_value, namespace=namespace - ) - # Patch the constructor of RayClusterManagerWidgets to return our initialized instance - mock_constructor = mocker.patch( - "codeflare_sdk.common.widgets.widgets.RayClusterManagerWidgets", - return_value=ray_cluster_manager_instance, - ) - - # Use a spy to track calls to display_widgets without replacing it - spy_display_widgets = mocker.spy(ray_cluster_manager_instance, "display_widgets") - - cf_widgets.view_clusters() - - mock_constructor.assert_called_once_with( - ray_clusters_df=mock_fetch_cluster_data.return_value, namespace=namespace - ) - - spy_display_widgets.assert_called_once() - - -def test_delete_cluster(mocker, capsys): - name = "test-cluster" - namespace = "default" - - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - - mock_ray_cluster = MagicMock() - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", - side_effect=[ - mock_ray_cluster, - client.ApiException(status=404), - 
client.ApiException(status=404), - mock_ray_cluster, - ], - ) - - # In this scenario, the RayCluster exists and the AppWrapper does not. - mocker.patch( - "codeflare_sdk.ray.cluster.cluster._check_aw_exists", return_value=False - ) - mock_delete_rc = mocker.patch( - "kubernetes.client.CustomObjectsApi.delete_namespaced_custom_object" - ) - cf_widgets._delete_cluster(name, namespace) - - mock_delete_rc.assert_called_once_with( - group="ray.io", - version="v1", - namespace=namespace, - plural="rayclusters", - name=name, - ) - - # In this scenario, the AppWrapper exists and the RayCluster does not - mocker.patch( - "codeflare_sdk.ray.cluster.cluster._check_aw_exists", return_value=True - ) - mock_delete_aw = mocker.patch( - "kubernetes.client.CustomObjectsApi.delete_namespaced_custom_object" - ) - cf_widgets._delete_cluster(name, namespace) - - mock_delete_aw.assert_called_once_with( - group="workload.codeflare.dev", - version="v1beta2", - namespace=namespace, - plural="appwrappers", - name=name, - ) - - # In this scenario, the deletion of the resource times out. - with pytest.raises( - TimeoutError, match=f"Timeout waiting for {name} to be deleted." 
- ): - cf_widgets._delete_cluster(name, namespace, 1) - - -def test_ray_cluster_manager_widgets_init(mocker, capsys): - namespace = "default" - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), - ) - test_ray_clusters_df = pd.DataFrame( - { - "Name": ["test-cluster-1", "test-cluster-2"], - "Namespace": [namespace, namespace], - "Num Workers": ["1", "2"], - "Head GPUs": ["0", "0"], - "Worker GPUs": ["0", "0"], - "Head CPU Req~Lim": ["1~1", "1~1"], - "Head Memory Req~Lim": ["1Gi~1Gi", "1Gi~1Gi"], - "Worker CPU Req~Lim": ["1~1", "1~1"], - "Worker Memory Req~Lim": ["1Gi~1Gi", "1Gi~1Gi"], - "status": [ - 'Ready ✓', - 'Ready ✓', - ], - } - ) - mock_fetch_cluster_data = mocker.patch( - "codeflare_sdk.common.widgets.widgets._fetch_cluster_data", - return_value=test_ray_clusters_df, - ) - mocker.patch( - "codeflare_sdk.ray.cluster.cluster.get_current_namespace", - return_value=namespace, - ) - mock_delete_cluster = mocker.patch( - "codeflare_sdk.common.widgets.widgets._delete_cluster" - ) - - # # Mock ToggleButtons - mock_toggle_buttons = mocker.patch("ipywidgets.ToggleButtons") - mock_button = mocker.patch("ipywidgets.Button") - mock_output = mocker.patch("ipywidgets.Output") - - # Initialize the RayClusterManagerWidgets instance - ray_cluster_manager_instance = cf_widgets.RayClusterManagerWidgets( - ray_clusters_df=test_ray_clusters_df, namespace=namespace - ) - - # Assertions for DataFrame and attributes - assert ray_cluster_manager_instance.ray_clusters_df.equals( - test_ray_clusters_df - ), "ray_clusters_df attribute does not match the input DataFrame" - assert ( - ray_cluster_manager_instance.namespace == namespace - ), f"Expected namespace to be '{namespace}', but got '{ray_cluster_manager_instance.namespace}'" - assert ( - 
ray_cluster_manager_instance.classification_widget.options - == test_ray_clusters_df["Name"].tolist() - ), "classification_widget options do not match the input DataFrame" - - # Assertions for widgets - mock_toggle_buttons.assert_called_once_with( - options=test_ray_clusters_df["Name"].tolist(), - value=test_ray_clusters_df["Name"].tolist()[0], - description="Select an existing cluster:", - ) - assert ( - ray_cluster_manager_instance.classification_widget - == mock_toggle_buttons.return_value - ), "classification_widget is not set correctly" - assert ( - ray_cluster_manager_instance.delete_button == mock_button.return_value - ), "delete_button is not set correctly" - assert ( - ray_cluster_manager_instance.list_jobs_button == mock_button.return_value - ), "list_jobs_button is not set correctly" - assert ( - ray_cluster_manager_instance.ray_dashboard_button == mock_button.return_value - ), "ray_dashboard_button is not set correctly" - assert ( - ray_cluster_manager_instance.raycluster_data_output == mock_output.return_value - ), "raycluster_data_output is not set correctly" - assert ( - ray_cluster_manager_instance.user_output == mock_output.return_value - ), "user_output is not set correctly" - assert ( - ray_cluster_manager_instance.url_output == mock_output.return_value - ), "url_output is not set correctly" - - ### Test button click events - mock_delete_button = MagicMock() - mock_list_jobs_button = MagicMock() - mock_ray_dashboard_button = MagicMock() - - mock_javascript = mocker.patch("codeflare_sdk.common.widgets.widgets.Javascript") - ray_cluster_manager_instance.url_output = MagicMock() - - mock_dashboard_uri = mocker.patch( - "codeflare_sdk.ray.cluster.cluster.Cluster.cluster_dashboard_uri", - return_value="https://ray-dashboard-test-cluster-1-ns.apps.cluster.awsroute.org", - ) - - # Simulate clicking the list jobs button - ray_cluster_manager_instance.classification_widget.value = "test-cluster-1" - 
ray_cluster_manager_instance._on_list_jobs_button_click(mock_list_jobs_button) - - captured = capsys.readouterr() - assert ( - f"Opening Ray Jobs Dashboard for test-cluster-1 cluster:\n{mock_dashboard_uri.return_value}/#/jobs" - in captured.out - ) - mock_javascript.assert_called_with( - f'window.open("{mock_dashboard_uri.return_value}/#/jobs", "_blank");' - ) - - # Simulate clicking the Ray dashboard button - ray_cluster_manager_instance.classification_widget.value = "test-cluster-1" - ray_cluster_manager_instance._on_ray_dashboard_button_click( - mock_ray_dashboard_button - ) - - captured = capsys.readouterr() - assert ( - f"Opening Ray Dashboard for test-cluster-1 cluster:\n{mock_dashboard_uri.return_value}" - in captured.out - ) - mock_javascript.assert_called_with( - f'window.open("{mock_dashboard_uri.return_value}", "_blank");' - ) - - # Simulate clicking the delete button - ray_cluster_manager_instance.classification_widget.value = "test-cluster-1" - ray_cluster_manager_instance._on_delete_button_click(mock_delete_button) - mock_delete_cluster.assert_called_with("test-cluster-1", namespace) - - mock_fetch_cluster_data.return_value = pd.DataFrame() - ray_cluster_manager_instance.classification_widget.value = "test-cluster-2" - ray_cluster_manager_instance._on_delete_button_click(mock_delete_button) - mock_delete_cluster.assert_called_with("test-cluster-2", namespace) - - # Assert on deletion that the dataframe is empty - assert ( - ray_cluster_manager_instance.ray_clusters_df.empty - ), "Expected DataFrame to be empty after deletion" - - captured = capsys.readouterr() - assert ( - f"Cluster test-cluster-1 in the {namespace} namespace was deleted successfully." 
- in captured.out - ) - - -def test_fetch_cluster_data(mocker): - # Return empty dataframe when no clusters are found - mocker.patch("codeflare_sdk.ray.cluster.cluster.list_all_clusters", return_value=[]) - df = cf_widgets._fetch_cluster_data(namespace="default") - assert df.empty - - # Create mock RayCluster objects - mock_raycluster1 = MagicMock(spec=RayCluster) - mock_raycluster1.name = "test-cluster-1" - mock_raycluster1.namespace = "default" - mock_raycluster1.num_workers = 1 - mock_raycluster1.head_extended_resources = {"nvidia.com/gpu": "1"} - mock_raycluster1.worker_extended_resources = {"nvidia.com/gpu": "2"} - mock_raycluster1.head_cpu_requests = "500m" - mock_raycluster1.head_cpu_limits = "1000m" - mock_raycluster1.head_mem_requests = "1Gi" - mock_raycluster1.head_mem_limits = "2Gi" - mock_raycluster1.worker_cpu_requests = "1000m" - mock_raycluster1.worker_cpu_limits = "2000m" - mock_raycluster1.worker_mem_requests = "2Gi" - mock_raycluster1.worker_mem_limits = "4Gi" - mock_raycluster1.status = MagicMock() - mock_raycluster1.status.name = "READY" - mock_raycluster1.status = RayClusterStatus.READY - - mock_raycluster2 = MagicMock(spec=RayCluster) - mock_raycluster2.name = "test-cluster-2" - mock_raycluster2.namespace = "default" - mock_raycluster2.num_workers = 2 - mock_raycluster2.head_extended_resources = {} - mock_raycluster2.worker_extended_resources = {} - mock_raycluster2.head_cpu_requests = None - mock_raycluster2.head_cpu_limits = None - mock_raycluster2.head_mem_requests = None - mock_raycluster2.head_mem_limits = None - mock_raycluster2.worker_cpu_requests = None - mock_raycluster2.worker_cpu_limits = None - mock_raycluster2.worker_mem_requests = None - mock_raycluster2.worker_mem_limits = None - mock_raycluster2.status = MagicMock() - mock_raycluster2.status.name = "SUSPENDED" - mock_raycluster2.status = RayClusterStatus.SUSPENDED - - with patch( - "codeflare_sdk.ray.cluster.cluster.list_all_clusters", - return_value=[mock_raycluster1, 
mock_raycluster2], - ): - # Call the function under test - df = cf_widgets._fetch_cluster_data(namespace="default") - - # Expected DataFrame - expected_data = { - "Name": ["test-cluster-1", "test-cluster-2"], - "Namespace": ["default", "default"], - "Num Workers": [1, 2], - "Head GPUs": ["nvidia.com/gpu: 1", "0"], - "Worker GPUs": ["nvidia.com/gpu: 2", "0"], - "Head CPU Req~Lim": ["500m~1000m", "0~0"], - "Head Memory Req~Lim": ["1Gi~2Gi", "0~0"], - "Worker CPU Req~Lim": ["1000m~2000m", "0~0"], - "Worker Memory Req~Lim": ["2Gi~4Gi", "0~0"], - "status": [ - 'Ready ✓', - 'Suspended ❄️', - ], - } - - expected_df = pd.DataFrame(expected_data) - - # Assert that the DataFrame matches expected - pd.testing.assert_frame_equal( - df.reset_index(drop=True), expected_df.reset_index(drop=True) - ) - - -def test_format_status(): - # Test each possible status - test_cases = [ - (RayClusterStatus.READY, 'Ready ✓'), - ( - RayClusterStatus.SUSPENDED, - 'Suspended ❄️', - ), - (RayClusterStatus.FAILED, 'Failed ✗'), - (RayClusterStatus.UNHEALTHY, 'Unhealthy'), - (RayClusterStatus.UNKNOWN, 'Unknown'), - ] - - for status, expected_output in test_cases: - assert ( - cf_widgets._format_status(status) == expected_output - ), f"Failed for status: {status}" - - # Test an unrecognized status - unrecognized_status = "NotAStatus" - assert ( - cf_widgets._format_status(unrecognized_status) == "NotAStatus" - ), "Failed for unrecognized status" - - -# Make sure to always keep this function last -def test_cleanup(): - os.remove(f"{aw_dir}unit-test-no-kueue.yaml") - os.remove(f"{aw_dir}unit-test-cluster.yaml") - os.remove(f"{aw_dir}test.yaml") - os.remove(f"{aw_dir}raytest2.yaml") - os.remove(f"{aw_dir}unit-test-cluster-ray.yaml") - os.remove("tls-cluster-namespace/ca.crt") - os.remove("tls-cluster-namespace/tls.crt") - os.remove("tls-cluster-namespace/tls.key") - os.rmdir("tls-cluster-namespace") diff --git a/tests/unit_test_support.py b/tests/unit_test_support.py deleted file mode 100644 index 
b3c2e1977..000000000 --- a/tests/unit_test_support.py +++ /dev/null @@ -1,60 +0,0 @@ -from codeflare_sdk.ray.cluster.cluster import ( - Cluster, - ClusterConfiguration, -) - - -def createClusterConfig(): - config = ClusterConfiguration( - name="unit-test-cluster", - namespace="ns", - num_workers=2, - worker_cpu_requests=3, - worker_cpu_limits=4, - worker_memory_requests=5, - worker_memory_limits=6, - worker_extended_resource_requests={"nvidia.com/gpu": 7}, - appwrapper=True, - machine_types=["cpu.small", "gpu.large"], - image_pull_secrets=["unit-test-pull-secret"], - write_to_file=True, - ) - return config - - -def createClusterWithConfig(mocker): - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": "apps.cluster.awsroute.org"}}, - ) - cluster = Cluster(createClusterConfig()) - return cluster - - -def createClusterWrongType(): - config = ClusterConfiguration( - name="unit-test-cluster", - namespace="ns", - num_workers=2, - worker_cpu_requests=[], - worker_cpu_limits=4, - worker_memory_requests=5, - worker_memory_limits=6, - worker_extended_resource_requests={"nvidia.com/gpu": 7}, - appwrapper=True, - machine_types=[True, False], - image_pull_secrets=["unit-test-pull-secret"], - image="quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06", - write_to_file=True, - labels={1: 1}, - ) - return config - - -def get_package_and_version(package_name, requirements_file_path): - with open(requirements_file_path, "r") as file: - for line in file: - if line.strip().startswith(f"{package_name}=="): - return line.strip() - return None