From 6b01c9f6201638303bd09911041d84f5de3ff3d2 Mon Sep 17 00:00:00 2001 From: kaihsun Date: Sun, 18 Jun 2023 07:12:24 +0000 Subject: [PATCH 1/3] fix --- .../controllers/ray/rayjob_controller.go | 2 +- .../controllers/ray/rayservice_controller.go | 4 +- .../ray/utils/dashboard_httpclient.go | 65 ++++--------------- ray-operator/controllers/ray/utils/util.go | 2 +- 4 files changed, 17 insertions(+), 56 deletions(-) diff --git a/ray-operator/controllers/ray/rayjob_controller.go b/ray-operator/controllers/ray/rayjob_controller.go index 4bfd9a0c80..ce310cb4a5 100644 --- a/ray-operator/controllers/ray/rayjob_controller.go +++ b/ray-operator/controllers/ray/rayjob_controller.go @@ -172,7 +172,7 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request) clientURL := rayJobInstance.Status.DashboardURL if clientURL == "" { // TODO: dashboard service may be changed. Check it instead of using the same URL always - if clientURL, err = utils.FetchDashboardURL(ctx, &r.Log, r.Client, rayClusterInstance); err != nil || clientURL == "" { + if clientURL, err = utils.FetchHeadServiceURL(ctx, &r.Log, r.Client, rayClusterInstance, common.DefaultDashboardName); err != nil || clientURL == "" { if clientURL == "" { err = fmt.Errorf("empty dashboardURL") } diff --git a/ray-operator/controllers/ray/rayservice_controller.go b/ray-operator/controllers/ray/rayservice_controller.go index 4f2aa9a8f8..1859f2954a 100644 --- a/ray-operator/controllers/ray/rayservice_controller.go +++ b/ray-operator/controllers/ray/rayservice_controller.go @@ -985,7 +985,7 @@ func (r *RayServiceReconciler) updateStatusForActiveCluster(ctx context.Context, var clientURL string rayServiceStatus := &rayServiceInstance.Status.ActiveServiceStatus - if clientURL, err = utils.FetchDashboardAgentURL(ctx, &r.Log, r.Client, rayClusterInstance); err != nil || clientURL == "" { + if clientURL, err = utils.FetchHeadServiceURL(ctx, &r.Log, r.Client, rayClusterInstance, common.DefaultDashboardAgentListenPortName); err != nil || clientURL == "" { r.updateAndCheckDashboardStatus(rayServiceStatus, false, rayServiceInstance.Spec.DeploymentUnhealthySecondThreshold) return err } @@ -1025,7 +1025,7 @@ func (r *RayServiceReconciler) reconcileServe(ctx context.Context, rayServiceIns rayServiceStatus = &rayServiceInstance.Status.PendingServiceStatus } - if clientURL, err = utils.FetchDashboardAgentURL(ctx, &r.Log, r.Client, rayClusterInstance); err != nil || clientURL == "" { + if clientURL, err = utils.FetchHeadServiceURL(ctx, &r.Log, r.Client, rayClusterInstance, common.DefaultDashboardAgentListenPortName); err != nil || clientURL == "" { if !r.updateAndCheckDashboardStatus(rayServiceStatus, false, rayServiceInstance.Spec.DeploymentUnhealthySecondThreshold) { logger.Info("Dashboard is unhealthy, restart the cluster.") r.markRestart(rayServiceInstance) diff --git a/ray-operator/controllers/ray/utils/dashboard_httpclient.go b/ray-operator/controllers/ray/utils/dashboard_httpclient.go index 7c8be8ea2b..6ff7ae4554 100644 --- a/ray-operator/controllers/ray/utils/dashboard_httpclient.go +++ b/ray-operator/controllers/ray/utils/dashboard_httpclient.go @@ -21,13 +21,6 @@ import ( rayv1alpha1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1alpha1" ) -// TODO: currently the following constants are also declared in ray-operator/controllers/ray/common -// We cannot import them to avoid cycles -const ( - DefaultDashboardName = "dashboard" - DefaultDashboardAgentListenPortName = "dashboard-agent" -) - var ( // Single-application URL paths DeployPath = "/api/serve/deployments/" @@ -70,70 +63,38 @@ type RayDashboardClient struct { BaseDashboardClient } -func FetchDashboardAgentURL(ctx context.Context, log *logr.Logger, cli client.Client, rayCluster *rayv1alpha1.RayCluster) (string, error) { - dashboardAgentService := &corev1.Service{} - dashboardAgentServiceName := CheckName(GenerateDashboardServiceName(rayCluster.Name)) - if err := cli.Get(ctx, client.ObjectKey{Name: dashboardAgentServiceName, Namespace: rayCluster.Namespace}, dashboardAgentService); err != nil { - return "", err - } - - log.V(1).Info("fetchDashboardAgentURL ", "dashboard agent service found", dashboardAgentService.Name) - // TODO: compare diff and reconcile the object. For example. ServiceType might be changed or port might be modified - servicePorts := dashboardAgentService.Spec.Ports - - dashboardPort := int32(-1) - - for _, servicePort := range servicePorts { - if servicePort.Name == DefaultDashboardAgentListenPortName { - dashboardPort = servicePort.Port - break - } - } - - if dashboardPort == int32(-1) { - return "", fmtErrors.Errorf("dashboard port not found") - } - - domainName := GetClusterDomainName() - dashboardAgentURL := fmt.Sprintf("%s.%s.svc.%s:%v", - dashboardAgentService.Name, - dashboardAgentService.Namespace, - domainName, - dashboardPort) - log.V(1).Info("fetchDashboardAgentURL ", "dashboardURL", dashboardAgentURL) - return dashboardAgentURL, nil -} - -func FetchDashboardURL(ctx context.Context, log *logr.Logger, cli client.Client, rayCluster *rayv1alpha1.RayCluster) (string, error) { +// FetchHeadServiceURL fetches the URL that consists of the FQDN for the RayCluster's head service +// and the port with the given port name (defaultPortName). +func FetchHeadServiceURL(ctx context.Context, log *logr.Logger, cli client.Client, rayCluster *rayv1alpha1.RayCluster, defaultPortName string) (string, error) { headSvc := &corev1.Service{} headSvcName := GenerateServiceName(rayCluster.Name) if err := cli.Get(ctx, client.ObjectKey{Name: headSvcName, Namespace: rayCluster.Namespace}, headSvc); err != nil { return "", err } - log.V(3).Info("fetchDashboardURL ", "dashboard service found", headSvc.Name) + log.V(3).Info("FetchHeadServiceURL", "head service name", headSvc.Name, "namespace", headSvc.Namespace) servicePorts := headSvc.Spec.Ports - dashboardPort := int32(-1) + port := int32(-1) for _, servicePort := range servicePorts { - if servicePort.Name == DefaultDashboardName { - dashboardPort = servicePort.Port + if servicePort.Name == defaultPortName { + port = servicePort.Port break } } - if dashboardPort == int32(-1) { - return "", fmtErrors.Errorf("dashboard port not found") + if port == int32(-1) { + return "", fmtErrors.Errorf("%s port is not found", defaultPortName) } domainName := GetClusterDomainName() - dashboardURL := fmt.Sprintf("%s.%s.svc.%s:%v", + headServiceURL := fmt.Sprintf("%s.%s.svc.%s:%v", headSvc.Name, headSvc.Namespace, domainName, - dashboardPort) - log.V(1).Info("fetchDashboardURL ", "dashboardURL", dashboardURL) - return dashboardURL, nil + port) + log.V(1).Info("FetchHeadServiceURL", "head service URL", headServiceURL, "port", defaultPortName) + return headServiceURL, nil } func (r *RayDashboardClient) InitClient(url string) { diff --git a/ray-operator/controllers/ray/utils/util.go b/ray-operator/controllers/ray/utils/util.go index 30e4a9bef8..c2c146426e 100644 --- a/ray-operator/controllers/ray/utils/util.go +++ b/ray-operator/controllers/ray/utils/util.go @@ -129,7 +129,7 @@ func GetNamespace(metaData metav1.ObjectMeta) string { return metaData.Namespace } -// GenerateServiceName generates a ray head service name from cluster name +// GenerateServiceName generates a Ray head service name from cluster name func GenerateServiceName(clusterName string) string { return CheckName(fmt.Sprintf("%s-%s-%s", clusterName, rayv1alpha1.HeadNode, "svc")) } From c0974c02d1137fa6de9fc061f54c00863ac7a792 Mon Sep 17 00:00:00 2001 From: kaihsun Date: Sun, 18 Jun 2023 17:50:51 +0000 Subject: [PATCH 2/3] add test --- .../ray/rayservice_controller_unit_test.go | 71 +++++++++++++++++++ .../ray/utils/dashboard_httpclient.go | 4 +- 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/ray-operator/controllers/ray/rayservice_controller_unit_test.go b/ray-operator/controllers/ray/rayservice_controller_unit_test.go index 7d7c57142b..ca5deed6fc 100644 --- a/ray-operator/controllers/ray/rayservice_controller_unit_test.go +++ b/ray-operator/controllers/ray/rayservice_controller_unit_test.go @@ -2,11 +2,13 @@ package ray import ( "context" + "fmt" "reflect" "testing" rayv1alpha1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1alpha1" "github.com/ray-project/kuberay/ray-operator/controllers/ray/common" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/scheme" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" @@ -372,3 +374,72 @@ func TestReconcileServices_UpdateService(t *testing.T) { assert.Equal(t, 1, len(svcList.Items), "Service list should have one item") assert.False(t, reflect.DeepEqual(*oldSvc, svcList.Items[0])) } + +func TestFetchHeadServiceURL(t *testing.T) { + // Create a new scheme with CRDs, Pod, Service schemes. + newScheme := runtime.NewScheme() + _ = rayv1alpha1.AddToScheme(newScheme) + _ = corev1.AddToScheme(newScheme) + + // Mock data + namespace := "ray" + dashboardPort := int32(9999) + cluster := rayv1alpha1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: namespace, + }, + Spec: rayv1alpha1.RayClusterSpec{ + HeadGroupSpec: rayv1alpha1.HeadGroupSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "ray-head", + Ports: []corev1.ContainerPort{ + { + Name: common.DefaultDashboardName, + ContainerPort: dashboardPort, + }, + }, + }, + }, + }, + }, + }, + }, + } + + // A head service for the RayCluster. + headSvc := corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: utils.GenerateServiceName(cluster.Name), + Namespace: cluster.ObjectMeta.Namespace, + }, + Spec: corev1.ServiceSpec{ + Ports: []corev1.ServicePort{ + { + Name: common.DefaultDashboardName, + Port: dashboardPort, + }, + }, + }, + } + + // Initialize a fake client with newScheme and runtimeObjects. + runtimeObjects := []runtime.Object{&headSvc} + fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(runtimeObjects...).Build() + + // Initialize RayCluster reconciler. + ctx := context.TODO() + r := RayServiceReconciler{ + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + Log: ctrl.Log.WithName("controllers").WithName("RayService"), + } + + url, err := utils.FetchHeadServiceURL(ctx, &r.Log, r.Client, &cluster, common.DefaultDashboardName) + assert.Nil(t, err, "Fail to fetch head service url") + assert.Equal(t, fmt.Sprintf("test-cluster-head-svc.%s.svc.cluster.local:%d", namespace, dashboardPort), url, "Head service url is not correct") +} diff --git a/ray-operator/controllers/ray/utils/dashboard_httpclient.go b/ray-operator/controllers/ray/utils/dashboard_httpclient.go index 6ff7ae4554..6c54fdae22 100644 --- a/ray-operator/controllers/ray/utils/dashboard_httpclient.go +++ b/ray-operator/controllers/ray/utils/dashboard_httpclient.go @@ -72,7 +72,7 @@ func FetchHeadServiceURL(ctx context.Context, log *logr.Logger, cli client.Clien return "", err } - log.V(3).Info("FetchHeadServiceURL", "head service name", headSvc.Name, "namespace", headSvc.Namespace) + log.Info("FetchHeadServiceURL", "head service name", headSvc.Name, "namespace", headSvc.Namespace) servicePorts := headSvc.Spec.Ports port := int32(-1) @@ -93,7 +93,7 @@ func FetchHeadServiceURL(ctx context.Context, log *logr.Logger, cli client.Clien headSvc.Namespace, domainName, port) - log.V(1).Info("FetchHeadServiceURL", "head service URL", headServiceURL, "port", defaultPortName) + log.Info("FetchHeadServiceURL", "head service URL", headServiceURL, "port", defaultPortName) return headServiceURL, nil } From c7b5c96a13a83f256cd97b77f02d08989faba8e3 Mon Sep 17 00:00:00 2001 From: kaihsun Date: Sun, 18 Jun 2023 22:07:07 +0000 Subject: [PATCH 3/3] improve test --- .../ray/rayservice_controller_unit_test.go | 23 +------------------ 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/ray-operator/controllers/ray/rayservice_controller_unit_test.go b/ray-operator/controllers/ray/rayservice_controller_unit_test.go index ca5deed6fc..12a9353cac 100644 --- a/ray-operator/controllers/ray/rayservice_controller_unit_test.go +++ b/ray-operator/controllers/ray/rayservice_controller_unit_test.go @@ -389,28 +389,7 @@ func TestFetchHeadServiceURL(t *testing.T) { Name: "test-cluster", Namespace: namespace, }, - Spec: rayv1alpha1.RayClusterSpec{ - HeadGroupSpec: rayv1alpha1.HeadGroupSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "ray-head", - Ports: []corev1.ContainerPort{ - { - Name: common.DefaultDashboardName, - ContainerPort: dashboardPort, - }, - }, - }, - }, - }, - }, - }, - }, } - - // A head service for the RayCluster. headSvc := corev1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: utils.GenerateServiceName(cluster.Name), @@ -430,7 +409,7 @@ func TestFetchHeadServiceURL(t *testing.T) { runtimeObjects := []runtime.Object{&headSvc} fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(runtimeObjects...).Build() - // Initialize RayCluster reconciler. + // Initialize RayService reconciler. ctx := context.TODO() r := RayServiceReconciler{ Client: fakeClient,