Skip to content

Commit

Permalink
[RayService] Use DashboardPort for RayService instead of DashboardAge…
Browse files Browse the repository at this point in the history
…ntPort (ray-project#1742)

Starting in Ray 2.8.0, the Ray Serve REST API is exposed at the same port as the Ray Dashboard instead of the dashbaord agent port (though I think the dashboard agent port still works.)

In addition, as a result of that change, the dashboard agent is no longer a concept that is exposed to the user.

This PR

Updates the RayService controller to use the Dashboard endpoint instead of the DashboardAgent endpoint to make Serve REST API requests
Removes the DashboardAgentListenPort from the HeadService and ServeService of the RayCluster and RayService, because it is no longer exposed to users.
After this PR, the DashboardAgentListenPort is only used for the RayCluster health probe:



---------

Signed-off-by: Archit Kulkarni <[email protected]>
  • Loading branch information
architkulkarni authored Dec 21, 2023
1 parent a1ef760 commit 25f787b
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 65 deletions.
8 changes: 4 additions & 4 deletions ray-operator/controllers/ray/common/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -700,11 +700,11 @@ func setMissingRayStartParams(rayStartParams map[string]string, nodeType rayv1.R
// Add --block option. See https://github.com/ray-project/kuberay/pull/675
rayStartParams["block"] = "true"

// Add dashboard listen port for RayService.
// Hardcode the dashboard-agent-listen-port to the default value if it is not provided. This is purely a
// defensive measure; Ray will already use this default value if the flag is not provided.
// The default value is used by the RayCluster health probe; see https://github.com/ray-project/kuberay/issues/1760
if _, ok := rayStartParams["dashboard-agent-listen-port"]; !ok {
if value, ok := annotations[utils.EnableServeServiceKey]; ok && value == utils.EnableServeServiceTrue {
rayStartParams["dashboard-agent-listen-port"] = strconv.Itoa(utils.DefaultDashboardAgentListenPort)
}
rayStartParams["dashboard-agent-listen-port"] = strconv.Itoa(utils.DefaultDashboardAgentListenPort)
}

return rayStartParams
Expand Down
40 changes: 1 addition & 39 deletions ray-operator/controllers/ray/common/pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ func TestBuildPod(t *testing.T) {
workerRayStartCommandEnv := getEnvVar(rayContainer, utils.KUBERAY_GEN_RAY_START_CMD)
assert.True(t, strings.Contains(workerRayStartCommandEnv.Value, "ray start"))

expectedCommandArg := splitAndSort("ulimit -n 65536; ray start --block --memory=1073741824 --num-cpus=1 --num-gpus=3 --address=raycluster-sample-head-svc.default.svc.cluster.local:6379 --port=6379 --metrics-export-port=8080")
expectedCommandArg := splitAndSort("ulimit -n 65536; ray start --block --dashboard-agent-listen-port=52365 --memory=1073741824 --num-cpus=1 --num-gpus=3 --address=raycluster-sample-head-svc.default.svc.cluster.local:6379 --port=6379 --metrics-export-port=8080")
actualCommandArg := splitAndSort(pod.Spec.Containers[0].Args[0])
if !reflect.DeepEqual(expectedCommandArg, actualCommandArg) {
t.Fatalf("Expected `%v` but got `%v`", expectedCommandArg, actualCommandArg)
Expand Down Expand Up @@ -1110,44 +1110,6 @@ func TestSetMissingRayStartParamsDashboardHost(t *testing.T) {
assert.Equal(t, "localhost", rayStartParams["dashboard-host"], fmt.Sprintf("Expected `%v` but got `%v`", "localhost", rayStartParams["dashboard-host"]))
}

func TestSetMissingRayStartParamsAgentListenPort(t *testing.T) {
// The "dashboard-agent-listen-port" port will be automatically injected into RayStartParams with a default
// value of 52365 (i.e., DefaultDashboardAgentListenPort) when the annotation "ray.io/enable-serve-service"
// is set to true. The behavior is the same for both head and workers.
headPort := "6379"
fqdnRayIP := "raycluster-kuberay-head-svc.default.svc.cluster.local"
rayStartParams := map[string]string{}
annotaions := map[string]string{}

// Case 1: Head node without "ray.io/enable-serve-service=true".
rayStartParams = setMissingRayStartParams(rayStartParams, rayv1.HeadNode, headPort, "", annotaions)
assert.NotContains(t, rayStartParams, "dashboard-agent-listen-port", "Head Pod should not have a dashboard-agent-listen-port option set by default.")

// Case 2: Worker node without "ray.io/enable-serve-service=true".
rayStartParams = setMissingRayStartParams(rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP, annotaions)
assert.NotContains(t, rayStartParams, "dashboard-agent-listen-port", "Worker Pod should not have a dashboard-agent-listen-port option set by default.")

// Case 3: Head node with "ray.io/enable-serve-service=true" and users do not provide "dashboard-agent-listen-port".
annotaions = map[string]string{utils.EnableServeServiceKey: utils.EnableServeServiceTrue}
rayStartParams = setMissingRayStartParams(rayStartParams, rayv1.HeadNode, headPort, "", annotaions)
assert.Equal(t, fmt.Sprint(utils.DefaultDashboardAgentListenPort), rayStartParams["dashboard-agent-listen-port"], fmt.Sprintf("Expected `%v` but got `%v`", fmt.Sprint(utils.DefaultDashboardAgentListenPort), rayStartParams["dashboard-agent-listen-port"]))

// Case 4: Worker node with "ray.io/enable-serve-service=true" and users do not provide "dashboard-agent-listen-port".
rayStartParams = setMissingRayStartParams(rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP, annotaions)
assert.Equal(t, fmt.Sprint(utils.DefaultDashboardAgentListenPort), rayStartParams["dashboard-agent-listen-port"], fmt.Sprintf("Expected `%v` but got `%v`", fmt.Sprint(utils.DefaultDashboardAgentListenPort), rayStartParams["dashboard-agent-listen-port"]))

// Case 5: Head node with "ray.io/enable-serve-service=true" and users provide "dashboard-agent-listen-port".
customDashboardAgentListenPort := utils.DefaultDashboardAgentListenPort + 1
rayStartParams = map[string]string{"dashboard-agent-listen-port": fmt.Sprint(customDashboardAgentListenPort)}
rayStartParams = setMissingRayStartParams(rayStartParams, rayv1.HeadNode, headPort, "", annotaions)
assert.Equal(t, fmt.Sprint(customDashboardAgentListenPort), rayStartParams["dashboard-agent-listen-port"], fmt.Sprintf("Expected `%v` but got `%v`", fmt.Sprint(customDashboardAgentListenPort), rayStartParams["dashboard-agent-listen-port"]))

// Case 6: Worker node with "ray.io/enable-serve-service=true" and users provide "dashboard-agent-listen-port".
rayStartParams = map[string]string{"dashboard-agent-listen-port": fmt.Sprint(customDashboardAgentListenPort)}
rayStartParams = setMissingRayStartParams(rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP, annotaions)
assert.Equal(t, fmt.Sprint(customDashboardAgentListenPort), rayStartParams["dashboard-agent-listen-port"], fmt.Sprintf("Expected `%v` but got `%v`", fmt.Sprint(customDashboardAgentListenPort), rayStartParams["dashboard-agent-listen-port"]))
}

func TestGetCustomWorkerInitImage(t *testing.T) {
// cleanup
defer os.Unsetenv(EnableInitContainerInjectionEnvKey)
Expand Down
9 changes: 0 additions & 9 deletions ray-operator/controllers/ray/common/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -323,15 +323,6 @@ func getServicePorts(cluster rayv1.RayCluster) map[string]int32 {
ports = getDefaultPorts()
}

// Check if agent port is defined. If not, check if enable agent service.
if _, agentDefined := ports[utils.DashboardAgentListenPortName]; !agentDefined {
enableServeServiceValue, exist := cluster.Annotations[utils.EnableServeServiceKey]
if exist && enableServeServiceValue == utils.EnableServeServiceTrue {
// If agent port is not in the config, add default value for it.
ports[utils.DashboardAgentListenPortName] = utils.DefaultDashboardAgentListenPort
}
}

// check if metrics port is defined. If not, add default value for it.
if _, metricsDefined := ports[utils.MetricsPortName]; !metricsDefined {
ports[utils.MetricsPortName] = utils.DefaultMetricsPort
Expand Down
6 changes: 4 additions & 2 deletions ray-operator/controllers/ray/rayservice_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1002,7 +1002,7 @@ func (r *RayServiceReconciler) updateStatusForActiveCluster(ctx context.Context,
var clientURL string
rayServiceStatus := &rayServiceInstance.Status.ActiveServiceStatus

if clientURL, err = utils.FetchHeadServiceURL(ctx, &r.Log, r.Client, rayClusterInstance, utils.DashboardAgentListenPortName); err != nil || clientURL == "" {
if clientURL, err = utils.FetchHeadServiceURL(ctx, &r.Log, r.Client, rayClusterInstance, utils.DashboardPortName); err != nil || clientURL == "" {
updateDashboardStatus(rayServiceStatus, false)
return err
}
Expand Down Expand Up @@ -1054,7 +1054,9 @@ func (r *RayServiceReconciler) reconcileServe(ctx context.Context, rayServiceIns
return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, false, err
}

if clientURL, err = utils.FetchHeadServiceURL(ctx, &r.Log, r.Client, rayClusterInstance, utils.DashboardAgentListenPortName); err != nil || clientURL == "" {
// TODO(architkulkarni): Check the RayVersion. If < 2.8.0, error.

if clientURL, err = utils.FetchHeadServiceURL(ctx, &r.Log, r.Client, rayClusterInstance, utils.DashboardPortName); err != nil || clientURL == "" {
return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, false, err
}
rayDashboardClient := r.dashboardClientFunc()
Expand Down
11 changes: 0 additions & 11 deletions ray-operator/controllers/ray/rayservice_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,17 +299,6 @@ applications:
for _, pod := range workerPods.Items {
// Worker Pod should have only one container.
Expect(len(pod.Spec.Containers)).Should(Equal(1))
// Each worker Pod should have a container port with the name "dashboard-agent"
exist := false
for _, port := range pod.Spec.Containers[0].Ports {
if port.Name == utils.DashboardAgentListenPortName {
exist = true
break
}
}
if !exist {
Fail(fmt.Sprintf("Worker Pod %v should have a container port with the name %v", pod.Name, utils.DashboardAgentListenPortName))
}
}
}
})
Expand Down

0 comments on commit 25f787b

Please sign in to comment.