Skip to content

Commit

Permalink
Inject cluster name as an environment variable into head and worker pods
Browse files Browse the repository at this point in the history
  • Loading branch information
root authored and Yicheng-Lu-llll committed Feb 28, 2023
1 parent f82e7ea commit 11efe1d
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 11 deletions.
10 changes: 7 additions & 3 deletions ray-operator/controllers/ray/common/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ func initReadinessProbeHandler(probe *v1.Probe, rayNodeType rayiov1alpha1.RayNod
}

// BuildPod a pod config
func BuildPod(podTemplateSpec v1.PodTemplateSpec, rayNodeType rayiov1alpha1.RayNodeType, rayStartParams map[string]string, svcName string, headPort string, enableRayAutoscaler *bool, creator string) (aPod v1.Pod) {
func BuildPod(podTemplateSpec v1.PodTemplateSpec, rayNodeType rayiov1alpha1.RayNodeType, rayStartParams map[string]string, svcName string, headPort string, enableRayAutoscaler *bool, creator string, clusterName string) (aPod v1.Pod) {
pod := v1.Pod{
TypeMeta: metav1.TypeMeta{
APIVersion: "v1",
Expand Down Expand Up @@ -329,7 +329,7 @@ func BuildPod(podTemplateSpec v1.PodTemplateSpec, rayNodeType rayiov1alpha1.RayN
setInitContainerEnvVars(&pod.Spec.InitContainers[index], svcName)
}

setContainerEnvVars(&pod, rayContainerIndex, rayNodeType, rayStartParams, svcName, headPort, creator)
setContainerEnvVars(&pod, rayContainerIndex, rayNodeType, rayStartParams, svcName, headPort, creator, clusterName)

// health check only if FT enabled
if podTemplateSpec.Annotations != nil {
Expand Down Expand Up @@ -547,7 +547,7 @@ func setInitContainerEnvVars(container *v1.Container, svcName string) {
}
}

func setContainerEnvVars(pod *v1.Pod, rayContainerIndex int, rayNodeType rayiov1alpha1.RayNodeType, rayStartParams map[string]string, svcName string, headPort string, creator string) {
func setContainerEnvVars(pod *v1.Pod, rayContainerIndex int, rayNodeType rayiov1alpha1.RayNodeType, rayStartParams map[string]string, svcName string, headPort string, creator string, clusterName string) {
// set IP to local host if head, or the the svc otherwise RAY_IP
// set the port RAY_PORT
// set the password?
Expand All @@ -573,6 +573,10 @@ func setContainerEnvVars(pod *v1.Pod, rayContainerIndex int, rayNodeType rayiov1
portEnv := v1.EnvVar{Name: RAY_PORT, Value: headPort}
container.Env = append(container.Env, portEnv)
}
if !envVarExists(CLUSTER_NAME, container.Env) {
clusterNameEnv := v1.EnvVar{Name: CLUSTER_NAME, Value: clusterName}
container.Env = append(container.Env, clusterNameEnv)
}
if strings.ToLower(creator) == RayServiceCreatorLabelValue {
// Only add this env for Ray Service cluster to improve service SLA.
if !envVarExists(RAY_TIMEOUT_MS_TASK_WAIT_FOR_DEATH_INFO, container.Env) {
Expand Down
12 changes: 6 additions & 6 deletions ray-operator/controllers/ray/common/pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ func TestBuildPod(t *testing.T) {
podName := strings.ToLower(cluster.Name + DashSymbol + string(rayiov1alpha1.HeadNode) + DashSymbol + utils.FormatInt32(0))
svcName := utils.GenerateServiceName(cluster.Name)
podTemplateSpec := DefaultHeadPodTemplate(*cluster, cluster.Spec.HeadGroupSpec, podName, svcName, "6379")
pod := BuildPod(podTemplateSpec, rayiov1alpha1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, svcName, "6379", nil, "")
pod := BuildPod(podTemplateSpec, rayiov1alpha1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, svcName, "6379", nil, "", cluster.Name)

// Check RAY_ADDRESS env.
checkPodEnv(t, pod, RAY_ADDRESS, "127.0.0.1:6379")
Expand Down Expand Up @@ -387,7 +387,7 @@ func TestBuildPod(t *testing.T) {
worker := cluster.Spec.WorkerGroupSpecs[0]
podName = cluster.Name + DashSymbol + string(rayiov1alpha1.WorkerNode) + DashSymbol + worker.GroupName + DashSymbol + utils.FormatInt32(0)
podTemplateSpec = DefaultWorkerPodTemplate(*cluster, worker, podName, svcName, "6379")
pod = BuildPod(podTemplateSpec, rayiov1alpha1.WorkerNode, worker.RayStartParams, svcName, "6379", nil, "")
pod = BuildPod(podTemplateSpec, rayiov1alpha1.WorkerNode, worker.RayStartParams, svcName, "6379", nil, "", cluster.Name)

// Check RAY_ADDRESS env
checkPodEnv(t, pod, RAY_ADDRESS, "raycluster-sample-head-svc:6379")
Expand Down Expand Up @@ -416,7 +416,7 @@ func TestBuildPod_WithAutoscalerEnabled(t *testing.T) {
podName := strings.ToLower(cluster.Name + DashSymbol + string(rayiov1alpha1.HeadNode) + DashSymbol + utils.FormatInt32(0))
svcName := utils.GenerateServiceName(cluster.Name)
podTemplateSpec := DefaultHeadPodTemplate(*cluster, cluster.Spec.HeadGroupSpec, podName, svcName, "6379")
pod := BuildPod(podTemplateSpec, rayiov1alpha1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, svcName, "6379", &trueFlag, "")
pod := BuildPod(podTemplateSpec, rayiov1alpha1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, svcName, "6379", &trueFlag, "", cluster.Name)

actualResult := pod.Labels[RayClusterLabelKey]
expectedResult := cluster.Name
Expand Down Expand Up @@ -472,7 +472,7 @@ func TestBuildPod_WithCreatedByRayService(t *testing.T) {
podName := strings.ToLower(cluster.Name + DashSymbol + string(rayiov1alpha1.HeadNode) + DashSymbol + utils.FormatInt32(0))
svcName := utils.GenerateServiceName(cluster.Name)
podTemplateSpec := DefaultHeadPodTemplate(*cluster, cluster.Spec.HeadGroupSpec, podName, svcName, "6379")
pod := BuildPod(podTemplateSpec, rayiov1alpha1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, svcName, "6379", &trueFlag, RayServiceCreatorLabelValue)
pod := BuildPod(podTemplateSpec, rayiov1alpha1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, svcName, "6379", &trueFlag, RayServiceCreatorLabelValue, cluster.Name)

hasCorrectDeathEnv := false
for _, container := range pod.Spec.Containers {
Expand Down Expand Up @@ -544,7 +544,7 @@ func TestBuildPodWithAutoscalerOptions(t *testing.T) {
SecurityContext: &customSecurityContext,
}
podTemplateSpec := DefaultHeadPodTemplate(*cluster, cluster.Spec.HeadGroupSpec, podName, svcName, "6379")
pod := BuildPod(podTemplateSpec, rayiov1alpha1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, svcName, "6379", &trueFlag, "")
pod := BuildPod(podTemplateSpec, rayiov1alpha1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, svcName, "6379", &trueFlag, "", cluster.Name)
expectedContainer := *autoscalerContainer.DeepCopy()
expectedContainer.Image = customAutoscalerImage
expectedContainer.ImagePullPolicy = customPullPolicy
Expand Down Expand Up @@ -689,7 +689,7 @@ func TestCleanupInvalidVolumeMounts(t *testing.T) {
podName := strings.ToLower(cluster.Name + DashSymbol + string(rayiov1alpha1.HeadNode) + DashSymbol + utils.FormatInt32(0))
svcName := utils.GenerateServiceName(cluster.Name)
podTemplateSpec := DefaultHeadPodTemplate(*cluster, cluster.Spec.HeadGroupSpec, podName, svcName, "6379")
pod := BuildPod(podTemplateSpec, rayiov1alpha1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, svcName, "6379", nil, "")
pod := BuildPod(podTemplateSpec, rayiov1alpha1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, svcName, "6379", nil, "", cluster.Name)

pod.Spec.Containers[0].VolumeMounts = append(pod.Spec.Containers[0].VolumeMounts, []v1.VolumeMount{
{
Expand Down
4 changes: 2 additions & 2 deletions ray-operator/controllers/ray/raycluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -766,7 +766,7 @@ func (r *RayClusterReconciler) buildHeadPod(instance rayiov1alpha1.RayCluster) c
podConf := common.DefaultHeadPodTemplate(instance, instance.Spec.HeadGroupSpec, podName, svcName, headPort)
r.Log.Info("head pod labels", "labels", podConf.Labels)
creatorName := getCreator(instance)
pod := common.BuildPod(podConf, rayiov1alpha1.HeadNode, instance.Spec.HeadGroupSpec.RayStartParams, svcName, headPort, autoscalingEnabled, creatorName)
pod := common.BuildPod(podConf, rayiov1alpha1.HeadNode, instance.Spec.HeadGroupSpec.RayStartParams, svcName, headPort, autoscalingEnabled, creatorName, instance.Name)
// Set raycluster instance as the owner and controller
if err := controllerutil.SetControllerReference(&instance, &pod, r.Scheme); err != nil {
r.Log.Error(err, "Failed to set controller reference for raycluster pod")
Expand Down Expand Up @@ -799,7 +799,7 @@ func (r *RayClusterReconciler) buildWorkerPod(instance rayiov1alpha1.RayCluster,
autoscalingEnabled := instance.Spec.EnableInTreeAutoscaling
podTemplateSpec := common.DefaultWorkerPodTemplate(instance, worker, podName, svcName, headPort)
creatorName := getCreator(instance)
pod := common.BuildPod(podTemplateSpec, rayiov1alpha1.WorkerNode, worker.RayStartParams, svcName, headPort, autoscalingEnabled, creatorName)
pod := common.BuildPod(podTemplateSpec, rayiov1alpha1.WorkerNode, worker.RayStartParams, svcName, headPort, autoscalingEnabled, creatorName, instance.Name)
// Set raycluster instance as the owner and controller
if err := controllerutil.SetControllerReference(&instance, &pod, r.Scheme); err != nil {
r.Log.Error(err, "Failed to set controller reference for raycluster pod")
Expand Down

0 comments on commit 11efe1d

Please sign in to comment.