From f286d00bcad8ce1ab1aba70aeec3bf4c0b778aa8 Mon Sep 17 00:00:00 2001 From: Dmitri Gekhtman Date: Wed, 8 Jun 2022 13:13:16 -0700 Subject: [PATCH 1/4] Add PullPolicy --- ray-operator/apis/ray/v1alpha1/raycluster_types.go | 2 ++ ray-operator/controllers/ray/common/pod.go | 5 ++++- ray-operator/controllers/ray/common/pod_test.go | 9 ++++++--- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/ray-operator/apis/ray/v1alpha1/raycluster_types.go b/ray-operator/apis/ray/v1alpha1/raycluster_types.go index 1e174779d2..d57d9d09c0 100644 --- a/ray-operator/apis/ray/v1alpha1/raycluster_types.go +++ b/ray-operator/apis/ray/v1alpha1/raycluster_types.go @@ -71,6 +71,8 @@ type AutoscalerOptions struct { Resources *v1.ResourceRequirements `json:"resources,omitempty"` // Image optionally overrides the autoscaler's container image. This override is for provided for autoscaler testing and development. Image *string `json:"image,omitempty"` + // ImagePullPolicy optionally overrides the autoscaler container's image pull policy. This override is for provided for autoscaler testing and development. + ImagePullPolicy *v1.PullPolicy `json:"imagePullPolicy,omitempty"` // IdleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources. // Defaults to 300 (five minutes). IdleTimeoutSeconds *int32 `json:"idleTimeoutSeconds,omitempty"` diff --git a/ray-operator/controllers/ray/common/pod.go b/ray-operator/controllers/ray/common/pod.go index 0b47b6cf46..6e5f7dc034 100644 --- a/ray-operator/controllers/ray/common/pod.go +++ b/ray-operator/controllers/ray/common/pod.go @@ -170,7 +170,7 @@ func BuildAutoscalerContainer() v1.Container { // TODO: choose right version based on instance.spec.Version // The currently used image reflects changes up to https://github.com/ray-project/ray/pull/24718 Image: "rayproject/ray:448f52", - ImagePullPolicy: v1.PullAlways, + ImagePullPolicy: v1.PullIfNotPresent, Env: []v1.EnvVar{ { Name: "RAY_CLUSTER_NAME", @@ -223,6 +223,9 @@ func mergeAutoscalerOverrides(autoscalerContainer *v1.Container, autoscalerOptio if autoscalerOptions.Image != nil { autoscalerContainer.Image = *autoscalerOptions.Image } + if autoscalerOptions.ImagePullPolicy != nil { + autoscalerContainer.ImagePullPolicy = *autoscalerOptions.ImagePullPolicy + } } } diff --git a/ray-operator/controllers/ray/common/pod_test.go b/ray-operator/controllers/ray/common/pod_test.go index ef94665298..3b4886c6e5 100644 --- a/ray-operator/controllers/ray/common/pod_test.go +++ b/ray-operator/controllers/ray/common/pod_test.go @@ -175,7 +175,7 @@ var volumeMountsWithAutoscaler = []v1.VolumeMount{ var autoscalerContainer = v1.Container{ Name: "autoscaler", Image: "rayproject/ray:448f52", - ImagePullPolicy: v1.PullAlways, + ImagePullPolicy: v1.PullIfNotPresent, Env: []v1.EnvVar{ { Name: "RAY_CLUSTER_NAME", @@ -344,8 +344,9 @@ func TestBuildPodWithAutoscalerOptions(t *testing.T) { svcName := utils.GenerateServiceName(cluster.Name) customAutoscalerImage := "custom-autoscaler-xxx" + customPullPolicy := v1.PullAlways customTimeout := int32(100) - customUpscaling := "Aggressive" + customUpscaling := rayiov1alpha1.UpscalingMode("Aggressive") customResources := v1.ResourceRequirements{ Requests: v1.ResourceList{ v1.ResourceCPU: resource.MustParse("1"), @@ -358,15 +359,17 @@ func TestBuildPodWithAutoscalerOptions(t *testing.T) { } cluster.Spec.AutoscalerOptions = &rayiov1alpha1.AutoscalerOptions{ - UpscalingMode: (*rayiov1alpha1.UpscalingMode)(&customUpscaling), + UpscalingMode: &customUpscaling, IdleTimeoutSeconds: &customTimeout, Image: &customAutoscalerImage, + ImagePullPolicy: &customPullPolicy, Resources: &customResources, } podTemplateSpec := DefaultHeadPodTemplate(*cluster, cluster.Spec.HeadGroupSpec, podName, svcName) pod := BuildPod(podTemplateSpec, rayiov1alpha1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, svcName, &trueFlag) expectedContainer := *autoscalerContainer.DeepCopy() expectedContainer.Image = customAutoscalerImage + expectedContainer.ImagePullPolicy = customPullPolicy expectedContainer.Resources = customResources index := getAutoscalerContainerIndex(pod) actualContainer := pod.Spec.Containers[index] From 28094c5c6b6b0991290c5fd88e3dcfbc14addc62 Mon Sep 17 00:00:00 2001 From: Dmitri Gekhtman Date: Wed, 8 Jun 2022 14:09:02 -0700 Subject: [PATCH 2/4] Test --- ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go | 7 ++++++- ray-operator/config/crd/bases/ray.io_rayclusters.yaml | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go b/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go index 1bca804721..6a073d1f20 100644 --- a/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go +++ b/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go @@ -6,7 +6,7 @@ package v1alpha1 import ( - v1 "k8s.io/api/core/v1" + "k8s.io/api/core/v1" runtime "k8s.io/apimachinery/pkg/runtime" ) @@ -23,6 +23,11 @@ func (in *AutoscalerOptions) DeepCopyInto(out *AutoscalerOptions) { *out = new(string) **out = **in } + if in.ImagePullPolicy != nil { + in, out := &in.ImagePullPolicy, &out.ImagePullPolicy + *out = new(v1.PullPolicy) + **out = **in + } if in.IdleTimeoutSeconds != nil { in, out := &in.IdleTimeoutSeconds, &out.IdleTimeoutSeconds *out = new(int32) diff --git a/ray-operator/config/crd/bases/ray.io_rayclusters.yaml b/ray-operator/config/crd/bases/ray.io_rayclusters.yaml index 43d19a56ec..549ef6ace3 100644 --- a/ray-operator/config/crd/bases/ray.io_rayclusters.yaml +++ b/ray-operator/config/crd/bases/ray.io_rayclusters.yaml @@ -47,6 +47,10 @@ spec: description: Image optionally overrides the autoscaler's container image. type: string + imagePullPolicy: + description: ImagePullPolicy optionally overrides the autoscaler + container's image pull policy. + type: string resources: description: Resources specifies resource requests and limits for the autoscaler container. From 55eff003b41c1d279a0564c17b514c529f3ef4f7 Mon Sep 17 00:00:00 2001 From: Dmitri Gekhtman Date: Wed, 8 Jun 2022 14:18:34 -0700 Subject: [PATCH 3/4] Add v1 back in, wtf. --- ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go b/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go index 6a073d1f20..d8531f10f4 100644 --- a/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go +++ b/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go @@ -6,7 +6,7 @@ package v1alpha1 import ( - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" runtime "k8s.io/apimachinery/pkg/runtime" ) From 3ba47dab0ae72c0b9ca8374063e2c9a285b0262b Mon Sep 17 00:00:00 2001 From: Dmitri Gekhtman Date: Wed, 8 Jun 2022 16:30:02 -0700 Subject: [PATCH 4/4] Switch back to PullAlways --- ray-operator/controllers/ray/common/pod.go | 2 +- ray-operator/controllers/ray/common/pod_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ray-operator/controllers/ray/common/pod.go b/ray-operator/controllers/ray/common/pod.go index 6e5f7dc034..ccef9bacac 100644 --- a/ray-operator/controllers/ray/common/pod.go +++ b/ray-operator/controllers/ray/common/pod.go @@ -170,7 +170,7 @@ func BuildAutoscalerContainer() v1.Container { // TODO: choose right version based on instance.spec.Version // The currently used image reflects changes up to https://github.com/ray-project/ray/pull/24718 Image: "rayproject/ray:448f52", - ImagePullPolicy: v1.PullIfNotPresent, + ImagePullPolicy: v1.PullAlways, Env: []v1.EnvVar{ { Name: "RAY_CLUSTER_NAME", diff --git a/ray-operator/controllers/ray/common/pod_test.go b/ray-operator/controllers/ray/common/pod_test.go index 3b4886c6e5..d613c45050 100644 --- a/ray-operator/controllers/ray/common/pod_test.go +++ b/ray-operator/controllers/ray/common/pod_test.go @@ -175,7 +175,7 @@ var volumeMountsWithAutoscaler = []v1.VolumeMount{ var autoscalerContainer = v1.Container{ Name: "autoscaler", Image: "rayproject/ray:448f52", - ImagePullPolicy: v1.PullIfNotPresent, + ImagePullPolicy: v1.PullAlways, Env: []v1.EnvVar{ { Name: "RAY_CLUSTER_NAME", @@ -344,7 +344,7 @@ func TestBuildPodWithAutoscalerOptions(t *testing.T) { svcName := utils.GenerateServiceName(cluster.Name) customAutoscalerImage := "custom-autoscaler-xxx" - customPullPolicy := v1.PullAlways + customPullPolicy := v1.PullIfNotPresent customTimeout := int32(100) customUpscaling := rayiov1alpha1.UpscalingMode("Aggressive") customResources := v1.ResourceRequirements{