Skip to content

Commit

Permalink
Add test for configurable k8s job backoff limit (ray-project#2134)
Browse files Browse the repository at this point in the history
  • Loading branch information
jjyao authored May 10, 2024
1 parent 49a5725 commit 76633c5
Showing 1 changed file with 73 additions and 0 deletions.
73 changes: 73 additions & 0 deletions ray-operator/controllers/ray/rayjob_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,79 @@ func rayJobTemplate(name string, namespace string) *rayv1.RayJob {
}

var _ = Context("RayJob in K8sJobMode", func() {
// Checks that the BackoffLimit on the submitter Kubernetes Job follows the
// RayJob's SubmitterConfig. Two RayJobs are exercised: one taken from
// rayJobTemplate unchanged (expected BackoffLimit 2) and one whose
// SubmitterConfig.BackoffLimit is set to 88 (expected BackoffLimit 88).
//
// The specs build on each other: later specs query objects that earlier
// specs created, so they are not meaningful in isolation.
Describe("RayJob SubmitterConfig BackoffLimit", func() {
	ctx := context.Background()
	namespace := "default"
	rayJobWithDefaultSubmitterConfigBackoffLimit := rayJobTemplate("rayjob-default", namespace)
	rayJobWithNonDefaultSubmitterConfigBackoffLimit := rayJobTemplate("rayjob-non-default", namespace)
	rayJobWithNonDefaultSubmitterConfigBackoffLimit.Spec.SubmitterConfig = &rayv1.SubmitterConfig{
		BackoffLimit: pointer.Int32(88),
	}
	// Maps each RayJob under test to the BackoffLimit expected on its submitter Job.
	// Iteration order over the map is unspecified; no spec below depends on it.
	rayJobs := make(map[*rayv1.RayJob]int32)
	rayJobs[rayJobWithDefaultSubmitterConfigBackoffLimit] = int32(2)
	rayJobs[rayJobWithNonDefaultSubmitterConfigBackoffLimit] = int32(88)

	It("Verify RayJob spec", func() {
		for rayJob := range rayJobs {
			// Make sure the submission mode is K8sJobMode.
			Expect(rayJob.Spec.SubmissionMode).To(Equal(rayv1.K8sJobMode))
		}
	})

	It("Create RayJob custom resources", func() {
		for rayJob := range rayJobs {
			err := k8sClient.Create(ctx, rayJob)
			Expect(err).NotTo(HaveOccurred(), "Failed to create RayJob: %v", rayJob.Name)
			// Wait until the freshly created RayJob can be read back.
			Eventually(
				getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: namespace}, rayJob),
				time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayJob: %v", rayJob.Name)
		}
	})

	It("RayJobs's JobDeploymentStatus transitions from New to Initializing.", func() {
		for rayJob := range rayJobs {
			// NOTE: the description arguments are evaluated once, before polling
			// starts, so the status printed on failure is the value rayJob held
			// when Eventually was called, not necessarily the last polled one.
			Eventually(
				getRayJobDeploymentStatus(ctx, rayJob),
				time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusInitializing), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus)
		}
	})

	It("RayJobs's JobDeploymentStatus transitions from Initializing to Running.", func() {
		for rayJob := range rayJobs {
			// Fetch the RayCluster named in the RayJob's status.
			rayCluster := &rayv1.RayCluster{}
			Eventually(
				getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Status.RayClusterName, Namespace: namespace}, rayCluster),
				time.Second*3, time.Millisecond*500).Should(BeNil(), "RayCluster %v not found", rayJob.Status.RayClusterName)

			// Make RayCluster.Status.State to be rayv1.Ready.
			updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace)
			updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace)

			// The RayCluster.Status.State should be Ready.
			Eventually(
				getClusterState(ctx, namespace, rayCluster.Name),
				time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready))

			// RayJobs's JobDeploymentStatus transitions to Running.
			// NOTE: as above, the description shows the status captured before polling.
			Eventually(
				getRayJobDeploymentStatus(ctx, rayJob),
				time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus)
		}
	})

	It("Verify K8s Job BackoffLimit", func() {
		for rayJob, backoffLimit := range rayJobs {
			// In Running state, the submitter Kubernetes Job must be created if this RayJob is in K8sJobMode.
			namespacedName := common.RayJobK8sJobNamespacedName(rayJob)
			job := &batchv1.Job{}
			err := k8sClient.Get(ctx, namespacedName, job)
			Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job for RayJob: %v", rayJob.Name)
			// Guard the dereference: a nil BackoffLimit should be reported as an
			// assertion failure that names the RayJob, not as a nil-pointer panic.
			Expect(job.Spec.BackoffLimit).NotTo(BeNil(), "BackoffLimit is not set on the Kubernetes Job for RayJob: %v", rayJob.Name)
			Expect(*job.Spec.BackoffLimit).To(Equal(backoffLimit), "unexpected BackoffLimit on the Kubernetes Job for RayJob: %v", rayJob.Name)
		}
	})
})

Describe("Successful RayJob in K8sJobMode", func() {
ctx := context.Background()
namespace := "default"
Expand Down

0 comments on commit 76633c5

Please sign in to comment.