diff --git a/ray-operator/apis/ray/v1alpha1/raycluster_types.go b/ray-operator/apis/ray/v1alpha1/raycluster_types.go
index 5ba7b52a0e1..0e64300e4e7 100644
--- a/ray-operator/apis/ray/v1alpha1/raycluster_types.go
+++ b/ray-operator/apis/ray/v1alpha1/raycluster_types.go
@@ -117,6 +117,8 @@ type RayClusterStatus struct {
 	LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"`
 	// Service Endpoints
 	Endpoints map[string]string `json:"endpoints,omitempty"`
+	// Reason provides more information about current State
+	Reason string `json:"reason,omitempty"`
 }
 
 // RayNodeType the type of a ray node: head/worker
diff --git a/ray-operator/config/crd/bases/ray.io_rayclusters.yaml b/ray-operator/config/crd/bases/ray.io_rayclusters.yaml
index d2ce5baf38f..fbbb54c9cfd 100644
--- a/ray-operator/config/crd/bases/ray.io_rayclusters.yaml
+++ b/ray-operator/config/crd/bases/ray.io_rayclusters.yaml
@@ -11138,6 +11138,9 @@ spec:
                   each node group.
                 format: int32
                 type: integer
+              reason:
+                description: Reason provides more information about current State
+                type: string
               state:
                 description: 'INSERT ADDITIONAL STATUS FIELD - define observed state
                   of cluster Important: Run "make" to regenerat'
diff --git a/ray-operator/config/crd/bases/ray.io_rayjobs.yaml b/ray-operator/config/crd/bases/ray.io_rayjobs.yaml
index 5b19903d139..1fefbf1c984 100644
--- a/ray-operator/config/crd/bases/ray.io_rayjobs.yaml
+++ b/ray-operator/config/crd/bases/ray.io_rayjobs.yaml
@@ -11696,6 +11696,9 @@ spec:
                       of each node group.
                     format: int32
                     type: integer
+                  reason:
+                    description: Reason provides more information about current State
+                    type: string
                   state:
                     description: 'INSERT ADDITIONAL STATUS FIELD - define observed
                       state of cluster Important: Run "make" to regenerat'
diff --git a/ray-operator/config/crd/bases/ray.io_rayservices.yaml b/ray-operator/config/crd/bases/ray.io_rayservices.yaml
index b51c8017be0..f743729b25b 100644
--- a/ray-operator/config/crd/bases/ray.io_rayservices.yaml
+++ b/ray-operator/config/crd/bases/ray.io_rayservices.yaml
@@ -11752,6 +11752,10 @@ spec:
                           of each node group.
                         format: int32
                         type: integer
+                      reason:
+                        description: Reason provides more information about current
+                          State
+                        type: string
                       state:
                         description: 'INSERT ADDITIONAL STATUS FIELD - define observed
                           state of cluster Important: Run "make" to regenerat'
@@ -11851,6 +11855,10 @@ spec:
                           of each node group.
                         format: int32
                         type: integer
+                      reason:
+                        description: Reason provides more information about current
+                          State
+                        type: string
                       state:
                         description: 'INSERT ADDITIONAL STATUS FIELD - define observed
                           state of cluster Important: Run "make" to regenerat'
diff --git a/ray-operator/controllers/ray/raycluster_controller.go b/ray-operator/controllers/ray/raycluster_controller.go
index b8774e4be18..d9f04dd35b3 100644
--- a/ray-operator/controllers/ray/raycluster_controller.go
+++ b/ray-operator/controllers/ray/raycluster_controller.go
@@ -211,9 +211,14 @@ func (r *RayClusterReconciler) rayClusterReconcile(request ctrl.Request, instanc
 		}
 	}
 	if err := r.reconcilePods(instance); err != nil {
+		r.Log.Info("reconcilePods failed, marking RayCluster as failed", "cluster name", instance.Name, "reason", err.Error())
 		if updateErr := r.updateClusterState(instance, rayiov1alpha1.Failed); updateErr != nil {
 			r.Log.Error(updateErr, "RayCluster update state error", "cluster name", request.Name)
 		}
+		// Persist the reconcilePods error text in Status.Reason alongside the Failed state.
+		if updateErr := r.updateClusterReason(instance, err.Error()); updateErr != nil {
+			r.Log.Error(updateErr, "RayCluster update reason error", "cluster name", request.Name)
+		}
 		return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err
 	}
 	// update the status if needed
@@ -979,3 +984,10 @@ func (r *RayClusterReconciler) updateClusterState(instance *rayiov1alpha1.RayClu
 	instance.Status.State = clusterState
 	return r.Status().Update(context.Background(), instance)
 }
+
+// updateClusterReason sets instance.Status.Reason to clusterReason and writes
+// the status back through the status client, returning any update error.
+func (r *RayClusterReconciler) updateClusterReason(instance *rayiov1alpha1.RayCluster, clusterReason string) error {
+	instance.Status.Reason = clusterReason
+	return r.Status().Update(context.Background(), instance)
+}