Skip to content

Commit

Permalink
feat: update RayCluster .status.reason field with pod creation error
Browse files Browse the repository at this point in the history
fixes #603
  • Loading branch information
davidxia committed Oct 17, 2022
1 parent 389ba00 commit ea32961
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 0 deletions.
2 changes: 2 additions & 0 deletions ray-operator/apis/ray/v1alpha1/raycluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ type RayClusterStatus struct {
LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"`
// Service Endpoints
Endpoints map[string]string `json:"endpoints,omitempty"`
// Reason provides more information about current State
Reason string `json:"reason,omitempty"`
}

// RayNodeType the type of a ray node: head/worker
Expand Down
3 changes: 3 additions & 0 deletions ray-operator/config/crd/bases/ray.io_rayclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11138,6 +11138,9 @@ spec:
each node group.
format: int32
type: integer
reason:
description: Reason provides more information about current State
type: string
state:
description: 'INSERT ADDITIONAL STATUS FIELD - define observed state
of cluster Important: Run "make" to regenerat'
Expand Down
3 changes: 3 additions & 0 deletions ray-operator/config/crd/bases/ray.io_rayjobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11696,6 +11696,9 @@ spec:
of each node group.
format: int32
type: integer
reason:
description: Reason provides more information about current State
type: string
state:
description: 'INSERT ADDITIONAL STATUS FIELD - define observed
state of cluster Important: Run "make" to regenerat'
Expand Down
8 changes: 8 additions & 0 deletions ray-operator/config/crd/bases/ray.io_rayservices.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11752,6 +11752,10 @@ spec:
of each node group.
format: int32
type: integer
reason:
description: Reason provides more information about current
State
type: string
state:
description: 'INSERT ADDITIONAL STATUS FIELD - define observed
state of cluster Important: Run "make" to regenerat'
Expand Down Expand Up @@ -11851,6 +11855,10 @@ spec:
of each node group.
format: int32
type: integer
reason:
description: Reason provides more information about current
State
type: string
state:
description: 'INSERT ADDITIONAL STATUS FIELD - define observed
state of cluster Important: Run "make" to regenerat'
Expand Down
10 changes: 10 additions & 0 deletions ray-operator/controllers/ray/raycluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,9 +211,14 @@ func (r *RayClusterReconciler) rayClusterReconcile(request ctrl.Request, instanc
}
}
if err := r.reconcilePods(instance); err != nil {
r.Log.Info("rayClusterReconcile ", "error for cluster", instance.Name)
if updateErr := r.updateClusterState(instance, rayiov1alpha1.Failed); updateErr != nil {
r.Log.Error(updateErr, "RayCluster update state error", "cluster name", request.Name)
}
r.Log.Info("rayClusterReconcile ", "trying to update cluster", instance.Name, "reason", err)
if updateErr := r.updateClusterReason(instance, err.Error()); updateErr != nil {
r.Log.Error(updateErr, "RayCluster update reason error", "cluster name", request.Name)
}
return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err
}
// update the status if needed
Expand Down Expand Up @@ -979,3 +984,8 @@ func (r *RayClusterReconciler) updateClusterState(instance *rayiov1alpha1.RayClu
instance.Status.State = clusterState
return r.Status().Update(context.Background(), instance)
}

// updateClusterReason stores clusterReason in instance.Status.Reason and then
// writes the instance's status back via the reconciler's status client.
// It returns whatever error the status update reports.
func (r *RayClusterReconciler) updateClusterReason(instance *rayiov1alpha1.RayCluster, clusterReason string) error {
	instance.Status.Reason = clusterReason

	statusWriter := r.Status()
	ctx := context.Background()
	return statusWriter.Update(ctx, instance)
}

0 comments on commit ea32961

Please sign in to comment.