Skip to content

Commit

Permalink
Poll on change of LastUpdateTime when status update fail
Browse files Browse the repository at this point in the history
Wait until the cache is updated in order to prevent reading a stale status in the next reconcile and making wrong decisions based on it.
  • Loading branch information
razo7 committed Aug 16, 2023
1 parent 6b53b13 commit e8ba7cf
Showing 1 changed file with 41 additions and 7 deletions.
48 changes: 41 additions & 7 deletions controllers/fenceagentsremediation_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
utilErrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/wait"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
Expand Down Expand Up @@ -104,12 +105,36 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct

// At the end of each Reconcile we try to update CR's status
defer func() {
if updateErr := r.updateStatus(ctx, far); updateErr != nil {
var (
updateErr, pollErr error
)
counter := 0
if updateErr = r.updateStatus(ctx, far); updateErr != nil {
if apiErrors.IsConflict(updateErr) {
r.Log.Info("Conflict has occurred on updating the CR status")
}
finalErr = utilErrors.NewAggregate([]error{updateErr, finalErr})
finalResult = requeueImmediately

// Wait until the cache is updated in order to prevent reading a stale status in the next reconcile
// and making wrong decisions based on it.
pollErr = wait.PollUntilContextTimeout(ctx, 200*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
counter++
tmpFar := &v1alpha1.FenceAgentsRemediation{}
if err := r.Client.Get(ctx, client.ObjectKeyFromObject(far), tmpFar); err != nil {
if apiErrors.IsNotFound(err) {
// nothing we can do anymore
return true, nil
}
return false, err
}
r.Log.Info("Try update FAR Status Conditions", "counter", counter)
return tmpFar.Status.LastUpdateTime != nil && (tmpFar.Status.LastUpdateTime.Equal(far.Status.LastUpdateTime) || tmpFar.Status.LastUpdateTime.After(far.Status.LastUpdateTime.Time)), nil
})
if pollErr != nil {
newErr := fmt.Errorf("failed to wait for updated cache to be updated in status update after 5 seconds of timeout - %w", pollErr)
finalErr = utilErrors.NewAggregate([]error{newErr, finalErr})
}
}
r.Log.Info("Finish FenceAgentsRemediation Reconcile")
}()
Expand Down Expand Up @@ -141,7 +166,7 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct
}
r.Log.Info("Finalizer was added", "CR Name", req.Name)

if err := updateConditions(v1alpha1.RemediationStarted, &far.Status.Conditions, r.Log); err != nil {
if err := updateConditions(v1alpha1.RemediationStarted, far, r.Log); err != nil {
return emptyResult, err
}
return requeueImmediately, nil
Expand Down Expand Up @@ -214,7 +239,7 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct
}

r.Log.Info("Fence Agent command was finished successfully", "Fence Agent", far.Spec.Agent, "Node name", req.Name, "Response", SuccessFAResponse)
if err := updateConditions(v1alpha1.FenceAgentSucceeded, &far.Status.Conditions, r.Log); err != nil {
if err := updateConditions(v1alpha1.FenceAgentSucceeded, far, r.Log); err != nil {
return emptyResult, err
}
return requeueImmediately, nil
Expand All @@ -228,7 +253,7 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct
return emptyResult, err
}

if err := updateConditions(v1alpha1.RemediationFinished, &far.Status.Conditions, r.Log); err != nil {
if err := updateConditions(v1alpha1.RemediationFinished, far, r.Log); err != nil {
return emptyResult, err
}
r.Log.Info("FenceAgentsRemediation CR has completed to remediate the node", "Node Name", req.Name)
Expand Down Expand Up @@ -259,17 +284,18 @@ func (r *FenceAgentsRemediationReconciler) updateStatus(ctx context.Context, far

// updateConditions updates the status conditions of a FenceAgentsRemediation object based on the provided ProcessingChangeReason.
// return an error if an unknown ProcessingChangeReason is provided
func updateConditions(reason v1alpha1.ProcessingChangeReason, currentConditions *[]metav1.Condition, log logr.Logger) error {
func updateConditions(reason v1alpha1.ProcessingChangeReason, far *v1alpha1.FenceAgentsRemediation, log logr.Logger) error {

var (
processingConditionStatus, fenceAgentActionSucceededConditionStatus, succeededConditionStatus metav1.ConditionStatus
conditionMessage string
)
currentConditions := &far.Status.Conditions
// All the ProcessingChangeReasons can happen one after another
// RemediationStarted will always be the first reason.
// FenceAgentSucceeded can only happen after RemediationStarted happened
// RemediationFinished can only happen after FenceAgentSucceeded happened

conditionHasBeenChanged := false
switch reason {
case v1alpha1.RemediationStarted:
processingConditionStatus = metav1.ConditionTrue
Expand Down Expand Up @@ -301,7 +327,6 @@ func updateConditions(reason v1alpha1.ProcessingChangeReason, currentConditions
return nil
}

log.Info("updating Status Condition", "reason", reason, "processingConditionStatus", processingConditionStatus, "fenceAgentActionSucceededConditionStatus", fenceAgentActionSucceededConditionStatus, "succededConditionStatus", succeededConditionStatus, "reason", string(reason))
// if the requested Status.Conditions.Processing is different then the current one, then update Status.Conditions.Processing value
if processingConditionStatus != "" && !meta.IsStatusConditionPresentAndEqual(*currentConditions, commonConditions.ProcessingType, processingConditionStatus) {
meta.SetStatusCondition(currentConditions, metav1.Condition{
Expand All @@ -310,6 +335,7 @@ func updateConditions(reason v1alpha1.ProcessingChangeReason, currentConditions
Reason: string(reason),
Message: conditionMessage,
})
conditionHasBeenChanged = true
}

// if the requested Status.Conditions.FenceAgentActionSucceeded is different then the current one, then update Status.Conditions.FenceAgentActionSucceeded value
Expand All @@ -320,6 +346,7 @@ func updateConditions(reason v1alpha1.ProcessingChangeReason, currentConditions
Reason: string(reason),
Message: conditionMessage,
})
conditionHasBeenChanged = true
}

// if the requested Status.Conditions.Succeeded is different then the current one, then update Status.Conditions.Succeeded value
Expand All @@ -330,7 +357,14 @@ func updateConditions(reason v1alpha1.ProcessingChangeReason, currentConditions
Reason: string(reason),
Message: conditionMessage,
})
conditionHasBeenChanged = true
}
// Only update lastUpdate when there were other changes
if conditionHasBeenChanged {
now := metav1.Now()
far.Status.LastUpdateTime = &now
}
log.Info("Updating Status Condition", "reason", reason, "processingConditionStatus", processingConditionStatus, "fenceAgentActionSucceededConditionStatus", fenceAgentActionSucceededConditionStatus, "succededConditionStatus", succeededConditionStatus, "reason", string(reason), "LastUpdateTime", far.Status.LastUpdateTime)

return nil
}
Expand Down

0 comments on commit e8ba7cf

Please sign in to comment.