From e8ba7cf30d411e2e3b05d5e9d286089d71d089c7 Mon Sep 17 00:00:00 2001 From: razo7 Date: Wed, 16 Aug 2023 13:08:32 +0300 Subject: [PATCH] Poll on change of LastUpdateTime when status update fail Wait until the cache is updated in order to prevent reading a stale status in the next reconcile and making wrong decisions based on it. --- .../fenceagentsremediation_controller.go | 48 ++++++++++++++++--- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/controllers/fenceagentsremediation_controller.go b/controllers/fenceagentsremediation_controller.go index f3dd4b75..3dae2b3a 100644 --- a/controllers/fenceagentsremediation_controller.go +++ b/controllers/fenceagentsremediation_controller.go @@ -31,6 +31,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" utilErrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/wait" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -104,12 +105,36 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct // At the end of each Reconcile we try to update CR's status defer func() { - if updateErr := r.updateStatus(ctx, far); updateErr != nil { + var ( + updateErr, pollErr error + ) + counter := 0 + if updateErr = r.updateStatus(ctx, far); updateErr != nil { if apiErrors.IsConflict(updateErr) { r.Log.Info("Conflict has occurred on updating the CR status") } finalErr = utilErrors.NewAggregate([]error{updateErr, finalErr}) finalResult = requeueImmediately + + // Wait until the cache is updated in order to prevent reading a stale status in the next reconcile + // and making wrong decisions based on it. + pollErr = wait.PollUntilContextTimeout(ctx, 200*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) { + counter++ + tmpFar := &v1alpha1.FenceAgentsRemediation{} + if err := r.Client.Get(ctx, client.ObjectKeyFromObject(far), tmpFar); err != nil { + if apiErrors.IsNotFound(err) { + // nothing we can do anymore + return true, nil + } + return false, err + } + r.Log.Info("Try update FAR Status Conditions", "counter", counter) + return tmpFar.Status.LastUpdateTime != nil && (tmpFar.Status.LastUpdateTime.Equal(far.Status.LastUpdateTime) || tmpFar.Status.LastUpdateTime.After(far.Status.LastUpdateTime.Time)), nil + }) + if pollErr != nil { + newErr := fmt.Errorf("failed to wait for updated cache to be updated in status update after 5 seconds of timeout - %w", pollErr) + finalErr = utilErrors.NewAggregate([]error{newErr, finalErr}) + } } r.Log.Info("Finish FenceAgentsRemediation Reconcile") }() @@ -141,7 +166,7 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct } r.Log.Info("Finalizer was added", "CR Name", req.Name) - if err := updateConditions(v1alpha1.RemediationStarted, &far.Status.Conditions, r.Log); err != nil { + if err := updateConditions(v1alpha1.RemediationStarted, far, r.Log); err != nil { return emptyResult, err } return requeueImmediately, nil @@ -214,7 +239,7 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct } r.Log.Info("Fence Agent command was finished successfully", "Fence Agent", far.Spec.Agent, "Node name", req.Name, "Response", SuccessFAResponse) - if err := updateConditions(v1alpha1.FenceAgentSucceeded, &far.Status.Conditions, r.Log); err != nil { + if err := updateConditions(v1alpha1.FenceAgentSucceeded, far, r.Log); err != nil { return emptyResult, err } return requeueImmediately, nil @@ -228,7 +253,7 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct return emptyResult, err } - if err := updateConditions(v1alpha1.RemediationFinished, &far.Status.Conditions, r.Log); err != nil { + if err := updateConditions(v1alpha1.RemediationFinished, far, r.Log); err != nil { return emptyResult, err } r.Log.Info("FenceAgentsRemediation CR has completed to remediate the node", "Node Name", req.Name) @@ -259,17 +284,18 @@ func (r *FenceAgentsRemediationReconciler) updateStatus(ctx context.Context, far // updateConditions updates the status conditions of a FenceAgentsRemediation object based on the provided ProcessingChangeReason. // return an error if an unknown ProcessingChangeReason is provided -func updateConditions(reason v1alpha1.ProcessingChangeReason, currentConditions *[]metav1.Condition, log logr.Logger) error { +func updateConditions(reason v1alpha1.ProcessingChangeReason, far *v1alpha1.FenceAgentsRemediation, log logr.Logger) error { var ( processingConditionStatus, fenceAgentActionSucceededConditionStatus, succeededConditionStatus metav1.ConditionStatus conditionMessage string ) + currentConditions := &far.Status.Conditions // All the ProcessingChangeReasons can happen one after another // RemediationStarted will always be the first reason. // FenceAgentSucceeded can only happen after RemediationStarted happened // RemediationFinished can only happen after FenceAgentSucceeded happened - + conditionHasBeenChanged := false switch reason { case v1alpha1.RemediationStarted: processingConditionStatus = metav1.ConditionTrue @@ -301,7 +327,6 @@ func updateConditions(reason v1alpha1.ProcessingChangeReason, currentConditions return nil } - log.Info("updating Status Condition", "reason", reason, "processingConditionStatus", processingConditionStatus, "fenceAgentActionSucceededConditionStatus", fenceAgentActionSucceededConditionStatus, "succededConditionStatus", succeededConditionStatus, "reason", string(reason)) // if the requested Status.Conditions.Processing is different then the current one, then update Status.Conditions.Processing value if processingConditionStatus != "" && !meta.IsStatusConditionPresentAndEqual(*currentConditions, commonConditions.ProcessingType, processingConditionStatus) { meta.SetStatusCondition(currentConditions, metav1.Condition{ @@ -310,6 +335,7 @@ func updateConditions(reason v1alpha1.ProcessingChangeReason, currentConditions Reason: string(reason), Message: conditionMessage, }) + conditionHasBeenChanged = true } // if the requested Status.Conditions.FenceAgentActionSucceeded is different then the current one, then update Status.Conditions.FenceAgentActionSucceeded value @@ -320,6 +346,7 @@ func updateConditions(reason v1alpha1.ProcessingChangeReason, currentConditions Reason: string(reason), Message: conditionMessage, }) + conditionHasBeenChanged = true } // if the requested Status.Conditions.Succeeded is different then the current one, then update Status.Conditions.Succeeded value @@ -330,7 +357,14 @@ func updateConditions(reason v1alpha1.ProcessingChangeReason, currentConditions Reason: string(reason), Message: conditionMessage, }) + conditionHasBeenChanged = true + } + // Only update lastUpdate when there were other changes + if conditionHasBeenChanged { + now := metav1.Now() + far.Status.LastUpdateTime = &now } + log.Info("Updating Status Condition", "reason", reason, "processingConditionStatus", processingConditionStatus, "fenceAgentActionSucceededConditionStatus", fenceAgentActionSucceededConditionStatus, "succededConditionStatus", succeededConditionStatus, "reason", string(reason), "LastUpdateTime", far.Status.LastUpdateTime) return nil }