-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Stop Remediation When NHC Timed Out Annotation Exists #72
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ import ( | |
"fmt" | ||
|
||
"github.com/go-logr/logr" | ||
commonAnnotations "github.com/medik8s/common/pkg/annotations" | ||
|
||
apiErrors "k8s.io/apimachinery/pkg/api/errors" | ||
"k8s.io/apimachinery/pkg/runtime" | ||
|
@@ -35,6 +36,8 @@ import ( | |
) | ||
|
||
const ( | ||
// errors | ||
errorNhcTimedOut = "stop remediation when NHC timed out annotaion exists" | ||
errorMissingParams = "nodeParameters or sharedParameters or both are missing, and they cannot be empty" | ||
errorMissingNodeParams = "node parameter is required, and cannot be empty" | ||
SuccessFAResponse = "Success: Rebooted" | ||
|
@@ -129,6 +132,13 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct | |
return emptyResult, nil | ||
} | ||
|
||
// Check NHC timeout annotation | ||
if isTimedOutByNHC(far) { | ||
r.Log.Info("FAR remediation was stopped by Node Healthcheck Operator") | ||
// TODO: update status and return its error | ||
return emptyResult, errors.New(errorNhcTimedOut) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why return an error? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I thought that if we didn't return an error, then on the next reconcile the CR would be processed and would pass this `if` on the way to executing the FA and deleting workloads. But thinking about it again, the CR will be stopped here until the NHC annotation is removed from the CR. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Exactly, and removing the annotation will trigger a reconcile automatically, so there is no need to requeue ourselves 🙂 |
||
} | ||
|
||
// Fetch the FAR's pod | ||
r.Log.Info("Fetch FAR's pod") | ||
pod, err := utils.GetFenceAgentsRemediationPod(r.Client) | ||
|
@@ -176,6 +186,15 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct | |
return emptyResult, nil | ||
} | ||
|
||
// isTimedOutByNHC checks if NHC set a timeout annotation on the CR | ||
func isTimedOutByNHC(far *v1alpha1.FenceAgentsRemediation) bool { | ||
if far != nil && far.Annotations != nil && far.DeletionTimestamp == nil { | ||
_, isTimeoutIssued := far.Annotations[commonAnnotations.NhcTimedOut] | ||
return isTimeoutIssued | ||
} | ||
return false | ||
} | ||
|
||
// buildFenceAgentParams collects the FAR's parameters for the node based on the FAR CR, and if the CR is missing parameters, | ||
// or the CR's name doesn't match the nodeParameter name, or it has an action which is different than reboot, then it returns an error | ||
func buildFenceAgentParams(far *v1alpha1.FenceAgentsRemediation) ([]string, error) { | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this can go before the if block above?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it can