From 170a3a4bac9fcde23b1a0ca03d8d08c60e76227c Mon Sep 17 00:00:00 2001 From: JJ Jordan Date: Tue, 2 Apr 2019 01:23:33 -0700 Subject: [PATCH 1/8] Use time window --- cmd/kured/main.go | 28 ++++++++++-- pkg/timewindow/days.go | 83 ++++++++++++++++++++++++++++++++++++ pkg/timewindow/timewindow.go | 64 +++++++++++++++++++++++++++ 3 files changed, 172 insertions(+), 3 deletions(-) create mode 100644 pkg/timewindow/days.go create mode 100644 pkg/timewindow/timewindow.go diff --git a/cmd/kured/main.go b/cmd/kured/main.go index cffa09d81..842b59c76 100644 --- a/cmd/kured/main.go +++ b/cmd/kured/main.go @@ -21,6 +21,7 @@ import ( "github.com/weaveworks/kured/pkg/daemonsetlock" "github.com/weaveworks/kured/pkg/delaytick" "github.com/weaveworks/kured/pkg/notifications/slack" + "github.com/weaveworks/kured/pkg/timewindow" ) var ( @@ -38,6 +39,11 @@ var ( slackUsername string podSelectors []string + rebootDays []string + rebootStart string + rebootEnd string + timezone string + // Metrics rebootRequiredGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: "kured", @@ -79,6 +85,15 @@ func main() { rootCmd.PersistentFlags().StringArrayVar(&podSelectors, "blocking-pod-selector", nil, "label selector identifying pods whose presence should prevent reboots") + rootCmd.PersistentFlags().StringArrayVar(&rebootDays, "reboot-on-days", []string{"sun", "mon", "tue", "wed", "thu", "fri", "sat"}, + "only reboot on these days") + rootCmd.PersistentFlags().StringVar(&rebootStart, "start-time", "0:00", + "only reboot after this time of day") + rootCmd.PersistentFlags().StringVar(&rebootEnd, "end-time", "23:59", + "only reboot before this time of day") + rootCmd.PersistentFlags().StringVar(&timezone, "time-zone", "UTC", + "use this timezone to calculate allowed reboot time") + if err := rootCmd.Execute(); err != nil { log.Fatal(err) } @@ -265,7 +280,7 @@ type nodeMeta struct { Unschedulable bool `json:"unschedulable"` } -func rebootAsRequired(nodeID string) { +func 
rebootAsRequired(nodeID string, window *timewindow.TimeWindow) { config, err := rest.InClusterConfig() if err != nil { log.Fatal(err) @@ -289,7 +304,7 @@ func rebootAsRequired(nodeID string) { source := rand.NewSource(time.Now().UnixNano()) tick := delaytick.New(source, period) for _ = range tick { - if rebootRequired() && !rebootBlocked(client, nodeID) { + if window.Contains(time.Now()) && rebootRequired() && !rebootBlocked(client, nodeID) { node, err := client.CoreV1().Nodes().Get(nodeID, metav1.GetOptions{}) if err != nil { log.Fatal(err) @@ -318,12 +333,19 @@ func root(cmd *cobra.Command, args []string) { log.Fatal("KURED_NODE_ID environment variable required") } + // Make sure location is loadable (to prevent errors down the line) + window, err := timewindow.New(rebootDays, rebootStart, rebootEnd, timezone) + if err != nil { + log.Fatalf("Failed to build time window: %v", err) + } + log.Infof("Node ID: %s", nodeID) log.Infof("Lock Annotation: %s/%s:%s", dsNamespace, dsName, lockAnnotation) log.Infof("Reboot Sentinel: %s every %v", rebootSentinel, period) log.Infof("Blocking Pod Selectors: %v", podSelectors) + log.Infof("Reboot on: %v", window) - go rebootAsRequired(nodeID) + go rebootAsRequired(nodeID, window) go maintainRebootRequiredMetric(nodeID) http.Handle("/metrics", promhttp.Handler()) diff --git a/pkg/timewindow/days.go b/pkg/timewindow/days.go new file mode 100644 index 000000000..298cfe44d --- /dev/null +++ b/pkg/timewindow/days.go @@ -0,0 +1,83 @@ +package timewindow + +import ( + "fmt" + "strconv" + "strings" + "time" +) + +var dayStrings = map[string]time.Weekday{ + "su": time.Sunday, + "sun": time.Sunday, + "sunday": time.Sunday, + "mo": time.Monday, + "mon": time.Monday, + "monday": time.Monday, + "tu": time.Tuesday, + "tue": time.Tuesday, + "tuesday": time.Tuesday, + "we": time.Wednesday, + "wed": time.Wednesday, + "wednesday": time.Wednesday, + "th": time.Thursday, + "thu": time.Thursday, + "thursday": time.Thursday, + "fr": time.Friday, + 
"fri": time.Friday, + "friday": time.Friday, + "sa": time.Saturday, + "sat": time.Saturday, + "saturday": time.Saturday, +} + +type weekdays []time.Weekday + +func parseWeekdays(days []string) (weekdays, error) { + var result []time.Weekday + for _, day := range days { + weekday, err := parseWeekday(day) + if err != nil { + return nil, err + } + + result = append(result, weekday) + } + + return weekdays(result), nil +} + +func (w weekdays) Contains(day time.Weekday) bool { + for _, d := range w { + if d == day { + return true + } + } + + return false +} + +func (w weekdays) String() string { + var days []string + for _, d := range w { + days = append(days, d.String()) + } + + return strings.Join(days, ",") +} + +func parseWeekday(day string) (time.Weekday, error) { + if n, err := strconv.Atoi(day); err == nil { + if n >= 0 && n < 7 { + return time.Weekday(n), nil + } else { + return time.Sunday, fmt.Errorf("Invalid weekday, number out of range: %s", day) + } + } + + if day, ok := dayStrings[strings.ToLower(day)]; ok { + return day, nil + } else { + return time.Sunday, fmt.Errorf("Invalid weekday: %s", day) + } +} diff --git a/pkg/timewindow/timewindow.go b/pkg/timewindow/timewindow.go new file mode 100644 index 000000000..b2ca5327c --- /dev/null +++ b/pkg/timewindow/timewindow.go @@ -0,0 +1,64 @@ +package timewindow + +import ( + "fmt" + "time" +) + +// Represents a time window +type TimeWindow struct { + days weekdays + location *time.Location + startTime time.Time + endTime time.Time +} + +func New(days []string, startTime, endTime, location string) (*TimeWindow, error) { + tw := &TimeWindow{} + + var err error + if tw.days, err = parseWeekdays(days); err != nil { + return nil, err + } + + if tw.location, err = time.LoadLocation(location); err != nil { + return nil, err + } + + if tw.startTime, err = parseTime(startTime, tw.location); err != nil { + return nil, err + } + + if tw.endTime, err = parseTime(endTime, tw.location); err != nil { + return nil, err + } + 
+ return tw, nil +} + +func (tw *TimeWindow) Contains(t time.Time) bool { + loctime := t.In(tw.location) + if !tw.days.Contains(loctime.Weekday()) { + return false + } + + start := time.Date(loctime.Year(), loctime.Month(), loctime.Day(), tw.startTime.Hour(), tw.startTime.Minute(), tw.startTime.Second(), 0, tw.location) + end := time.Date(loctime.Year(), loctime.Month(), loctime.Day(), tw.endTime.Hour(), tw.endTime.Minute(), tw.endTime.Second(), 0, tw.location) + + return loctime.After(start) && loctime.Before(end) +} + +func (tw *TimeWindow) String() string { + return fmt.Sprintf("%s between %02d:%02d and %02d:%02d %s", tw.days.String(), tw.startTime.Hour(), tw.startTime.Minute(), tw.endTime.Hour(), tw.endTime.Minute(), tw.location.String()) +} + +func parseTime(s string, loc *time.Location) (time.Time, error) { + fmts := []string{"15:04", "15:04:06", "03:04pm", "15", "03pm"} + for _, f := range fmts { + if t, err := time.ParseInLocation(f, s, loc); err == nil { + return t, nil + } + } + + return time.Now(), fmt.Errorf("Invalid time format: %s", s) +} From 3f6713fa658c7ab66756c0ff09010ae11400f143 Mon Sep 17 00:00:00 2001 From: JJ Jordan Date: Tue, 2 Apr 2019 01:28:59 -0700 Subject: [PATCH 2/8] More forgiving inputs --- cmd/kured/main.go | 2 +- pkg/timewindow/timewindow.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/kured/main.go b/cmd/kured/main.go index 842b59c76..7ca3b216c 100644 --- a/cmd/kured/main.go +++ b/cmd/kured/main.go @@ -85,7 +85,7 @@ func main() { rootCmd.PersistentFlags().StringArrayVar(&podSelectors, "blocking-pod-selector", nil, "label selector identifying pods whose presence should prevent reboots") - rootCmd.PersistentFlags().StringArrayVar(&rebootDays, "reboot-on-days", []string{"sun", "mon", "tue", "wed", "thu", "fri", "sat"}, + rootCmd.PersistentFlags().StringSliceVar(&rebootDays, "reboot-on-days", []string{"sun", "mon", "tue", "wed", "thu", "fri", "sat"}, "only reboot on these days") 
rootCmd.PersistentFlags().StringVar(&rebootStart, "start-time", "0:00", "only reboot after this time of day") diff --git a/pkg/timewindow/timewindow.go b/pkg/timewindow/timewindow.go index b2ca5327c..c1062cbbb 100644 --- a/pkg/timewindow/timewindow.go +++ b/pkg/timewindow/timewindow.go @@ -53,7 +53,7 @@ func (tw *TimeWindow) String() string { } func parseTime(s string, loc *time.Location) (time.Time, error) { - fmts := []string{"15:04", "15:04:06", "03:04pm", "15", "03pm"} + fmts := []string{"15:04", "15:04:06", "03:04pm", "15", "03pm", "3pm"} for _, f := range fmts { if t, err := time.ParseInLocation(f, s, loc); err == nil { return t, nil From 7d85e85db0292ba4fcddc4dae474bb7201e2b55d Mon Sep 17 00:00:00 2001 From: JJ Jordan Date: Thu, 4 Apr 2019 01:02:27 -0700 Subject: [PATCH 3/8] weekdays to use a bitmap, add (minimal) tests --- cmd/kured/main.go | 2 +- pkg/timewindow/days.go | 46 +++++++++++++++---------- pkg/timewindow/days_test.go | 46 +++++++++++++++++++++++++ pkg/timewindow/timewindow.go | 6 +++- pkg/timewindow/timewindow_test.go | 56 +++++++++++++++++++++++++++++++ 5 files changed, 137 insertions(+), 19 deletions(-) create mode 100644 pkg/timewindow/days_test.go create mode 100644 pkg/timewindow/timewindow_test.go diff --git a/cmd/kured/main.go b/cmd/kured/main.go index 7ca3b216c..a4fde16bb 100644 --- a/cmd/kured/main.go +++ b/cmd/kured/main.go @@ -85,7 +85,7 @@ func main() { rootCmd.PersistentFlags().StringArrayVar(&podSelectors, "blocking-pod-selector", nil, "label selector identifying pods whose presence should prevent reboots") - rootCmd.PersistentFlags().StringSliceVar(&rebootDays, "reboot-on-days", []string{"sun", "mon", "tue", "wed", "thu", "fri", "sat"}, + rootCmd.PersistentFlags().StringSliceVar(&rebootDays, "reboot-on-days", timewindow.EveryDay, "only reboot on these days") rootCmd.PersistentFlags().StringVar(&rebootStart, "start-time", "0:00", "only reboot after this time of day") diff --git a/pkg/timewindow/days.go b/pkg/timewindow/days.go index 
298cfe44d..2d6c42d3e 100644 --- a/pkg/timewindow/days.go +++ b/pkg/timewindow/days.go @@ -7,6 +7,9 @@ import ( "time" ) +var EveryDay = []string{"su", "mo", "tu", "we", "th", "fr", "sa"} + +// dayStrings maps day strings to time.Weekdays var dayStrings = map[string]time.Weekday{ "su": time.Sunday, "sun": time.Sunday, @@ -31,39 +34,48 @@ var dayStrings = map[string]time.Weekday{ "saturday": time.Saturday, } -type weekdays []time.Weekday +type weekdays uint32 +// parseWeekdays creates a set of weekdays from a string slice func parseWeekdays(days []string) (weekdays, error) { - var result []time.Weekday + var result uint32 for _, day := range days { + if len(day) == 0 { + continue + } + weekday, err := parseWeekday(day) if err != nil { - return nil, err + return weekdays(0), err } - result = append(result, weekday) + result |= 1 << uint32(weekday) } return weekdays(result), nil } +// Contains returns true if the specified weekday is a member of this set. func (w weekdays) Contains(day time.Weekday) bool { - for _, d := range w { - if d == day { - return true - } - } - - return false + return uint32(w)&(1< Date: Thu, 4 Apr 2019 01:04:39 -0700 Subject: [PATCH 4/8] Improve help strings (?) 
--- cmd/kured/main.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmd/kured/main.go b/cmd/kured/main.go index a4fde16bb..3188e226a 100644 --- a/cmd/kured/main.go +++ b/cmd/kured/main.go @@ -85,14 +85,14 @@ func main() { rootCmd.PersistentFlags().StringArrayVar(&podSelectors, "blocking-pod-selector", nil, "label selector identifying pods whose presence should prevent reboots") - rootCmd.PersistentFlags().StringSliceVar(&rebootDays, "reboot-on-days", timewindow.EveryDay, - "only reboot on these days") + rootCmd.PersistentFlags().StringSliceVar(&rebootDays, "reboot-days", timewindow.EveryDay, + "schedule reboot on these days") rootCmd.PersistentFlags().StringVar(&rebootStart, "start-time", "0:00", - "only reboot after this time of day") + "schedule reboot only after this time of day") rootCmd.PersistentFlags().StringVar(&rebootEnd, "end-time", "23:59", - "only reboot before this time of day") + "schedule reboot only before this time of day") rootCmd.PersistentFlags().StringVar(&timezone, "time-zone", "UTC", - "use this timezone to calculate allowed reboot time") + "use this timezone for schedule inputs") if err := rootCmd.Execute(); err != nil { log.Fatal(err) From 972ef4240b1ed34d7e73bfa3c76ec654e24cc501 Mon Sep 17 00:00:00 2001 From: JJ Jordan Date: Thu, 4 Apr 2019 01:06:03 -0700 Subject: [PATCH 5/8] Comment --- cmd/kured/main.go | 1 - 1 file changed, 1 deletion(-) diff --git a/cmd/kured/main.go b/cmd/kured/main.go index 3188e226a..68983dff8 100644 --- a/cmd/kured/main.go +++ b/cmd/kured/main.go @@ -333,7 +333,6 @@ func root(cmd *cobra.Command, args []string) { log.Fatal("KURED_NODE_ID environment variable required") } - // Make sure location is loadable (to prevent errors down the line) window, err := timewindow.New(rebootDays, rebootStart, rebootEnd, timezone) if err != nil { log.Fatalf("Failed to build time window: %v", err) From 3a89e3e058a37d9d451ebf6da10f15929c43c3f5 Mon Sep 17 00:00:00 2001 From: JJ Jordan Date: Tue, 9 Apr 
2019 18:02:11 -0700 Subject: [PATCH 6/8] Add documentation section, params to daemonset, tzdata to docker container --- README.md | 28 ++++++++++++++++++++++++++++ cmd/kured/Dockerfile | 2 +- kured-ds.yaml | 4 ++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4bade6ccc..edb3df34f 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ * [Installation](#installation) * [Configuration](#configuration) * [Reboot Sentinel File & Period](#reboot-sentinel-file-&-period) + * [Setting a schedule](#setting-a-schedule) * [Blocking Reboots via Alerts](#blocking-reboots-via-alerts) * [Blocking Reboots via Pods](#blocking-reboots-via-pods) * [Prometheus Metrics](#prometheus-metrics) @@ -72,13 +73,17 @@ Flags: --blocking-pod-selector stringArray label selector identifying pods whose presence should prevent reboots --ds-name string name of daemonset on which to place lock (default "kured") --ds-namespace string namespace containing daemonset on which to place lock (default "kube-system") + --end-time string only reboot before this time of day (default "23:59") -h, --help help for kured --lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock") --period duration reboot check period (default 1h0m0s) --prometheus-url string Prometheus instance to probe for active alerts + --reboot-days strings only reboot on these days (default [su,mo,tu,we,th,fr,sa]) --reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required") --slack-hook-url string slack hook URL for reboot notfications --slack-username string slack username for reboot notfications (default "kured") + --start-time string only reboot after this time of day (default "0:00") + --time-zone string use this timezone to calculate allowed reboot time (default "UTC") ``` ### Reboot Sentinel File & Period @@ -89,6 +94,29 @@ values with `--reboot-sentinel` and `--period`. 
Each replica of the daemon uses a random offset derived from the period on startup so that nodes don't all contend for the lock simultaneously. +### Setting a schedule + +By default, kured will reboot any time it detects the sentinel, but this +may cause reboots during odd hours. While service disruption does not +normally occur, anything is possible and operators may want to restrict +reboots to predictable schedules. Use `--reboot-days`, `--start-time`, +`--end-time`, and `--time-zone` to set a schedule. For example, business +hours on the west coast USA can be specified with: + +``` + --reboot-days mon,tue,wed,thu,fri + --start-time 9am + --end-time 5pm + --time-zone America/Los_Angeles +``` + +Times can be formatted in numerous ways, including `5pm`, `5:00pm`, `17:00`, +and `17`. `--time-zone` represents a Go `time.Location`, and can be `UTC`, +`Local`, or any entry in the standard Linux tz database. + +Note that when using smaller time windows, you should consider shortening +the sentinel check period (`--period`). + ### Blocking Reboots via Alerts You may find it desirable to block automatic node reboots when there diff --git a/cmd/kured/Dockerfile b/cmd/kured/Dockerfile index dba402113..1a264557a 100644 --- a/cmd/kured/Dockerfile +++ b/cmd/kured/Dockerfile @@ -1,5 +1,5 @@ FROM alpine:3.8 -RUN apk update && apk add ca-certificates && rm -rf /var/cache/apk/* +RUN apk update && apk add ca-certificates tzdata && rm -rf /var/cache/apk/* # NB: you may need to update RBAC permissions when upgrading kubectl - see kured-rbac.yaml for details ADD https://storage.googleapis.com/kubernetes-release/release/v1.12.1/bin/linux/amd64/kubectl /usr/bin/kubectl RUN chmod 0755 /usr/bin/kubectl diff --git a/kured-ds.yaml b/kured-ds.yaml index 72978dc7f..c68369f8e 100644 --- a/kured-ds.yaml +++ b/kured-ds.yaml @@ -51,9 +51,13 @@ spec: # - --blocking-pod-selector=... 
# - --ds-name=kured # - --ds-namespace=kube-system +# - --end-time=23:59 # - --lock-annotation=weave.works/kured-node-lock # - --period=1h # - --prometheus-url=http://prometheus.monitoring.svc.cluster.local +# - --reboot-days=sun,mon,tue,wed,thu,fri,sat # - --reboot-sentinel=/var/run/reboot-required # - --slack-hook-url=https://hooks.slack.com/... # - --slack-username=prod +# - --start-time=0:00 +# - --time-zone=UTC From 357687b05365e8222467e79f1a637c88e4bd301a Mon Sep 17 00:00:00 2001 From: JJ Jordan Date: Thu, 18 Apr 2019 18:00:29 -0700 Subject: [PATCH 7/8] Fix seconds format in parser, address (an unimportant) corner case --- cmd/kured/main.go | 2 +- kured-ds.yaml | 2 +- pkg/timewindow/timewindow.go | 6 +++--- pkg/timewindow/timewindow_test.go | 4 ++++ 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cmd/kured/main.go b/cmd/kured/main.go index 68983dff8..af5ab6c48 100644 --- a/cmd/kured/main.go +++ b/cmd/kured/main.go @@ -89,7 +89,7 @@ func main() { "schedule reboot on these days") rootCmd.PersistentFlags().StringVar(&rebootStart, "start-time", "0:00", "schedule reboot only after this time of day") - rootCmd.PersistentFlags().StringVar(&rebootEnd, "end-time", "23:59", + rootCmd.PersistentFlags().StringVar(&rebootEnd, "end-time", "23:59:59", "schedule reboot only before this time of day") rootCmd.PersistentFlags().StringVar(&timezone, "time-zone", "UTC", "use this timezone for schedule inputs") diff --git a/kured-ds.yaml b/kured-ds.yaml index c68369f8e..81033e0bb 100644 --- a/kured-ds.yaml +++ b/kured-ds.yaml @@ -51,7 +51,7 @@ spec: # - --blocking-pod-selector=... 
# - --ds-name=kured # - --ds-namespace=kube-system -# - --end-time=23:59 +# - --end-time=23:59:59 # - --lock-annotation=weave.works/kured-node-lock # - --period=1h # - --prometheus-url=http://prometheus.monitoring.svc.cluster.local diff --git a/pkg/timewindow/timewindow.go b/pkg/timewindow/timewindow.go index 7189f9e3e..6ac1a0eb1 100644 --- a/pkg/timewindow/timewindow.go +++ b/pkg/timewindow/timewindow.go @@ -45,9 +45,9 @@ func (tw *TimeWindow) Contains(t time.Time) bool { } start := time.Date(loctime.Year(), loctime.Month(), loctime.Day(), tw.startTime.Hour(), tw.startTime.Minute(), tw.startTime.Second(), 0, tw.location) - end := time.Date(loctime.Year(), loctime.Month(), loctime.Day(), tw.endTime.Hour(), tw.endTime.Minute(), tw.endTime.Second(), 0, tw.location) + end := time.Date(loctime.Year(), loctime.Month(), loctime.Day(), tw.endTime.Hour(), tw.endTime.Minute(), tw.endTime.Second(), 1e9-1, tw.location) - return loctime.After(start) && loctime.Before(end) + return (loctime.After(start) || loctime.Equal(start)) && (loctime.Before(end) || loctime.Equal(end)) } // String returns a string representation of this time window. @@ -57,7 +57,7 @@ func (tw *TimeWindow) String() string { // parseTime tries to parse a time with several formats. 
func parseTime(s string, loc *time.Location) (time.Time, error) { - fmts := []string{"15:04", "15:04:06", "03:04pm", "15", "03pm", "3pm"} + fmts := []string{"15:04", "15:04:05", "03:04pm", "15", "03pm", "3pm"} for _, f := range fmts { if t, err := time.ParseInLocation(f, s, loc); err == nil { return t, nil diff --git a/pkg/timewindow/timewindow_test.go b/pkg/timewindow/timewindow_test.go index 289a166b6..a629a4552 100644 --- a/pkg/timewindow/timewindow_test.go +++ b/pkg/timewindow/timewindow_test.go @@ -36,6 +36,10 @@ func TestTimeWindows(t *testing.T) { {"2019/04/10 10:30 PDT", true}, {"2019/04/11 10:30 PDT", false}, }}, + {"mo,tu,we,th,fr", "00:00", "23:59:59", "UTC", []testcase{ + {"2019/04/18 00:00 UTC", true}, + {"2019/04/18 23:59 UTC", true}, + }}, } for i, tst := range tests { From 7249f1c57392f90442a5779ba42064d40861ae00 Mon Sep 17 00:00:00 2001 From: JJ Jordan Date: Thu, 18 Apr 2019 18:02:13 -0700 Subject: [PATCH 8/8] Reduce silliness --- pkg/timewindow/days.go | 4 ---- pkg/timewindow/days_test.go | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pkg/timewindow/days.go b/pkg/timewindow/days.go index 2d6c42d3e..2635c84e5 100644 --- a/pkg/timewindow/days.go +++ b/pkg/timewindow/days.go @@ -62,10 +62,6 @@ func (w weekdays) Contains(day time.Weekday) bool { // String returns a string representation of the set of weekdays. func (w weekdays) String() string { - if uint32(w) == 0 { - return "(No days)" - } - var b strings.Builder for i := uint32(0); i < 7; i++ { if uint32(w)&(1<