Skip to content

Commit

Permalink
Merge pull request #1 from jjjordanmsft/master
Browse files Browse the repository at this point in the history
Merge upstream patch to add schedulability
  • Loading branch information
markine authored Jun 10, 2019
2 parents 4d2b876 + 7249f1c commit e5e8ac9
Show file tree
Hide file tree
Showing 8 changed files with 322 additions and 4 deletions.
28 changes: 28 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* [Installation](#installation)
* [Configuration](#configuration)
* [Reboot Sentinel File & Period](#reboot-sentinel-file-&-period)
* [Setting a schedule](#setting-a-schedule)
* [Blocking Reboots via Alerts](#blocking-reboots-via-alerts)
* [Blocking Reboots via Pods](#blocking-reboots-via-pods)
* [Prometheus Metrics](#prometheus-metrics)
Expand Down Expand Up @@ -74,13 +75,17 @@ Flags:
--blocking-pod-selector stringArray label selector identifying pods whose presence should prevent reboots
--ds-name string name of daemonset on which to place lock (default "kured")
--ds-namespace string namespace containing daemonset on which to place lock (default "kube-system")
--end-time string only reboot before this time of day (default "23:59")
-h, --help help for kured
--lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock")
--period duration reboot check period (default 1h0m0s)
--prometheus-url string Prometheus instance to probe for active alerts
--reboot-days strings only reboot on these days (default [su,mo,tu,we,th,fr,sa])
--reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required")
--slack-hook-url string slack hook URL for reboot notfications
--slack-username string slack username for reboot notfications (default "kured")
--start-time string only reboot after this time of day (default "0:00")
--time-zone string use this timezone to calculate allowed reboot time (default "UTC")
```

### Reboot Sentinel File & Period
Expand All @@ -91,6 +96,29 @@ values with `--reboot-sentinel` and `--period`. Each replica of the
daemon uses a random offset derived from the period on startup so that
nodes don't all contend for the lock simultaneously.

### Setting a schedule

By default, kured will reboot any time it detects the sentinel, but this
may cause reboots during odd hours. While service disruption does not
normally occur, anything is possible and operators may want to restrict
reboots to predictable schedules. Use `--reboot-days`, `--start-time`,
`--end-time`, and `--time-zone` to set a schedule. For example, business
hours on the west coast USA can be specified with:

```
--reboot-days mon,tue,wed,thu,fri
--start-time 9am
--end-time 5pm
--time-zone America/Los_Angeles
```

Times can be formatted in numerous ways, including `5pm`, `5:00pm` `17:00`,
and `17`. `--time-zone` represents a Go `time.Location`, and can be `UTC`,
`Local`, or any entry in the standard Linux tz database.

Note that when using smaller time windows, you should consider shortening
the sentinel check period (`--period`).

### Blocking Reboots via Alerts

You may find it desirable to block automatic node reboots when there
Expand Down
2 changes: 1 addition & 1 deletion cmd/kured/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
FROM alpine:3.8
RUN apk update && apk add ca-certificates && rm -rf /var/cache/apk/*
RUN apk update && apk add ca-certificates tzdata && rm -rf /var/cache/apk/*
# NB: you may need to update RBAC permissions when upgrading kubectl - see kured-rbac.yaml for details
ADD https://storage.googleapis.com/kubernetes-release/release/v1.14.1/bin/linux/amd64/kubectl /usr/bin/kubectl
RUN chmod 0755 /usr/bin/kubectl
Expand Down
27 changes: 24 additions & 3 deletions cmd/kured/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/weaveworks/kured/pkg/daemonsetlock"
"github.com/weaveworks/kured/pkg/delaytick"
"github.com/weaveworks/kured/pkg/notifications/slack"
"github.com/weaveworks/kured/pkg/timewindow"
)

var (
Expand All @@ -38,6 +39,11 @@ var (
slackUsername string
podSelectors []string

rebootDays []string
rebootStart string
rebootEnd string
timezone string

// Metrics
rebootRequiredGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Subsystem: "kured",
Expand Down Expand Up @@ -79,6 +85,15 @@ func main() {
rootCmd.PersistentFlags().StringArrayVar(&podSelectors, "blocking-pod-selector", nil,
"label selector identifying pods whose presence should prevent reboots")

rootCmd.PersistentFlags().StringSliceVar(&rebootDays, "reboot-days", timewindow.EveryDay,
"schedule reboot on these days")
rootCmd.PersistentFlags().StringVar(&rebootStart, "start-time", "0:00",
"schedule reboot only after this time of day")
rootCmd.PersistentFlags().StringVar(&rebootEnd, "end-time", "23:59:59",
"schedule reboot only before this time of day")
rootCmd.PersistentFlags().StringVar(&timezone, "time-zone", "UTC",
"use this timezone for schedule inputs")

if err := rootCmd.Execute(); err != nil {
log.Fatal(err)
}
Expand Down Expand Up @@ -265,7 +280,7 @@ type nodeMeta struct {
Unschedulable bool `json:"unschedulable"`
}

func rebootAsRequired(nodeID string) {
func rebootAsRequired(nodeID string, window *timewindow.TimeWindow) {
config, err := rest.InClusterConfig()
if err != nil {
log.Fatal(err)
Expand All @@ -289,7 +304,7 @@ func rebootAsRequired(nodeID string) {
source := rand.NewSource(time.Now().UnixNano())
tick := delaytick.New(source, period)
for _ = range tick {
if rebootRequired() && !rebootBlocked(client, nodeID) {
if window.Contains(time.Now()) && rebootRequired() && !rebootBlocked(client, nodeID) {
node, err := client.CoreV1().Nodes().Get(nodeID, metav1.GetOptions{})
if err != nil {
log.Fatal(err)
Expand Down Expand Up @@ -318,12 +333,18 @@ func root(cmd *cobra.Command, args []string) {
log.Fatal("KURED_NODE_ID environment variable required")
}

window, err := timewindow.New(rebootDays, rebootStart, rebootEnd, timezone)
if err != nil {
log.Fatalf("Failed to build time window: %v", err)
}

log.Infof("Node ID: %s", nodeID)
log.Infof("Lock Annotation: %s/%s:%s", dsNamespace, dsName, lockAnnotation)
log.Infof("Reboot Sentinel: %s every %v", rebootSentinel, period)
log.Infof("Blocking Pod Selectors: %v", podSelectors)
log.Infof("Reboot on: %v", window)

go rebootAsRequired(nodeID)
go rebootAsRequired(nodeID, window)
go maintainRebootRequiredMetric(nodeID)

http.Handle("/metrics", promhttp.Handler())
Expand Down
4 changes: 4 additions & 0 deletions kured-ds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,13 @@ spec:
# - --blocking-pod-selector=...
# - --ds-name=kured
# - --ds-namespace=kube-system
# - --end-time=23:59:59
# - --lock-annotation=weave.works/kured-node-lock
# - --period=1h
# - --prometheus-url=http://prometheus.monitoring.svc.cluster.local
# - --reboot-days=sun,mon,tue,wed,thu,fri,sat
# - --reboot-sentinel=/var/run/reboot-required
# - --slack-hook-url=https://hooks.slack.com/...
# - --slack-username=prod
# - --start-time=0:00
# - --time-zone=UTC
91 changes: 91 additions & 0 deletions pkg/timewindow/days.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package timewindow

import (
"fmt"
"strconv"
"strings"
"time"
)

var EveryDay = []string{"su", "mo", "tu", "we", "th", "fr", "sa"}

// dayStrings maps day strings to time.Weekdays
var dayStrings = map[string]time.Weekday{
"su": time.Sunday,
"sun": time.Sunday,
"sunday": time.Sunday,
"mo": time.Monday,
"mon": time.Monday,
"monday": time.Monday,
"tu": time.Tuesday,
"tue": time.Tuesday,
"tuesday": time.Tuesday,
"we": time.Wednesday,
"wed": time.Wednesday,
"wednesday": time.Wednesday,
"th": time.Thursday,
"thu": time.Thursday,
"thursday": time.Thursday,
"fr": time.Friday,
"fri": time.Friday,
"friday": time.Friday,
"sa": time.Saturday,
"sat": time.Saturday,
"saturday": time.Saturday,
}

type weekdays uint32

// parseWeekdays creates a set of weekdays from a string slice
func parseWeekdays(days []string) (weekdays, error) {
var result uint32
for _, day := range days {
if len(day) == 0 {
continue
}

weekday, err := parseWeekday(day)
if err != nil {
return weekdays(0), err
}

result |= 1 << uint32(weekday)
}

return weekdays(result), nil
}

// Contains returns true if the specified weekday is a member of this set.
func (w weekdays) Contains(day time.Weekday) bool {
return uint32(w)&(1<<uint32(day)) != 0
}

// String returns a string representation of the set of weekdays.
func (w weekdays) String() string {
var b strings.Builder
for i := uint32(0); i < 7; i++ {
if uint32(w)&(1<<i) != 0 {
b.WriteString(time.Weekday(i).String()[0:3])
} else {
b.WriteString("---")
}
}

return b.String()
}

func parseWeekday(day string) (time.Weekday, error) {
if n, err := strconv.Atoi(day); err == nil {
if n >= 0 && n < 7 {
return time.Weekday(n), nil
} else {
return time.Sunday, fmt.Errorf("Invalid weekday, number out of range: %s", day)
}
}

if weekday, ok := dayStrings[strings.ToLower(day)]; ok {
return weekday, nil
} else {
return time.Sunday, fmt.Errorf("Invalid weekday: %s", day)
}
}
46 changes: 46 additions & 0 deletions pkg/timewindow/days_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package timewindow

import (
"strings"
"testing"
)

func TestParseWeekdays(t *testing.T) {
tests := []struct {
input string
result string
}{
{"0,4", "Sun---------Thu------"},
{"su,mo,tu", "SunMonTue------------"},
{"sunday,tu,thu", "Sun---Tue---Thu------"},
{"THURSDAY", "------------Thu------"},
{"we,WED,WeDnEsDaY", "---------Wed---------"},
{"", "---------------------"},
{",,,", "---------------------"},
}

for _, tst := range tests {
res, err := parseWeekdays(strings.Split(tst.input, ","))
if err != nil {
t.Errorf("Received error for input %s: %v", tst.input, err)
} else if res.String() != tst.result {
t.Errorf("Test %s: Expected %s got %s", tst.input, tst.result, res.String())
}
}
}

func TestParseWeekdaysErrors(t *testing.T) {
tests := []string{
"15",
"-8",
"8",
"mon,tue,wed,fridayyyy",
}

for _, tst := range tests {
_, err := parseWeekdays(strings.Split(tst, ","))
if err == nil {
t.Errorf("Expected to receive error for input %s", tst)
}
}
}
68 changes: 68 additions & 0 deletions pkg/timewindow/timewindow.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package timewindow

import (
"fmt"
"time"
)

// TimeWindow specifies a schedule of days and times.
type TimeWindow struct {
days weekdays
location *time.Location
startTime time.Time
endTime time.Time
}

// New creates a TimeWindow instance based on string inputs specifying a schedule.
func New(days []string, startTime, endTime, location string) (*TimeWindow, error) {
tw := &TimeWindow{}

var err error
if tw.days, err = parseWeekdays(days); err != nil {
return nil, err
}

if tw.location, err = time.LoadLocation(location); err != nil {
return nil, err
}

if tw.startTime, err = parseTime(startTime, tw.location); err != nil {
return nil, err
}

if tw.endTime, err = parseTime(endTime, tw.location); err != nil {
return nil, err
}

return tw, nil
}

// Contains determines whether the specified time is within this time window.
func (tw *TimeWindow) Contains(t time.Time) bool {
loctime := t.In(tw.location)
if !tw.days.Contains(loctime.Weekday()) {
return false
}

start := time.Date(loctime.Year(), loctime.Month(), loctime.Day(), tw.startTime.Hour(), tw.startTime.Minute(), tw.startTime.Second(), 0, tw.location)
end := time.Date(loctime.Year(), loctime.Month(), loctime.Day(), tw.endTime.Hour(), tw.endTime.Minute(), tw.endTime.Second(), 1e9-1, tw.location)

return (loctime.After(start) || loctime.Equal(start)) && (loctime.Before(end) || loctime.Equal(end))
}

// String returns a string representation of this time window.
func (tw *TimeWindow) String() string {
return fmt.Sprintf("%s between %02d:%02d and %02d:%02d %s", tw.days.String(), tw.startTime.Hour(), tw.startTime.Minute(), tw.endTime.Hour(), tw.endTime.Minute(), tw.location.String())
}

// parseTime tries to parse a time with several formats.
func parseTime(s string, loc *time.Location) (time.Time, error) {
fmts := []string{"15:04", "15:04:05", "03:04pm", "15", "03pm", "3pm"}
for _, f := range fmts {
if t, err := time.ParseInLocation(f, s, loc); err == nil {
return t, nil
}
}

return time.Now(), fmt.Errorf("Invalid time format: %s", s)
}
Loading

0 comments on commit e5e8ac9

Please sign in to comment.