From 96b59d5d8b1da64ed447f964ea2f71cd7201bbc0 Mon Sep 17 00:00:00 2001
From: Kimmo Lehto
Date: Wed, 28 Aug 2024 13:35:59 +0300
Subject: [PATCH] Reinstall already installed k0s to reconfigure installFlags

Signed-off-by: Kimmo Lehto
---
 action/apply.go              |   1 +
 phase/gather_k0s_facts.go    |   7 +-
 phase/install_controllers.go |   3 +-
 phase/reinstall.go           | 137 ++++++++++++++++++
 phase/upgrade_controllers.go |   2 +
 .../v1beta1/cluster/host.go  |  51 ++++++-
 6 files changed, 196 insertions(+), 5 deletions(-)
 create mode 100644 phase/reinstall.go

diff --git a/action/apply.go b/action/apply.go
index 27e30766..da9aeab7 100644
--- a/action/apply.go
+++ b/action/apply.go
@@ -75,6 +75,7 @@ func (a Apply) Run() error {
 		&phase.InstallWorkers{},
 		&phase.UpgradeControllers{},
 		&phase.UpgradeWorkers{NoDrain: a.NoDrain},
+		&phase.Reinstall{},
 		&phase.ResetWorkers{NoDrain: a.NoDrain},
 		&phase.ResetControllers{NoDrain: a.NoDrain},
 		&phase.RunHooks{Stage: "after", Action: "apply"},
diff --git a/phase/gather_k0s_facts.go b/phase/gather_k0s_facts.go
index 3a61b24c..a585ee66 100644
--- a/phase/gather_k0s_facts.go
+++ b/phase/gather_k0s_facts.go
@@ -271,9 +271,14 @@ func (p *GatherK0sFacts) investigateK0s(h *cluster.Host) error {
 
 	h.Metadata.NeedsUpgrade = p.needsUpgrade(h)
 
+	if len(status.Args) > 2 {
+		// status.Args contains the binary path and the role as the first two elements, which we can ignore here.
+		h.Metadata.K0sStatusArgs = status.Args[2:]
+	}
+
 	log.Infof("%s: is running k0s %s version %s", h, h.Role, h.Metadata.K0sRunningVersion)
 	if h.IsController() {
-		for _, a := range status.Args {
+		for _, a := range h.Metadata.K0sStatusArgs {
 			if strings.HasPrefix(a, "--enable-dynamic-config") && !strings.HasSuffix(a, "false") {
 				if !p.Config.Spec.K0s.DynamicConfig {
 					log.Warnf("%s: controller has dynamic config enabled, but spec.k0s.dynamicConfig was not set in configuration, proceeding in dynamic config mode", h)
diff --git a/phase/install_controllers.go b/phase/install_controllers.go
index 74bcbf65..5ce0e412 100644
--- a/phase/install_controllers.go
+++ b/phase/install_controllers.go
@@ -77,7 +77,6 @@ func (p *InstallControllers) After() error {
 			log.Warnf("%s: failed to invalidate worker join token: %v", p.leader, err)
 		}
 		_ = p.Wet(h, "overwrite k0s join token file", func() error {
-
 			if err := h.Configurer.WriteFile(h, h.K0sJoinTokenPath(), "# overwritten by k0sctl after join\n", "0600"); err != nil {
 				log.Warnf("%s: failed to overwrite the join token file at %s", h, h.K0sJoinTokenPath())
 			}
@@ -105,7 +104,6 @@ func (p *InstallControllers) Run() error {
 		}
 		return nil
 	})
-
 	if err != nil {
 		return err
 	}
@@ -162,6 +160,7 @@ func (p *InstallControllers) Run() error {
 			return err
 		}
 		h.Metadata.K0sInstalled = true
+		h.Metadata.K0sRunningVersion = p.Config.Spec.K0s.Version
 
 		if p.IsWet() {
 			if len(h.Environment) > 0 {
diff --git a/phase/reinstall.go b/phase/reinstall.go
new file mode 100644
index 00000000..1ef42163
--- /dev/null
+++ b/phase/reinstall.go
@@ -0,0 +1,137 @@
+package phase
+
+import (
+	"context"
+	"fmt"
+	"math"
+	"strings"
+	"time"
+
+	"github.com/k0sproject/k0sctl/pkg/apis/k0sctl.k0sproject.io/v1beta1"
+	"github.com/k0sproject/k0sctl/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster"
+	"github.com/k0sproject/k0sctl/pkg/node"
+	"github.com/k0sproject/k0sctl/pkg/retry"
+	"github.com/k0sproject/rig/exec"
+	log "github.com/sirupsen/logrus"
+)
+
+type Reinstall struct {
+	GenericPhase
+	hosts cluster.Hosts
+}
+
+// Title for the phase
+func (p *Reinstall) Title() string {
+	return "Reinstall"
+}
+
+// Prepare the phase
+func (p *Reinstall) Prepare(config *v1beta1.Cluster) error {
+	p.Config = config
+	p.hosts = p.Config.Spec.Hosts.Filter(func(h *cluster.Host) bool {
+		return !h.Metadata.K0sInstalled && h.Metadata.K0sRunningVersion != nil && !h.Reset && h.FlagsChanged()
+	})
+
+	return nil
+}
+
+// ShouldRun is true when there are hosts that need to be reinstalled
+func (p *Reinstall) ShouldRun() bool {
+	return cluster.K0sForceFlagSince.Check(p.Config.Spec.K0s.Version) && len(p.hosts) > 0
+}
+
+// Run the phase
+func (p *Reinstall) Run() error {
+	if !cluster.K0sForceFlagSince.Check(p.Config.Spec.K0s.Version) {
+		log.Warnf("k0s version %s does not support install --force flag, installFlags won't be reconfigured", p.Config.Spec.K0s.Version)
+		return nil
+	}
+	controllers := p.hosts.Controllers()
+	if len(controllers) > 0 {
+		log.Infof("Reinstalling %d controllers sequentially", len(controllers))
+		err := controllers.Each(func(h *cluster.Host) error {
+			return p.reinstall(h)
+		})
+		if err != nil {
+			return err
+		}
+	}
+
+	workers := p.hosts.Workers()
+	if len(workers) == 0 {
+		return nil
+	}
+
+	concurrentReinstalls := int(math.Floor(float64(len(p.hosts)) * 0.10))
+	if concurrentReinstalls == 0 {
+		concurrentReinstalls = 1
+	}
+
+	log.Infof("Reinstalling max %d workers in parallel", concurrentReinstalls)
+
+	return p.hosts.BatchedParallelEach(concurrentReinstalls, p.reinstall)
+}
+
+func (p *Reinstall) reinstall(h *cluster.Host) error {
+	if p.Config.Spec.K0s.DynamicConfig && h.Role != "worker" {
+		h.InstallFlags.AddOrReplace("--enable-dynamic-config")
+	}
+
+	h.InstallFlags.AddOrReplace("--force")
+
+	cmd, err := h.K0sInstallCommand()
+	if err != nil {
+		return err
+	}
+	log.Infof("%s: reinstalling k0s", h)
+	err = p.Wet(h, fmt.Sprintf("reinstall k0s using `%s`", strings.ReplaceAll(cmd, h.Configurer.K0sBinaryPath(), "k0s")), func() error {
+		if err := h.Exec(cmd, exec.Sudo(h)); err != nil {
+			return fmt.Errorf("failed to reinstall k0s: %w", err)
+		}
+		return nil
+	})
+	if err != nil {
+		return err
+	}
+
+	err = p.Wet(h, "restart k0s service", func() error {
+		if err := h.Configurer.RestartService(h, h.K0sServiceName()); err != nil {
+			return fmt.Errorf("failed to restart k0s: %w", err)
+		}
+		log.Infof("%s: waiting for the k0s service to start", h)
+		if err := retry.Timeout(context.TODO(), retry.DefaultTimeout, node.ServiceRunningFunc(h, h.K0sServiceName())); err != nil {
+			return fmt.Errorf("k0s did not restart: %w", err)
+		}
+		return nil
+	})
+	if err != nil {
+		return fmt.Errorf("restart after reinstall: %w", err)
+	}
+
+	if h != p.Config.Spec.K0sLeader() {
+		return nil
+	}
+
+	if NoWait || !p.IsWet() {
+		log.Warnf("%s: skipping scheduler and system pod checks because --no-wait given", h)
+		return nil
+	}
+
+	log.Infof("%s: waiting for the scheduler to become ready", h)
+	if err := retry.Timeout(context.TODO(), retry.DefaultTimeout, node.ScheduledEventsAfterFunc(h, time.Now())); err != nil {
+		if !Force {
+			return fmt.Errorf("failed to observe scheduling events after api start-up, you can ignore this check by using --force: %w", err)
+		}
+		log.Warnf("%s: failed to observe scheduling events after api start-up: %s", h, err)
+	}
+
+	log.Infof("%s: waiting for system pods to become ready", h)
+	if err := retry.Timeout(context.TODO(), retry.DefaultTimeout, node.SystemPodsRunningFunc(h)); err != nil {
+		if !Force {
+			return fmt.Errorf("all system pods not running after api start-up, you can ignore this check by using --force: %w", err)
+		}
+		log.Warnf("%s: failed to observe system pods running after api start-up: %s", h, err)
+	}
+
+	return nil
+}
diff --git a/phase/upgrade_controllers.go b/phase/upgrade_controllers.go
index 70d01bab..71fee804 100644
--- a/phase/upgrade_controllers.go
+++ b/phase/upgrade_controllers.go
@@ -119,6 +119,8 @@ func (p *UpgradeControllers) Run() error {
 				return fmt.Errorf("kube api did not become ready: %w", err)
 			}
 		}
+
+		h.Metadata.K0sRunningVersion = p.Config.Spec.K0s.Version
 	}
 
 	leader := p.Config.Spec.K0sLeader()
diff --git a/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go b/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go
index 7190a295..d3276f21 100644
--- a/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go
+++ b/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go
@@ -6,6 +6,7 @@ import (
 	gos "os"
 	gopath "path"
 	"regexp"
+	"sort"
 	"strconv"
 	"strings"
 	"time"
@@ -23,7 +24,7 @@ import (
 	log "github.com/sirupsen/logrus"
 )
 
-var k0sForceFlagSince = version.MustConstraint(">= v1.27.4+k0s.0")
+var K0sForceFlagSince = version.MustConstraint(">= v1.27.4+k0s.0")
 
 // Host contains all the needed details to work with hosts
 type Host struct {
@@ -183,6 +184,7 @@ type HostMetadata struct {
 	K0sNewConfig      string
 	K0sJoinToken      string
 	K0sJoinTokenID    string
+	K0sStatusArgs     []string
 	Arch              string
 	IsK0sLeader       bool
 	Hostname          string
@@ -334,7 +336,7 @@ func (h *Host) K0sInstallCommand() (string, error) {
 		}
 	}
 
-	if flags.Include("--force") && h.Metadata.K0sBinaryVersion != nil && !k0sForceFlagSince.Check(h.Metadata.K0sBinaryVersion) {
+	if flags.Include("--force") && h.Metadata.K0sBinaryVersion != nil && !K0sForceFlagSince.Check(h.Metadata.K0sBinaryVersion) {
 		log.Warnf("%s: k0s version %s does not support the --force flag, ignoring it", h, h.Metadata.K0sBinaryVersion)
 		flags.Delete("--force")
 	}
@@ -569,3 +571,48 @@ func (h *Host) ExpandTokens(input string, k0sVersion *version.Version) string {
 	}
 	return builder.String()
 }
+
+var flagParseRe = regexp.MustCompile(`--?([\w\-]+)(?:[=\s](\S+))?`)
+
+// FlagsChanged returns true when the flags have changed, determined by comparing host.Metadata.K0sStatusArgs to what host.InstallFlags would produce
+func (h *Host) FlagsChanged() bool {
+	var formattedFlags []string
+
+	// format the flags the same way as spf13/cobra does in k0s
+	for _, flag := range h.InstallFlags {
+		var key string
+		var value string
+		match := flagParseRe.FindStringSubmatch(flag)
+		if len(match) > 1 {
+			key = match[1]
+		}
+		if key == "f" || key == "force" {
+			// --force is stripped by k0s so it does not end up in status.Args
+			continue
+		}
+		if len(match) > 2 && match[2] != "" {
+			if s, err := strconv.Unquote(match[2]); err == nil {
+				value = s
+			} else {
+				value = match[2]
+			}
+		} else {
+			value = "true"
+		}
+		formattedFlags = append(formattedFlags, fmt.Sprintf("--%s=%s", key, value))
+	}
+
+	k0sArgs := h.Metadata.K0sStatusArgs
+	if len(k0sArgs) != len(formattedFlags) {
+		return true
+	}
+	sort.Strings(formattedFlags)
+	sort.Strings(k0sArgs)
+	for i := range formattedFlags {
+		if formattedFlags[i] != k0sArgs[i] {
+			return true
+		}
+	}
+
+	return false
+}
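
Not part of the patch: a minimal standalone sketch of the normalization and comparison that the new FlagsChanged method performs. The normalize and changed helpers, the plain []string types and the example flag values are simplified stand-ins for k0sctl's cluster.Flags type and for the args reported by k0s status; they only illustrate the --key=value formatting the code comments above describe.

// flagschanged_sketch.go - standalone approximation of Host.FlagsChanged (hypothetical names).
package main

import (
	"fmt"
	"regexp"
	"sort"
	"strconv"
)

var flagParseRe = regexp.MustCompile(`--?([\w\-]+)(?:[=\s](\S+))?`)

// normalize rewrites install flags into --key=value form, with bare boolean
// flags becoming --key=true and --force dropped, mirroring the formatting
// FlagsChanged applies before comparing against the k0s status args.
func normalize(installFlags []string) []string {
	var out []string
	for _, flag := range installFlags {
		match := flagParseRe.FindStringSubmatch(flag)
		if match == nil {
			continue
		}
		key := match[1]
		if key == "f" || key == "force" {
			continue // --force never shows up in status.Args
		}
		value := "true"
		if match[2] != "" {
			if s, err := strconv.Unquote(match[2]); err == nil {
				value = s
			} else {
				value = match[2]
			}
		}
		out = append(out, fmt.Sprintf("--%s=%s", key, value))
	}
	return out
}

// changed mirrors the comparison in FlagsChanged: order-insensitive equality
// between the normalized spec flags and the args reported by k0s status.
func changed(installFlags, statusArgs []string) bool {
	want := normalize(installFlags)
	if len(want) != len(statusArgs) {
		return true
	}
	got := append([]string(nil), statusArgs...) // copy so the caller's slice is not reordered
	sort.Strings(want)
	sort.Strings(got)
	for i := range want {
		if want[i] != got[i] {
			return true
		}
	}
	return false
}

func main() {
	status := []string{"--single=true", "--data-dir=/var/lib/k0s"} // example values only
	fmt.Println(changed([]string{"--single", "--data-dir=/var/lib/k0s", "--force"}, status)) // false: only --force differs, and it is ignored
	fmt.Println(changed([]string{"--single", "--data-dir=/tmp/k0s"}, status))                // true: data-dir changed
}

With values like the second call, the host would be picked up by the new Reinstall phase on the next apply and k0s install would be re-run with --force.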