Skip to content

Commit

Permalink
feat: implement a new mode try in the config manipulation commands
Browse files Browse the repository at this point in the history
The new mode allows changing the config for a period of time, which
makes it possible to try a configuration and have it rolled back
automatically if, for example, it doesn't work.

The mode can only be used with changes that can be applied without a
reboot.

In this mode the configuration is not written to disk; it is only changed
in memory.
The `--timeout` parameter can be used to customize the rollback delay.
The default timeout is 1 minute.

Any subsequent configuration change will abort try mode, and the last
applied configuration will be used.

Signed-off-by: Artem Chernyshev <[email protected]>
  • Loading branch information
Unix4ever committed Apr 21, 2022
1 parent 51a68c3 commit 2b03057
Show file tree
Hide file tree
Showing 16 changed files with 1,743 additions and 1,429 deletions.
2 changes: 2 additions & 0 deletions api/machine/machine.proto
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ message ApplyConfigurationRequest {
AUTO = 1;
NO_REBOOT = 2;
STAGED = 3;
TRY = 4;
}
bytes data = 1;
// replaced by mode
Expand All @@ -92,6 +93,7 @@ message ApplyConfigurationRequest {
];
Mode mode = 4;
bool dry_run = 5;
google.protobuf.Duration try_mode_timeout = 6;
}

// ApplyConfigurationResponse describes the response to a configuration request.
Expand Down
16 changes: 11 additions & 5 deletions cmd/talosctl/cmd/talos/apply-config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@ import (
"fmt"
"io/ioutil"
"strings"
"time"

"github.com/spf13/cobra"
"google.golang.org/protobuf/types/known/durationpb"

"github.com/talos-systems/talos/cmd/talosctl/pkg/talos/helpers"
"github.com/talos-systems/talos/internal/pkg/tui/installer"
machineapi "github.com/talos-systems/talos/pkg/machinery/api/machine"
"github.com/talos-systems/talos/pkg/machinery/client"
"github.com/talos-systems/talos/pkg/machinery/constants"
)

var applyConfigCmdFlags struct {
Expand All @@ -24,6 +27,7 @@ var applyConfigCmdFlags struct {
filename string
insecure bool
dryRun bool
configTryTimeout time.Duration
}

// applyConfigCmd represents the applyConfiguration command.
Expand Down Expand Up @@ -110,11 +114,12 @@ var applyConfigCmd = &cobra.Command{
}

resp, err := c.ApplyConfiguration(ctx, &machineapi.ApplyConfigurationRequest{
Data: cfgBytes,
Mode: applyConfigCmdFlags.Mode.Mode,
OnReboot: applyConfigCmdFlags.OnReboot,
Immediate: applyConfigCmdFlags.Immediate,
DryRun: applyConfigCmdFlags.dryRun,
Data: cfgBytes,
Mode: applyConfigCmdFlags.Mode.Mode,
OnReboot: applyConfigCmdFlags.OnReboot,
Immediate: applyConfigCmdFlags.Immediate,
DryRun: applyConfigCmdFlags.dryRun,
TryModeTimeout: durationpb.New(applyConfigCmdFlags.configTryTimeout),
})
if err != nil {
return fmt.Errorf("error applying new configuration: %s", err)
Expand All @@ -132,6 +137,7 @@ func init() {
applyConfigCmd.Flags().BoolVarP(&applyConfigCmdFlags.insecure, "insecure", "i", false, "apply the config using the insecure (encrypted with no auth) maintenance service")
applyConfigCmd.Flags().BoolVar(&applyConfigCmdFlags.dryRun, "dry-run", false, "check how the config change will be applied in dry-run mode")
applyConfigCmd.Flags().StringSliceVar(&applyConfigCmdFlags.certFingerprints, "cert-fingerprint", nil, "list of server certificate fingeprints to accept (defaults to no check)")
applyConfigCmd.Flags().DurationVar(&applyConfigCmdFlags.configTryTimeout, "timeout", constants.ConfigTryTimeout, "the config will be rolled back after specified timeout (if try mode is selected)")
helpers.AddModeFlags(&applyConfigCmdFlags.Mode, applyConfigCmd)
addCommand(applyConfigCmd)
}
20 changes: 13 additions & 7 deletions cmd/talosctl/cmd/talos/edit.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ import (
"io"
"runtime"
"strings"
"time"

"github.com/spf13/cobra"
"google.golang.org/protobuf/types/known/durationpb"
yaml "gopkg.in/yaml.v3"
cmdutil "k8s.io/kubectl/pkg/cmd/util"
"k8s.io/kubectl/pkg/cmd/util/editor"
Expand All @@ -21,13 +23,15 @@ import (
"github.com/talos-systems/talos/cmd/talosctl/pkg/talos/helpers"
"github.com/talos-systems/talos/pkg/machinery/api/machine"
"github.com/talos-systems/talos/pkg/machinery/client"
"github.com/talos-systems/talos/pkg/machinery/constants"
"github.com/talos-systems/talos/pkg/machinery/resources/config"
)

var editCmdFlags struct {
helpers.Mode
namespace string
dryRun bool
namespace string
dryRun bool
configTryTimeout time.Duration
}

//nolint:gocyclo
Expand Down Expand Up @@ -115,11 +119,12 @@ func editFn(c *client.Client) func(context.Context, client.ResourceResponse) err
}

resp, err := c.ApplyConfiguration(ctx, &machine.ApplyConfigurationRequest{
Data: edited,
Mode: editCmdFlags.Mode.Mode,
OnReboot: editCmdFlags.OnReboot,
Immediate: editCmdFlags.Immediate,
DryRun: editCmdFlags.dryRun,
Data: edited,
Mode: editCmdFlags.Mode.Mode,
OnReboot: editCmdFlags.OnReboot,
Immediate: editCmdFlags.Immediate,
DryRun: editCmdFlags.dryRun,
TryModeTimeout: durationpb.New(editCmdFlags.configTryTimeout),
})
if err != nil {
lastError = err.Error()
Expand Down Expand Up @@ -180,5 +185,6 @@ func init() {
editCmd.Flags().StringVar(&editCmdFlags.namespace, "namespace", "", "resource namespace (default is to use default namespace per resource)")
helpers.AddModeFlags(&editCmdFlags.Mode, editCmd)
editCmd.Flags().BoolVar(&editCmdFlags.dryRun, "dry-run", false, "do not apply the change after editing and print the change summary instead")
editCmd.Flags().DurationVar(&editCmdFlags.configTryTimeout, "timeout", constants.ConfigTryTimeout, "the config will be rolled back after specified timeout (if try mode is selected)")
addCommand(editCmd)
}
24 changes: 15 additions & 9 deletions cmd/talosctl/cmd/talos/patch.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,29 @@ import (
"context"
"fmt"
"strings"
"time"

jsonpatch "github.com/evanphx/json-patch"
"github.com/spf13/cobra"
"google.golang.org/protobuf/types/known/durationpb"
yaml "gopkg.in/yaml.v3"
cmdutil "k8s.io/kubectl/pkg/cmd/util"

"github.com/talos-systems/talos/cmd/talosctl/pkg/talos/helpers"
"github.com/talos-systems/talos/pkg/machinery/api/machine"
"github.com/talos-systems/talos/pkg/machinery/client"
"github.com/talos-systems/talos/pkg/machinery/config/configpatcher"
"github.com/talos-systems/talos/pkg/machinery/constants"
"github.com/talos-systems/talos/pkg/machinery/resources/config"
)

var patchCmdFlags struct {
helpers.Mode
namespace string
patch []string
patchFile string
dryRun bool
namespace string
patch []string
patchFile string
dryRun bool
configTryTimeout time.Duration
}

func patchFn(c *client.Client, patch jsonpatch.Patch) func(context.Context, client.ResourceResponse) error {
Expand All @@ -51,11 +55,12 @@ func patchFn(c *client.Client, patch jsonpatch.Patch) func(context.Context, clie
}

resp, err := c.ApplyConfiguration(ctx, &machine.ApplyConfigurationRequest{
Data: patched,
Mode: patchCmdFlags.Mode.Mode,
OnReboot: patchCmdFlags.OnReboot,
Immediate: patchCmdFlags.Immediate,
DryRun: patchCmdFlags.dryRun,
Data: patched,
Mode: patchCmdFlags.Mode.Mode,
OnReboot: patchCmdFlags.OnReboot,
Immediate: patchCmdFlags.Immediate,
DryRun: patchCmdFlags.dryRun,
TryModeTimeout: durationpb.New(patchCmdFlags.configTryTimeout),
})

if bytes.Equal(
Expand Down Expand Up @@ -116,6 +121,7 @@ func init() {
patchCmd.Flags().StringVar(&patchCmdFlags.patchFile, "patch-file", "", "a file containing a patch to be applied to the resource.")
patchCmd.Flags().StringArrayVarP(&patchCmdFlags.patch, "patch", "p", nil, "the patch to be applied to the resource file, use @file to read a patch from file.")
patchCmd.Flags().BoolVar(&patchCmdFlags.dryRun, "dry-run", false, "print the change summary and patch preview without applying the changes")
patchCmd.Flags().DurationVar(&patchCmdFlags.configTryTimeout, "timeout", constants.ConfigTryTimeout, "the config will be rolled back after specified timeout (if try mode is selected)")
helpers.AddModeFlags(&patchCmdFlags.Mode, patchCmd)
addCommand(patchCmd)
}
4 changes: 4 additions & 0 deletions cmd/talosctl/pkg/talos/helpers/mode.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ type Mode struct {

func (m Mode) String() string {
switch m.Mode {
case machine.ApplyConfigurationRequest_TRY:
return modeTry
case machine.ApplyConfigurationRequest_AUTO:
return modeAuto
case machine.ApplyConfigurationRequest_NO_REBOOT:
Expand Down Expand Up @@ -85,6 +87,7 @@ const (
modeReboot = "reboot"
modeStaged = "staged"
modeInteractive = "interactive"
modeTry = "try"
)

// AddModeFlags adds deprecated flags to the command and registers mode flag with its parser.
Expand All @@ -94,6 +97,7 @@ func AddModeFlags(mode *Mode, command *cobra.Command) {
modeNoReboot: machine.ApplyConfigurationRequest_NO_REBOOT,
modeReboot: machine.ApplyConfigurationRequest_REBOOT,
modeStaged: machine.ApplyConfigurationRequest_STAGED,
modeTry: machine.ApplyConfigurationRequest_TRY,
}

deprecatedFlag := func(dest *bool, flag, usage, deprecationWarning string) {
Expand Down
11 changes: 11 additions & 0 deletions hack/release.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,17 @@ so Talos no longer runs on processors supporting only baseline `x86-64` microarc
description="""\
The commands `talosctl apply-config`, `talosctl patch mc` and `talosctl edit mc` now support `--dry-run` flag.
If enabled it just prints out the selected config application mode and the configuration diff.
"""

[notes.apply-config-try]
title = "Apply Config `--mode=try`"
description="""\
The commands `talosctl apply-config`, `talosctl patch mc` and `talosctl edit mc` now support the new mode called `try`.
In this mode the config change is applied for a period of time and then reverted to the state it was in before the change.
The `--timeout` parameter can be used to customize the config rollback timeout.
This new mode can be used only with the parts of the config that can be changed without a reboot and can help to check that
the new configuration doesn't break the node.
It is especially useful for checking network interface changes that may lead to the loss of connectivity to the node.
"""

[make_deps]
Expand Down
32 changes: 30 additions & 2 deletions internal/app/machined/internal/server/v1alpha1/v1alpha1_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,20 @@ func (s *Server) ApplyConfiguration(ctx context.Context, in *machine.ApplyConfig
in.Mode = machine.ApplyConfigurationRequest_STAGED
}

if in.Mode != machine.ApplyConfigurationRequest_TRY {
s.Controller.Runtime().CancelConfigRollbackTimeout()
}

cfgProvider, err := s.Controller.Runtime().LoadAndValidateConfig(in.GetData())
if err != nil {
return nil, err
}

//nolint:exhaustive
switch in.Mode {
// --mode=try
case machine.ApplyConfigurationRequest_TRY:
fallthrough
// --mode=no-reboot
case machine.ApplyConfigurationRequest_NO_REBOOT:
if err = s.Controller.Runtime().CanApplyImmediate(cfgProvider); err != nil {
Expand Down Expand Up @@ -214,12 +221,33 @@ Config diff:
return nil, err
}

if err := ioutil.WriteFile(constants.ConfigPath, cfg, 0o600); err != nil {
return nil, err
if in.Mode != machine.ApplyConfigurationRequest_TRY {
if err := ioutil.WriteFile(constants.ConfigPath, cfg, 0o600); err != nil {
return nil, err
}
}

//nolint:exhaustive
switch in.Mode {
// --mode=try
case machine.ApplyConfigurationRequest_TRY:
oldConfig, err := s.Controller.Runtime().Config().Bytes()
if err != nil {
return nil, err
}

timeout := constants.ConfigTryTimeout
if in.TryModeTimeout != nil {
timeout = in.TryModeTimeout.AsDuration()
}

modeDetails += fmt.Sprintf("\nThe config is applied in 'try' mode and will be automatically reverted back in %s", timeout.String())

if err := s.Controller.Runtime().RollbackToConfigAfter(oldConfig, timeout); err != nil {
return nil, err
}

fallthrough
// --mode=no-reboot
case machine.ApplyConfigurationRequest_NO_REBOOT:
if err := s.Controller.Runtime().SetConfig(cfgProvider); err != nil {
Expand Down
4 changes: 4 additions & 0 deletions internal/app/machined/pkg/runtime/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@
package runtime

import (
"time"

"github.com/talos-systems/talos/pkg/machinery/config"
)

// Runtime defines the runtime parameters.
type Runtime interface {
Config() config.Provider
LoadAndValidateConfig([]byte) (config.Provider, error)
RollbackToConfigAfter([]byte, time.Duration) error
CancelConfigRollbackTimeout()
SetConfig(config.Provider) error
CanApplyImmediate(config.Provider) error
State() State
Expand Down
36 changes: 36 additions & 0 deletions internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ package v1alpha1
import (
"context"
"fmt"
"log"
"reflect"
"sync"
"time"

"github.com/cosi-project/runtime/pkg/resource"
Expand All @@ -28,6 +30,9 @@ type Runtime struct {
s runtime.State
e runtime.EventStream
l runtime.LoggingManager

rollbackTimerMu sync.Mutex
rollbackTimer *time.Timer
}

// NewRuntime initializes and returns the v1alpha1 runtime.
Expand Down Expand Up @@ -59,6 +64,37 @@ func (r *Runtime) LoadAndValidateConfig(b []byte) (config.Provider, error) {
return cfg, nil
}

// RollbackToConfigAfter implements the Runtime interface.
//
// It schedules a call to SetConfig with the configuration loaded from cfg
// once timeout has elapsed, replacing any rollback that is already pending.
//
// cfg is loaded through LoadAndValidateConfig before any timer is touched,
// so when loading fails the error is returned and a previously scheduled
// rollback is left in place.
//
// A failure of the deferred SetConfig call cannot be returned to the caller
// (it happens after this method has returned); it is only logged.
func (r *Runtime) RollbackToConfigAfter(cfg []byte, timeout time.Duration) error {
	cfgProvider, err := r.LoadAndValidateConfig(cfg)
	if err != nil {
		return err
	}

	// Stop the pending timer and install the new one in a single critical
	// section: rollbackTimer is also accessed by CancelConfigRollbackTimeout
	// under this mutex, and swapping it unlocked would be a data race that
	// could additionally leave an orphaned timer that can never be cancelled.
	r.rollbackTimerMu.Lock()
	defer r.rollbackTimerMu.Unlock()

	if r.rollbackTimer != nil {
		r.rollbackTimer.Stop()
	}

	// The callback runs in its own goroutine and does not take
	// rollbackTimerMu, so it cannot deadlock against the lock held here.
	r.rollbackTimer = time.AfterFunc(timeout, func() {
		log.Println("rolling back the configuration")

		if err := r.SetConfig(cfgProvider); err != nil {
			log.Printf("config rollback failed %s", err)
		}
	})

	return nil
}

// CancelConfigRollbackTimeout implements the Runtime interface.
//
// It stops the pending rollback timer, if there is one, and clears the
// stored reference. Calling it when no rollback is scheduled is a no-op.
// Access to the timer field is serialized by rollbackTimerMu.
func (r *Runtime) CancelConfigRollbackTimeout() {
	r.rollbackTimerMu.Lock()
	defer r.rollbackTimerMu.Unlock()

	// Nothing scheduled: nothing to stop.
	if r.rollbackTimer == nil {
		return
	}

	r.rollbackTimer.Stop()
	r.rollbackTimer = nil
}

// SetConfig implements the Runtime interface.
func (r *Runtime) SetConfig(cfg config.Provider) error {
r.c = cfg
Expand Down
2 changes: 2 additions & 0 deletions internal/app/maintenance/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ func (s *Server) Register(obj *grpc.Server) {
func (s *Server) ApplyConfiguration(ctx context.Context, in *machine.ApplyConfigurationRequest) (*machine.ApplyConfigurationResponse, error) {
//nolint:exhaustive
switch in.Mode {
case machine.ApplyConfigurationRequest_TRY:
fallthrough
case machine.ApplyConfigurationRequest_REBOOT:
fallthrough
case machine.ApplyConfigurationRequest_AUTO:
Expand Down
Loading

0 comments on commit 2b03057

Please sign in to comment.