From 545f75fd7ae913aa3a8a097fdc9b2bfbd96c914b Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Fri, 6 Sep 2024 19:41:19 +0400 Subject: [PATCH] feat: acquire machine config inline from kernel cmdline Fixes #9175 Signed-off-by: Andrey Smirnov --- hack/release.toml | 6 + .../pkg/controllers/config/acquire.go | 105 +++++++++++++++++- .../pkg/controllers/config/acquire_test.go | 80 +++++++++++++ .../runtime/v1alpha2/v1alpha2_controller.go | 2 + pkg/machinery/constants/constants.go | 5 + website/content/v1.8/reference/kernel.md | 13 +++ 6 files changed, 209 insertions(+), 2 deletions(-) diff --git a/hack/release.toml b/hack/release.toml index 9aa7cfa156..4525023fa0 100644 --- a/hack/release.toml +++ b/hack/release.toml @@ -235,6 +235,12 @@ Talos Linux now supports [configuration](https://www.talos.dev/v1.8/talos-guides title = "KubeSpan" description = """\ Extra announced endpoints can be added using the [`KubespanEndpointsConfig` document](https://www.talos.dev/v1.8/talos-guides/network/kubespan/#configuration). +""" + + [notes.cmdline-config] + title = "Machine Configuration via Kernel Command Line" + description = """\ +Talos Linux supports supplying zstd-compressed, base64-encoded small machine configuration documents via the kernel command line parameter `talos.config.inline`. 
""" [make_deps] diff --git a/internal/app/machined/pkg/controllers/config/acquire.go b/internal/app/machined/pkg/controllers/config/acquire.go index 242ba29e28..a9b4bd9038 100644 --- a/internal/app/machined/pkg/controllers/config/acquire.go +++ b/internal/app/machined/pkg/controllers/config/acquire.go @@ -8,16 +8,20 @@ import ( "bytes" "compress/gzip" "context" + "encoding/base64" "errors" "fmt" "io" "net/http" "os" + "strings" "github.com/cosi-project/runtime/pkg/controller" "github.com/cosi-project/runtime/pkg/safe" "github.com/cosi-project/runtime/pkg/state" + "github.com/klauspost/compress/zstd" "github.com/siderolabs/gen/optional" + "github.com/siderolabs/go-procfs/procfs" "go.uber.org/zap" talosruntime "github.com/siderolabs/talos/internal/app/machined/pkg/runtime" @@ -49,10 +53,17 @@ type Setter interface { SetConfig(config.Provider) error } +// ModeGetter gets the current runtime mode. +type ModeGetter interface { + InContainer() bool +} + // AcquireController loads the machine configuration from multiple sources. 
type AcquireController struct { PlatformConfiguration PlatformConfigurator PlatformEvent PlatformEventer + Mode ModeGetter + CmdlineGetter func() *procfs.Cmdline ConfigSetter Setter EventPublisher talosruntime.Publisher ValidationMode validation.RuntimeMode @@ -261,7 +272,7 @@ func (ctrl *AcquireController) loadFromDisk(logger *zap.Logger) (config.Provider // // Transitions: // -// --> maintenanceEnter: config loaded from platform, but it's incomplete, or no config from platform: proceed to maintenance +// --> cmdline: config loaded from platform, but it's incomplete, or no config from platform: proceed to cmdline // --> done: config loaded from platform, and it's complete func (ctrl *AcquireController) statePlatform(ctx context.Context, r controller.Runtime, logger *zap.Logger) (stateMachineFunc, config.Provider, error) { cfg, err := ctrl.loadFromPlatform(ctx, logger) @@ -278,7 +289,7 @@ func (ctrl *AcquireController) statePlatform(ctx context.Context, r controller.R fallthrough case !cfg.CompleteForBoot(): // incomplete or missing config, proceed to maintenance - return ctrl.stateMaintenanceEnter, cfg, nil + return ctrl.stateCmdline, cfg, nil default: // complete config, we are done return ctrl.stateDone, cfg, nil @@ -341,6 +352,96 @@ func (ctrl *AcquireController) loadFromPlatform(ctx context.Context, logger *zap return cfg, nil } +// stateCmdline acquires machine configuration from the kernel cmdline source. 
+// +// Transitions: +// +// --> maintenanceEnter: config loaded from cmdline, but it's incomplete, or no config from cmdline: proceed to maintenance +// --> done: config loaded from cmdline, and it's complete +func (ctrl *AcquireController) stateCmdline(ctx context.Context, r controller.Runtime, logger *zap.Logger) (stateMachineFunc, config.Provider, error) { + if ctrl.Mode.InContainer() { + // no cmdline in containers + return ctrl.stateMaintenanceEnter, nil, nil + } + + cfg, err := ctrl.loadFromCmdline(logger) + if err != nil { + return nil, nil, err + } + + if cfg != nil { + ctrl.configSourcesUsed = append(ctrl.configSourcesUsed, "cmdline") + } + + switch { + case cfg == nil: + fallthrough + case !cfg.CompleteForBoot(): + // incomplete or missing config, proceed to maintenance + return ctrl.stateMaintenanceEnter, cfg, nil + default: + // complete config, we are done + return ctrl.stateDone, cfg, nil + } +} + +// loadFromCmdline is a helper function for stateCmdline. +func (ctrl *AcquireController) loadFromCmdline(logger *zap.Logger) (config.Provider, error) { + cmdline := ctrl.CmdlineGetter() + + param := cmdline.Get(constants.KernelParamConfigInline) + + if param == nil { + return nil, nil + } + + logger.Info("getting config from cmdline", zap.String("param", constants.KernelParamConfigInline)) + + var cfgEncoded strings.Builder + + for i := 0; ; i++ { + v := param.Get(i) + if v == nil { + break + } + + cfgEncoded.WriteString(*v) + } + + cfgDecoded, err := base64.StdEncoding.DecodeString(cfgEncoded.String()) + if err != nil { + return nil, fmt.Errorf("failed to decode base64 config from cmdline %s: %w", constants.KernelParamConfigInline, err) + } + + zr, err := zstd.NewReader(bytes.NewReader(cfgDecoded)) + if err != nil { + return nil, fmt.Errorf("failed to create zstd reader: %w", err) + } + + defer zr.Close() + + cfgBytes, err := io.ReadAll(zr) + if err != nil { + return nil, fmt.Errorf("failed to read zstd compressed config from cmdline %s: %w", 
constants.KernelParamConfigInline, err) + } + + cfg, err := configloader.NewFromBytes(cfgBytes) + if err != nil { + return nil, fmt.Errorf("failed to load config via cmdline %s: %w", constants.KernelParamConfigInline, err) + } + + warnings, err := cfg.Validate(ctrl.ValidationMode) + if err != nil { + return nil, fmt.Errorf("failed to validate config acquired via cmdline %s: %w", constants.KernelParamConfigInline, err) + } + + for _, warning := range warnings { + logger.Warn("config validation warning", zap.String("cmdline", constants.KernelParamConfigInline), zap.String("warning", warning)) + } + + return cfg, nil +} + // stateMaintenanceEnter initializes maintenance service. // // Transitions: diff --git a/internal/app/machined/pkg/controllers/config/acquire_test.go b/internal/app/machined/pkg/controllers/config/acquire_test.go index dfdaffb310..dface32b5f 100644 --- a/internal/app/machined/pkg/controllers/config/acquire_test.go +++ b/internal/app/machined/pkg/controllers/config/acquire_test.go @@ -8,6 +8,7 @@ import ( "bytes" "compress/gzip" "context" + "encoding/base64" stderrors "errors" "fmt" "math/rand/v2" @@ -22,6 +23,8 @@ import ( "github.com/cosi-project/runtime/pkg/resource" "github.com/cosi-project/runtime/pkg/resource/rtestutils" "github.com/cosi-project/runtime/pkg/state" + "github.com/klauspost/compress/zstd" + "github.com/siderolabs/go-procfs/procfs" "github.com/siderolabs/go-retry/retry" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" @@ -37,6 +40,7 @@ import ( "github.com/siderolabs/talos/pkg/machinery/config/generate" "github.com/siderolabs/talos/pkg/machinery/config/machine" "github.com/siderolabs/talos/pkg/machinery/config/types/siderolink" + "github.com/siderolabs/talos/pkg/machinery/constants" "github.com/siderolabs/talos/pkg/machinery/proto" configresource "github.com/siderolabs/talos/pkg/machinery/resources/config" "github.com/siderolabs/talos/pkg/machinery/resources/runtime" @@ -51,6 +55,7 @@ type AcquireSuite 
struct { platformEvent *platformEventMock configSetter *configSetterMock eventPublisher *eventPublisherMock + cmdline *cmdlineGetterMock clusterName string completeMachineConfig []byte @@ -118,6 +123,16 @@ func (e *eventPublisherMock) getEvents() []proto.Message { return slices.Clone(e.events) } +type cmdlineGetterMock struct { + cmdline *procfs.Cmdline +} + +func (c *cmdlineGetterMock) Getter() func() *procfs.Cmdline { + return func() *procfs.Cmdline { + return c.cmdline + } +} + type validationModeMock struct{} func (v validationModeMock) String() string { @@ -152,6 +167,9 @@ func TestAcquireSuite(t *testing.T) { cfgCh: make(chan config.Provider, 1), } s.eventPublisher = &eventPublisherMock{} + s.cmdline = &cmdlineGetterMock{ + procfs.NewCmdline(""), + } s.clusterName = fmt.Sprintf("cluster-%d", rand.Int32()) input, err := generate.NewInput(s.clusterName, "https://localhost:6443", "") @@ -176,6 +194,8 @@ func TestAcquireSuite(t *testing.T) { PlatformConfiguration: s.platformConfig, PlatformEvent: s.platformEvent, ConfigSetter: s.configSetter, + Mode: validationModeMock{}, + CmdlineGetter: s.cmdline.Getter(), EventPublisher: s.eventPublisher, ValidationMode: validationModeMock{}, ConfigPath: s.configPath, @@ -429,6 +449,66 @@ func (suite *AcquireSuite) TestFromPlatformToMaintenance() { ) } +func (suite *AcquireSuite) TestFromCmdlineToMaintenance() { + var cfgCompressed bytes.Buffer + + zw, err := zstd.NewWriter(&cfgCompressed) + suite.Require().NoError(err) + + _, err = zw.Write(suite.partialMachineConfig) + suite.Require().NoError(err) + + suite.Require().NoError(zw.Close()) + + cfgEncoded := base64.StdEncoding.EncodeToString(cfgCompressed.Bytes()) + + suite.cmdline.cmdline = procfs.NewCmdline(fmt.Sprintf("%s=%s", constants.KernelParamConfigInline, cfgEncoded)) + + suite.triggerAcquire() + + var cfg config.Provider + + select { + case cfg = <-suite.configSetter.cfgCh: + case <-suite.Ctx().Done(): + suite.Require().Fail("timed out waiting for config") + } + + 
suite.Require().Equal(cfg.SideroLink().APIUrl().Host, "siderolink.api") + + suite.injectViaMaintenance(suite.completeMachineConfig) + + cfg = suite.waitForConfig() + suite.Require().Equal(cfg.Cluster().Name(), suite.clusterName) + + suite.Assert().Equal( + []proto.Message{ + &machineapi.TaskEvent{ + Action: machineapi.TaskEvent_START, + Task: "runningMaintenance", + }, + &machineapi.TaskEvent{ + Action: machineapi.TaskEvent_STOP, + Task: "runningMaintenance", + }, + }, + suite.eventPublisher.getEvents(), + ) + suite.Assert().Equal( + []platform.Event{ + { + Type: platform.EventTypeActivate, + Message: "Talos booted into maintenance mode. Ready for user interaction.", + }, + { + Type: platform.EventTypeConfigLoaded, + Message: "Talos machine config loaded successfully.", + }, + }, + suite.platformEvent.getEvents(), + ) +} + func (suite *AcquireSuite) TestFromMaintenance() { suite.triggerAcquire() diff --git a/internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go b/internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go index 0a9eb8c060..f7cbc9e6a2 100644 --- a/internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go +++ b/internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go @@ -117,6 +117,8 @@ func (ctrl *Controller) Run(ctx context.Context, drainer *runtime.Drainer) error PlatformEvent: &platformEventer{ platform: ctrl.v1alpha1Runtime.State().Platform(), }, + Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(), + CmdlineGetter: procfs.ProcCmdline, ConfigSetter: ctrl.v1alpha1Runtime, EventPublisher: ctrl.v1alpha1Runtime.Events(), ValidationMode: ctrl.v1alpha1Runtime.State().Platform().Mode(), diff --git a/pkg/machinery/constants/constants.go b/pkg/machinery/constants/constants.go index 39d3f647d4..0d7b938287 100644 --- a/pkg/machinery/constants/constants.go +++ b/pkg/machinery/constants/constants.go @@ -23,6 +23,11 @@ const ( // to the config. 
KernelParamConfig = "talos.config" + // KernelParamConfigInline is the kernel parameter name for specifying the inline config. + // + // The inline config should be zstd-compressed and then base64-encoded. + KernelParamConfigInline = "talos.config.inline" + // KernelParamConfigOAuthClientID is the kernel parameter name for specifying the OAuth2 client ID. KernelParamConfigOAuthClientID = "talos.config.oauth.client_id" diff --git a/website/content/v1.8/reference/kernel.md b/website/content/v1.8/reference/kernel.md index 8c961f14ae..501e857284 100644 --- a/website/content/v1.8/reference/kernel.md +++ b/website/content/v1.8/reference/kernel.md @@ -145,6 +145,19 @@ mkisofs -joliet -rock -volid 'metal-iso' -output config.iso iso/ Kernel parameters prefixed with `talos.config.auth.` are used to configure [OAuth2 authentication for the machine configuration]({{< relref "../advanced/machine-config-oauth" >}}). +#### `talos.config.inline` + +The kernel parameter `talos.config.inline` can be used to provide an initial minimal machine configuration directly on the kernel command line, when other means of providing the configuration are not available. +The machine configuration should be `zstd`-compressed and base64-encoded to be passed as a kernel parameter. + +> Note: The kernel command line has a limited size (4096 bytes), so this method is only suitable for small configuration documents. + +One such example is to provide [a custom CA certificate]({{< relref "../talos-guides/configuration/certificate-authorities" >}}) via `TrustedRootsConfig` in the machine configuration: + +```shell +cat config.yaml | zstd --compress --ultra -22 | base64 -w 0 +``` + #### `talos.platform` The platform name on which Talos will run.