Skip to content

Commit

Permalink
MCO-356: daemon/firstboot: Do a secondary in-place update if rpm-ostr…
Browse files Browse the repository at this point in the history
…ee is too old

xref coreos/rpm-ostree@89f5802

Change the logic for firstboot OS updates to detect if rpm-ostree is too
old to natively fetch a container image on its own.  If so,
we use `ex deploy-from-self` via podman.

Introduce a new `/etc/machine-config-daemon-force-once` file that
causes the daemon to skip on-disk validation, so that it re-reconciles
and attempts to apply the OS upgrade again, this time natively
via rpm-ostree.

This is needed for scaleup of old nodes, as well as temporarily
for 4.11 upgrades.
  • Loading branch information
cgwalters authored and jkyros committed Sep 27, 2022
1 parent e26e51e commit b63b319
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 9 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ require (
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/ini.v1 v1.66.6 // indirect
gopkg.in/square/go-jose.v2 v2.6.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v2 v2.4.0
gopkg.in/yaml.v3 v3.0.1 // indirect
honnef.co/go/tools v0.3.3 // indirect
k8s.io/apiserver v0.25.1 // indirect
Expand Down
45 changes: 40 additions & 5 deletions pkg/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ type Daemon struct {
// booting is true when all initial synchronization to the target
// machineconfig is done
booting bool
// needSecondaryReboot is used for https://issues.redhat.com/browse/MCO-356
// when we have an old rpm-ostree and need to reboot into the new one.
needSecondaryReboot bool

currentConfigPath string

Expand Down Expand Up @@ -884,6 +887,31 @@ func (dn *Daemon) RunFirstbootCompleteMachineconfig() error {
if err != nil {
return fmt.Errorf("failed to parse MachineConfig: %w", err)
}
newEnough, err := RpmOstreeIsNewEnoughForLayering()
if err != nil {
return err
}

// If the host isn't new enough to understand the new container model natively, run as a privileged container.
// See https://github.com/coreos/rpm-ostree/pull/3961 and https://issues.redhat.com/browse/MCO-356
// This currently will incur a double reboot; see https://github.com/coreos/rpm-ostree/issues/4018
if !newEnough {
dn.logSystem("rpm-ostree is not new enough for new-format image; forcing an update via container and queuing immediate reboot")
err := runCmdSync("systemd-run", "--unit", "machine-config-daemon-update-rpmostree-via-container", "--collect", "--wait", "--", "podman", "run", "--authfile", "/var/lib/kubelet/config.json", "--privileged", "--pid=host", "--net=host", "--rm", "-v", "/:/run/host", mc.Spec.OSImageURL, "rpm-ostree", "ex", "deploy-from-self", "/run/host")
if err != nil {
return err
}
rebootCmd := rebootCommand("extra reboot for in-place update")
if err := rebootCmd.Run(); err != nil {
dn.logSystem("failed to run reboot: %v", err)
return err
}
// wait to be killed via SIGTERM
time.Sleep(defaultRebootTimeout)
return fmt.Errorf("failed to reboot for secondary in-place update")
}

glog.Info("rpm-ostree has container feature")

// Start with an empty config, then add our *booted* osImageURL to
// it, reflecting the current machine state.
Expand Down Expand Up @@ -1407,9 +1435,11 @@ func (dn *Daemon) checkStateOnFirstRun() error {
return err
}
var pendingConfigName, bootID string
var pendingSpecifiesForce bool
if pendingState != nil {
pendingConfigName = pendingState.Message
bootID = pendingState.BootID
pendingSpecifiesForce = pendingState.Force == "true"
}
// XXX: drop this
// we need this compatibility layer for now
Expand Down Expand Up @@ -1456,7 +1486,7 @@ func (dn *Daemon) checkStateOnFirstRun() error {
targetOSImageURL := state.currentConfig.Spec.OSImageURL
osMatch := dn.checkOS(targetOSImageURL)
if !osMatch {
glog.Infof("Bootstrap pivot required to: %s", targetOSImageURL)
dn.logSystem("Bootstrap pivot required to: %s", targetOSImageURL)

// Check to see if we have a layered/new format image
isLayeredImage, err := dn.NodeUpdaterClient.IsBootableImage(targetOSImageURL)
Expand Down Expand Up @@ -1487,7 +1517,7 @@ func (dn *Daemon) checkStateOnFirstRun() error {
}
return dn.reboot(fmt.Sprintf("Node will reboot into config %v", state.currentConfig.GetName()))
}
glog.Info("No bootstrap pivot required; unlinking bootstrap node annotations")
dn.logSystem("No bootstrap pivot required; unlinking bootstrap node annotations")

// Rename the bootstrap node annotations; the
// currentConfig's osImageURL should now be *truth*.
Expand Down Expand Up @@ -1532,8 +1562,13 @@ func (dn *Daemon) checkStateOnFirstRun() error {
expectedConfig = state.currentConfig
}

if forceFileExists() {
glog.Infof("Skipping on-disk validation; %s present", constants.MachineConfigDaemonForceFile)
force := pendingSpecifiesForce || forceFileExists()
if force {
if pendingSpecifiesForce {
dn.logSystem("Skipping on-disk validation; pending config specifies forcing")
} else {
dn.logSystem("Skipping on-disk validation; %s present", constants.MachineConfigDaemonForceFile)
}
return dn.triggerUpdateWithMachineConfig(state.currentConfig, state.desiredConfig)
}

Expand All @@ -1543,7 +1578,7 @@ func (dn *Daemon) checkStateOnFirstRun() error {
return wErr
}

glog.Info("Validated on-disk state")
dn.logSystem("Validated on-disk state")

// We've validated state. Now, ensure that node is in desired state
var inDesiredConfig bool
Expand Down
42 changes: 42 additions & 0 deletions pkg/daemon/rpm-ostree.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/golang/glog"
"github.com/opencontainers/go-digest"
pivotutils "github.com/openshift/machine-config-operator/pkg/daemon/pivot/utils"
"gopkg.in/yaml.v2"
)

const (
Expand Down Expand Up @@ -346,6 +347,47 @@ func (r *RpmOstreeClient) IsBootableImage(imgURL string) (bool, error) {
return isBootableImage == "true", nil
}

// VersionData represents the static information about rpm-ostree, i.e. the
// mapping found under the top-level "rpm-ostree" key of the YAML document
// parsed by rpmOstreeVersion. RpmOstreeIsNewEnoughForLayering inspects its
// Features list to decide whether the host supports layering.
type VersionData struct {
	// Version is the value of the "Version" key.
	Version string `yaml:"Version"`
	// Features is the list under the "Features" key.
	Features []string `yaml:"Features"`
	// Git is the value of the "Git" key.
	Git string `yaml:"Git"`
}

// RpmOstreeVersionData is the top-level YAML document that the output of
// `rpm-ostree --version` is unmarshalled into.
type RpmOstreeVersionData struct {
	// Root holds everything nested under the "rpm-ostree" key.
	Root VersionData `yaml:"rpm-ostree"`
}

// rpmOstreeVersion runs `rpm-ostree --version`, parses its YAML output and
// returns the data found under the top-level "rpm-ostree" key.
//
// An error is returned if the command fails or if its output cannot be
// unmarshalled.
func rpmOstreeVersion() (*VersionData, error) {
	out, err := runGetOut("rpm-ostree", "--version")
	if err != nil {
		return nil, err
	}

	parsed := new(RpmOstreeVersionData)
	if unmarshalErr := yaml.Unmarshal(out, parsed); unmarshalErr != nil {
		return nil, fmt.Errorf("failed to parse `rpm-ostree --version` output: %w", unmarshalErr)
	}

	return &parsed.Root, nil
}

// RpmOstreeIsNewEnoughForLayering returns true if the version of rpm-ostree on
// the host system is new enough for layering, which is determined by the
// presence of the "container" entry in its advertised feature list.
//
// An error is returned if the rpm-ostree version data cannot be obtained.
func RpmOstreeIsNewEnoughForLayering() (bool, error) {
	info, err := rpmOstreeVersion()
	if err != nil {
		return false, err
	}

	hasContainer := false
	for i := range info.Features {
		if info.Features[i] == "container" {
			hasContainer = true
			break
		}
	}
	return hasContainer, nil
}

// RebaseLayered rebases system or errors if already rebased
func (r *RpmOstreeClient) RebaseLayered(imgURL string) (err error) {
glog.Infof("Executing rebase to %s", imgURL)
Expand Down
27 changes: 27 additions & 0 deletions pkg/daemon/rpm-ostree_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package daemon

import (
"testing"

"github.com/stretchr/testify/assert"
"gopkg.in/yaml.v2"
)

// TestParseVersion verifies that the YAML emitted by `rpm-ostree --version`
// unmarshals into RpmOstreeVersionData, and that a build whose feature list
// lacks "container" is parsed as such.
func TestParseVersion(t *testing.T) {
	// The keys below must stay nested under "rpm-ostree:"; without the
	// indentation they become top-level keys, which UnmarshalStrict rejects
	// as unknown fields of RpmOstreeVersionData.
	verdata := `rpm-ostree:
 Version: '2022.10'
 Git: 6b302116c969397fd71899e3b9bb3b8c100d1af9
 Features:
  - rust
  - compose
  - rhsm
`
	var q RpmOstreeVersionData
	if err := yaml.UnmarshalStrict([]byte(verdata), &q); err != nil {
		// Report through the testing framework instead of panicking so the
		// failure is attributed to this test and other tests still run.
		t.Fatalf("failed to parse rpm-ostree version data: %v", err)
	}

	assert.Equal(t, "2022.10", q.Root.Version)
	assert.Equal(t, "6b302116c969397fd71899e3b9bb3b8c100d1af9", q.Root.Git)
	assert.Contains(t, q.Root.Features, "rust")
	assert.NotContains(t, q.Root.Features, "container")
}
31 changes: 28 additions & 3 deletions pkg/daemon/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -1908,8 +1908,31 @@ func (dn *Daemon) updateOS(config *mcfgv1.MachineConfig, osImageContentDir strin
func (dn *Daemon) updateLayeredOS(config *mcfgv1.MachineConfig) error {
newURL := config.Spec.OSImageURL
glog.Infof("Updating OS to layered image %s", newURL)
if err := dn.NodeUpdaterClient.RebaseLayered(newURL); err != nil {
return fmt.Errorf("failed to update OS to %s : %w", newURL, err)

newEnough, err := RpmOstreeIsNewEnoughForLayering()
if err != nil {
return err
}
// If the host isn't new enough to understand the new container model natively, run as a privileged container.
// See https://github.com/coreos/rpm-ostree/pull/3961 and https://issues.redhat.com/browse/MCO-356
// This currently will incur a double reboot; see https://github.com/coreos/rpm-ostree/issues/4018
if !newEnough {
dn.logSystem("rpm-ostree is not new enough for layering; forcing an update via container")
err := runCmdSync("systemd-run", "--unit", "machine-config-daemon-update-rpmostree-via-container", "--collect", "--wait", "--", "podman", "run", "--authfile", "/var/lib/kubelet/config.json", "--privileged", "--pid=host", "--net=host", "--rm", "-v", "/:/run/host", newURL, "rpm-ostree", "ex", "deploy-from-self", "/run/host")
if err != nil {
return err
}
dn.needSecondaryReboot = true
// We'll need to do a second reconciliation pass i.e. an extra reboot today, but that won't
// be necessary after we ship the newer rpm-ostree into older releases.
// See also https://github.com/coreos/rpm-ostree/issues/4018
if err := os.WriteFile(constants.MachineConfigDaemonPersistentForceOnceFile, []byte(""), 0o644); err != nil {
return err
}
} else {
if err := dn.NodeUpdaterClient.RebaseLayered(newURL); err != nil {
return fmt.Errorf("failed to update OS to %s : %w", newURL, err)
}
}

return nil
Expand Down Expand Up @@ -1957,6 +1980,7 @@ type journalMsg struct {
Message string `json:"MESSAGE,omitempty"`
BootID string `json:"BOOT_ID,omitempty"`
Pending string `json:"PENDING,omitempty"`
Force string `json:"MCO_FORCE,omitempty"`
OldLogger string `json:"OPENSHIFT_MACHINE_CONFIG_DAEMON_LEGACY_LOG_HACK,omitempty"` // unused today
}

Expand Down Expand Up @@ -2020,7 +2044,8 @@ func (dn *Daemon) storePendingState(pending *mcfgv1.MachineConfig, isPending int
pendingState.WriteString(fmt.Sprintf(`MESSAGE_ID=%s
MESSAGE=%s
BOOT_ID=%s
PENDING=%d`, pendingStateMessageID, pending.GetName(), dn.bootID, isPending))
MCO_FORCE=%v
PENDING=%d`, pendingStateMessageID, pending.GetName(), dn.bootID, dn.needSecondaryReboot, isPending))

logger.Stdin = &pendingState
return logger.CombinedOutput()
Expand Down

0 comments on commit b63b319

Please sign in to comment.