Skip to content

Commit

Permalink
fix(fleet): Add better traces for systemd operations (#29366)
Browse files Browse the repository at this point in the history
  • Loading branch information
BaptisteFoy authored and grantseltzer committed Oct 2, 2024
1 parent b759bce commit 093712a
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 71 deletions.
45 changes: 0 additions & 45 deletions pkg/fleet/installer/service/commands.go

This file was deleted.

7 changes: 1 addition & 6 deletions pkg/fleet/installer/service/datadog_agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func SetupAgent(ctx context.Context, _ []string) (err error) {
span, ctx := tracer.StartSpanFromContext(ctx, "setup_agent")
defer func() {
if err != nil {
log.Errorf("Failed to setup agent: %s, reverting", err)
log.Errorf("Failed to setup agent, reverting: %s", err)
err = errors.Join(err, RemoveAgent(ctx))
}
span.Finish(tracer.WithError(err))
Expand Down Expand Up @@ -195,11 +195,6 @@ func stopOldAgentUnits(ctx context.Context) error {
defer span.Finish()
for _, unit := range stableUnits {
if err := stopUnit(ctx, unit); err != nil {
exitError, ok := err.(*exec.ExitError)
if ok && exitError.ExitCode() == 5 {
// exit code 5 means the unit is not loaded, we can continue
continue
}
return fmt.Errorf("failed to stop %s: %v", unit, err)
}
if err := disableUnit(ctx, unit); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion pkg/fleet/installer/service/datadog_installer.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func addDDAgentGroup(ctx context.Context) error {
func SetupInstaller(ctx context.Context) (err error) {
defer func() {
if err != nil {
log.Errorf("Failed to setup installer: %s, reverting", err)
log.Errorf("Failed to setup installer, reverting: %s", err)
err = RemoveInstaller(ctx)
}
}()
Expand Down
78 changes: 59 additions & 19 deletions pkg/fleet/installer/service/systemd.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ package service

import (
"context"
"errors"
"fmt"
"os"
"os/exec"
"path"
"path/filepath"

Expand All @@ -22,39 +24,71 @@ import (

const systemdPath = "/etc/systemd/system"

func stopUnit(ctx context.Context, unit string, args ...string) error {
func stopUnit(ctx context.Context, unit string, args ...string) (err error) {
span, _ := tracer.StartSpanFromContext(ctx, "stop_unit")
defer span.Finish()
defer func() { span.Finish(tracer.WithError(err)) }()
span.SetTag("unit", unit)
args = append([]string{"stop", unit}, args...)
return newCommandRunner(ctx, "systemctl", args...).runWithError()
err = exec.CommandContext(ctx, "systemctl", args...).Run()
exitErr := &exec.ExitError{}
if !errors.As(err, &exitErr) {
return err
}
span.SetTag("exit_code", exitErr.ExitCode())
if exitErr.ExitCode() == 5 {
// exit code 5 means the unit is not loaded, we can continue
return nil
}
return errors.New(string(exitErr.Stderr))
}

func startUnit(ctx context.Context, unit string, args ...string) error {
func startUnit(ctx context.Context, unit string, args ...string) (err error) {
span, _ := tracer.StartSpanFromContext(ctx, "start_unit")
defer span.Finish()
defer func() { span.Finish(tracer.WithError(err)) }()
span.SetTag("unit", unit)
args = append([]string{"start", unit}, args...)
return newCommandRunner(ctx, "systemctl", args...).runWithError()
err = exec.CommandContext(ctx, "systemctl", args...).Run()
exitErr := &exec.ExitError{}
if !errors.As(err, &exitErr) {
return err
}
span.SetTag("exit_code", exitErr.ExitCode())
return errors.New(string(exitErr.Stderr))
}

func enableUnit(ctx context.Context, unit string) error {
func enableUnit(ctx context.Context, unit string) (err error) {
span, _ := tracer.StartSpanFromContext(ctx, "enable_unit")
defer span.Finish()
defer func() { span.Finish(tracer.WithError(err)) }()
span.SetTag("unit", unit)
return newCommandRunner(ctx, "systemctl", "enable", unit).runWithError()
err = exec.CommandContext(ctx, "systemctl", "enable", unit).Run()
exitErr := &exec.ExitError{}
if !errors.As(err, &exitErr) {
return err
}
span.SetTag("exit_code", exitErr.ExitCode())
return errors.New(string(exitErr.Stderr))
}

func disableUnit(ctx context.Context, unit string) error {
func disableUnit(ctx context.Context, unit string) (err error) {
span, _ := tracer.StartSpanFromContext(ctx, "disable_unit")
defer span.Finish()
defer func() { span.Finish(tracer.WithError(err)) }()
span.SetTag("unit", unit)
return newCommandRunner(ctx, "systemctl", "disable", unit).runWithError()
err = exec.CommandContext(ctx, "systemctl", "disable", unit).Run()
exitErr := &exec.ExitError{}
if !errors.As(err, &exitErr) {
return err
}
span.SetTag("exit_code", exitErr.ExitCode())
if exitErr.ExitCode() == 5 {
// exit code 5 means the unit is not loaded, we can continue
return nil
}
return errors.New(string(exitErr.Stderr))
}

func loadUnit(ctx context.Context, unit string) error {
func loadUnit(ctx context.Context, unit string) (err error) {
span, _ := tracer.StartSpanFromContext(ctx, "load_unit")
defer span.Finish()
defer func() { span.Finish(tracer.WithError(err)) }()
span.SetTag("unit", unit)
content, err := embedded.FS.ReadFile(unit)
if err != nil {
Expand All @@ -64,17 +98,23 @@ func loadUnit(ctx context.Context, unit string) error {
return os.WriteFile(unitPath, content, 0644)
}

func removeUnit(ctx context.Context, unit string) error {
func removeUnit(ctx context.Context, unit string) (err error) {
span, _ := tracer.StartSpanFromContext(ctx, "remove_unit")
defer span.Finish()
defer func() { span.Finish(tracer.WithError(err)) }()
span.SetTag("unit", unit)
return os.Remove(path.Join(systemdPath, unit))
}

func systemdReload(ctx context.Context) error {
func systemdReload(ctx context.Context) (err error) {
span, _ := tracer.StartSpanFromContext(ctx, "systemd_reload")
defer span.Finish()
return newCommandRunner(ctx, "systemctl", "daemon-reload").runWithError()
defer func() { span.Finish(tracer.WithError(err)) }()
err = exec.CommandContext(ctx, "systemctl", "daemon-reload").Run()
exitErr := &exec.ExitError{}
if !errors.As(err, &exitErr) {
return err
}
span.SetTag("exit_code", exitErr.ExitCode())
return errors.New(string(exitErr.Stderr))
}

// isSystemdRunning checks if systemd is running using the documented way
Expand Down
26 changes: 26 additions & 0 deletions test/new-e2e/tests/installer/package_agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,32 @@ func (s *packageAgentSuite) TestRunPath() {
assert.True(s.T(), strings.HasPrefix(runPath, "/opt/datadog-packages/datadog-agent/"), "run_path is not in the expected location: %s", runPath)
}

// TestUpgrade_DisabledAgentDebRPM_to_OCI covers an upgrade where the deb/rpm
// agent is installed, its systemd unit is then disabled, and the OCI agent is
// installed on top through the install script.
func (s *packageAgentSuite) TestUpgrade_DisabledAgentDebRPM_to_OCI() {
// Install the agent from the deb/rpm package only (envForceNoInstall) and
// check that the package manager reports it as installed.
s.RunInstallScript(envForceNoInstall("datadog-agent"))
s.host.AssertPackageInstalledByPackageManager("datadog-agent")

// Cleanup. Deferred calls run in reverse order, so purgeAgentDebInstall runs
// before Purge.
defer s.Purge()
defer s.purgeAgentDebInstall()

// Check the host state after the deb/rpm install.
// NOTE(review): the meaning of assertUnits' boolean is not visible here; it
// is true before the OCI install and false after — confirm against its
// definition.
state := s.host.State()
s.assertUnits(state, true)
state.AssertDirExists("/opt/datadog-agent", 0755, "dd-agent", "dd-agent")

// Disable the deb/rpm unit before upgrading.
s.host.Run("sudo systemctl disable datadog-agent")

// Install the OCI agent over the disabled deb/rpm install.
s.RunInstallScript(envForceInstall("datadog-agent"))

// Re-read the host state: the agent must now be reported as installed by
// both the installer and the package manager.
state = s.host.State()
s.assertUnits(state, false)
s.host.AssertPackageInstalledByInstaller("datadog-agent")
s.host.AssertPackageInstalledByPackageManager("datadog-agent")

// Look for /opt/datadog-packages in the unit's ExecStart.
// NOTE(review): presumably host.Run fails the test when grep finds no match
// (non-zero exit) — confirm.
s.host.Run("sudo systemctl show datadog-agent -p ExecStart | grep /opt/datadog-packages")
}

func (s *packageAgentSuite) purgeAgentDebInstall() {
pkgManager := s.host.GetPkgManager()
switch pkgManager {
Expand Down

0 comments on commit 093712a

Please sign in to comment.