Skip to content

Commit

Permalink
Revert "tests/ingition/kdump: add a remote NFS kdump test"
Browse files Browse the repository at this point in the history
This reverts commit b10d8dc.

The test passes on F40 but fails on F41+ [1], and it is also failing
on RHCOS, so let's just yank it for now and re-apply it once it's
confirmed to be passing everywhere.

[1] coreos/fedora-coreos-tracker#1820
  • Loading branch information
dustymabe committed Oct 25, 2024
1 parent 7653b93 commit af1468c
Showing 1 changed file with 25 additions and 159 deletions.
184 changes: 25 additions & 159 deletions mantle/kola/tests/ignition/kdump.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,63 +28,6 @@ func init() {
Tags: []string{"kdump", kola.SkipBaseChecksTag, kola.NeedsInternetTag},
Platforms: []string{"qemu"},
})
register.RegisterTest(&register.Test{
Run: kdumpNFSTest,
ClusterSize: 0,
Name: `kdump.crash.nfs`,
Description: "Verifies kdump logs are exported to NFS destination",
Tags: []string{"kdump", kola.SkipBaseChecksTag, kola.NeedsInternetTag},
Platforms: []string{"qemu"},
})
}

// testRemoteKdump tests the remote kdump feature by:
//   - making sure kdump.service is ready on kdump_machine
//   - crashing the kernel of kdump_machine
//   - monitoring crash_path on remote_machine until a vmcore file appears
//
// Any failure (kdump never becoming ready, the crash command failing to
// queue, or no vmcore showing up in time) aborts the test through c.Fatalf.
func testRemoteKdump(c cluster.TestCluster, kdump_machine platform.Machine, remote_machine platform.Machine, crash_path string) {

	// Wait for kdump to become active
	// 3 minutes should be enough to generate the kdump initramfs
	err := util.Retry(12, 15*time.Second, func() error {
		kdump_status, err := c.SSH(kdump_machine, "systemctl is-active kdump.service")
		if err != nil {
			return err
		}
		if string(kdump_status) == "inactive" {
			return fmt.Errorf("Kdump.service is not ready: %s.", string(kdump_status))
		}
		return nil
	})
	if err != nil {
		c.Fatalf("Timed out while waiting for kdump.service to be ready: %v", err)
	}

	// crash the kernel
	// use systemd-run because directly calling `echo c > ...` will always
	// throw an error as the kernel immediately hangs.
	_, err = c.SSH(kdump_machine, "sudo systemd-run sh -c 'sleep 5 && echo c > /proc/sysrq-trigger'")
	if err != nil {
		c.Fatalf("failed to queue kernel crash: %v", err)
	}

	// Wait for kdump to create vmcore dump on the remote host
	err = util.Retry(5, 10*time.Second, func() error {
		// Look for the crash files created on the remote machine.
		// The pattern is quoted so that the remote shell hands it to find
		// untouched instead of expanding it against its working directory.
		logs, err := c.SSH(remote_machine, fmt.Sprintf("find %s -type f -name 'vmcore*'", crash_path))
		if err != nil {
			return fmt.Errorf("failed to search for vmcore: %w", err)
		}
		// Treat nil and empty output alike: both mean find matched nothing.
		if len(logs) == 0 {
			return fmt.Errorf("No vmcore created on remote host")
		}
		return nil
	})
	if err != nil {
		c.Fatalf("Timed out while waiting for kdump to create vmcore files: %v", err)
	}
}

// The destination VM for kdump logs
Expand Down Expand Up @@ -237,122 +180,45 @@ kernel_arguments:
c.Fatalf("Unable to create test machine: %v", err)
}

testRemoteKdump(c, kdump_machine, ssh_host.Machine, "/home/core/crash")
}

// NfsServer describes the VM that kdump logs are sent to over NFS.
type NfsServer struct {
	// Machine is the handle of the VM acting as the NFS destination.
	Machine platform.Machine

	// MachineAddress is the address of that VM, kept as a plain string.
	MachineAddress string
}

func setupNFSMachine(c cluster.TestCluster) NfsServer {
var m platform.Machine
var err error

options := platform.QemuMachineOptions{
HostForwardPorts: []platform.HostForwardPort{
{Service: "ssh", HostPort: 0, GuestPort: 22},
// Kdump NFS option does not allow a custom port
{Service: "nfs", HostPort: 2049, GuestPort: 2049},
},
}
// Wait for kdump to become active
// 3 minutes should be enough to generate the kdump initramfs
err = util.Retry(12, 15*time.Second, func() error {

nfs_server_butane := conf.Butane(`variant: fcos
version: 1.5.0
storage:
files:
- path: /etc/containers/systemd/nfs.container
overwrite: true
contents:
inline: |
[Container]
Image=quay.io/openshifttest/nfs-server
Volume=/var/nfs:/mnt/data
PublishPort=2049:2049
PodmanArgs=--privileged
[Install]
WantedBy=default.target
directories:
- path: /var/nfs/crash`)
kdump_status, err := c.SSH(kdump_machine, "systemctl is-active kdump.service")

// start the machine
switch c := c.Cluster.(type) {
// These cases have to be separated because when put together to the same case statement
// the golang compiler no longer checks that the individual types in the case have the
// NewMachineWithQemuOptions function, but rather whether platform.Cluster
// does which fails
case *qemu.Cluster:
m, err = c.NewMachineWithQemuOptions(nfs_server_butane, options)
default:
panic("unreachable")
}
if err != nil {
return err
} else if string(kdump_status) == "inactive" {
return fmt.Errorf(fmt.Sprintf("Kdump.service is not ready: %s.", string(kdump_status)))
}
return nil
})
if err != nil {
c.Fatal(err)
}

return NfsServer{
Machine: m,
MachineAddress: "10.0.2.2",
}
}

func kdumpNFSTest(c cluster.TestCluster) {
nfs_host := setupNFSMachine(c)

butane := conf.Butane(fmt.Sprintf(`variant: fcos
version: 1.5.0
storage:
files:
- path: /etc/kdump.conf
overwrite: true
contents:
inline: |
nfs %s:/
path /crash
core_collector makedumpfile -l --message-level 1 -d 31
extra_bins /sbin/mount.nfs
extra_modules nfs nfsv3 nfs_layout_nfsv41_files blocklayoutdriver nfs_layout_flexfiles nfs_layout_nfsv41_files
systemd:
units:
- name: kdump.service
enabled: true
dropins:
- name: debug.conf
contents: |
[Service]
Environment="debug=1"
kernel_arguments:
should_exist:
- crashkernel=512M`,
nfs_host.MachineAddress))

opts := platform.MachineOptions{
MinMemory: 2048,
c.Fatalf("Timed out while waiting for kdump.service to be ready: %v", err)
}

kdump_machine, err := c.NewMachineWithOptions(butane, opts)
// crash the kernel
// use systemd-run because directly calling `echo c...` will always
// throw an error as the kernel immediately hangs.
_, err = c.SSH(kdump_machine, "sudo systemd-run sh -c 'sleep 5 && echo c > /proc/sysrq-trigger'")
if err != nil {
c.Fatalf("Unable to create test machine: %v", err)
c.Fatalf("failed to queue kernel crash: %v", err)
}

// XXX Refactor this
// Wait for nfs server to become active
// 1 minute should be enough to pull the container image
err = util.Retry(4, 15*time.Second, func() error {
// Wait for kdump to create vmcore dump on the remote host
err = util.Retry(5, 10*time.Second, func() error {

nfs_status, err := c.SSH(nfs_host.Machine, "systemctl is-active nfs.service")
// Look for the crash files created on the SSH machine
logs, err := c.SSH(ssh_host.Machine, "find /home/core/crash -type f -name vmcore*")

if err != nil {
return err
} else if string(nfs_status) == "inactive" {
return fmt.Errorf("nfs.service is not ready: %s.", string(nfs_status))
return fmt.Errorf("failed to search for vmcore: %w", err)
} else if logs == nil {
return fmt.Errorf("No vmcore created on remote SSH host")
}
return nil
})
if err != nil {
c.Fatalf("Timed out while waiting for nfs.service to be ready: %v", err)
c.Fatalf("Timed out while waiting for kdump to create vmcore files: %v", err)
}

testRemoteKdump(c, kdump_machine, nfs_host.Machine, "/var/nfs/crash")
}

0 comments on commit af1468c

Please sign in to comment.