Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 25 additions & 159 deletions mantle/kola/tests/ignition/kdump.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,63 +28,6 @@ func init() {
Tags: []string{"kdump", kola.SkipBaseChecksTag, kola.NeedsInternetTag},
Platforms: []string{"qemu"},
})
register.RegisterTest(&register.Test{
Run: kdumpNFSTest,
ClusterSize: 0,
Name: `kdump.crash.nfs`,
Description: "Verifies kdump logs are exported to NFS destination",
Tags: []string{"kdump", kola.SkipBaseChecksTag, kola.NeedsInternetTag},
Platforms: []string{"qemu"},
})
}

// testRemoteKdump tests the remote kdump feature by:
//   - making sure kdump.service is ready on kdump_machine
//   - crashing kdump_machine
//   - monitoring crash_path on remote_machine for the expected vmcore file
//
// Any failure aborts the test through c.Fatalf.
func testRemoteKdump(c cluster.TestCluster, kdump_machine platform.Machine, remote_machine platform.Machine, crash_path string) {

	// Wait for kdump to become active.
	// 12 tries with a 15 second delay: 3 minutes should be enough to
	// generate the kdump initramfs.
	err := util.Retry(12, 15*time.Second, func() error {
		kdump_status, err := c.SSH(kdump_machine, "systemctl is-active kdump.service")
		if err != nil {
			return err
		}
		if string(kdump_status) == "inactive" {
			return fmt.Errorf("Kdump.service is not ready: %s.", string(kdump_status))
		}
		return nil
	})
	if err != nil {
		c.Fatalf("Timed out while waiting for kdump.service to be ready: %v", err)
	}

	// Crash the kernel.
	// Use systemd-run because directly calling `echo c > ...` will always
	// throw an error as the kernel immediately hangs.
	_, err = c.SSH(kdump_machine, "sudo systemd-run sh -c 'sleep 5 && echo c > /proc/sysrq-trigger'")
	if err != nil {
		c.Fatalf("failed to queue kernel crash: %v", err)
	}

	// The pattern is quoted so that a shell on the remote side cannot expand
	// `vmcore*` against its working directory before find sees it.
	find_cmd := fmt.Sprintf("find %s -type f -name 'vmcore*'", crash_path)

	// Wait for kdump to create the vmcore dump on the remote host.
	err = util.Retry(5, 10*time.Second, func() error {
		// Look for the crash files created on the remote machine.
		logs, err := c.SSH(remote_machine, find_cmd)
		if err != nil {
			return fmt.Errorf("failed to search for vmcore: %w", err)
		}
		// find succeeds with empty output when nothing matches, so test the
		// length: an empty result is not guaranteed to be a nil slice.
		if len(logs) == 0 {
			return fmt.Errorf("No vmcore created on remote host")
		}
		return nil
	})
	if err != nil {
		c.Fatalf("Timed out while waiting for kdump to create vmcore files: %v", err)
	}
}

// The destination VM for kdump logs
Expand Down Expand Up @@ -237,122 +180,45 @@ kernel_arguments:
c.Fatalf("Unable to create test machine: %v", err)
}

testRemoteKdump(c, kdump_machine, ssh_host.Machine, "/home/core/crash")
}

// NfsServer describes the destination VM for kdump logs over NFS.
type NfsServer struct {
// Machine is the handle of the VM acting as the NFS server.
Machine platform.Machine
// MachineAddress is the address of the NFS server as a string; the NFS
// kdump test substitutes it into the `nfs <address>:/` line of kdump.conf.
MachineAddress string
}

func setupNFSMachine(c cluster.TestCluster) NfsServer {
var m platform.Machine
var err error

options := platform.QemuMachineOptions{
HostForwardPorts: []platform.HostForwardPort{
{Service: "ssh", HostPort: 0, GuestPort: 22},
// Kdump NFS option does not allow a custom port
{Service: "nfs", HostPort: 2049, GuestPort: 2049},
},
}
// Wait for kdump to become active
// 3 minutes should be enough to generate the kdump initramfs
err = util.Retry(12, 15*time.Second, func() error {

nfs_server_butane := conf.Butane(`variant: fcos
version: 1.5.0
storage:
files:
- path: /etc/containers/systemd/nfs.container
overwrite: true
contents:
inline: |
[Container]
Image=quay.io/openshifttest/nfs-server
Volume=/var/nfs:/mnt/data
PublishPort=2049:2049
PodmanArgs=--privileged
[Install]
WantedBy=default.target
directories:
- path: /var/nfs/crash`)
kdump_status, err := c.SSH(kdump_machine, "systemctl is-active kdump.service")

// start the machine
switch c := c.Cluster.(type) {
// These cases have to be separated because when put together to the same case statement
// the golang compiler no longer checks that the individual types in the case have the
// NewMachineWithQemuOptions function, but rather whether platform.Cluster
// does which fails
case *qemu.Cluster:
m, err = c.NewMachineWithQemuOptions(nfs_server_butane, options)
default:
panic("unreachable")
}
if err != nil {
return err
} else if string(kdump_status) == "inactive" {
return fmt.Errorf(fmt.Sprintf("Kdump.service is not ready: %s.", string(kdump_status)))
}
return nil
})
if err != nil {
c.Fatal(err)
}

return NfsServer{
Machine: m,
MachineAddress: "10.0.2.2",
}
}

func kdumpNFSTest(c cluster.TestCluster) {
nfs_host := setupNFSMachine(c)

butane := conf.Butane(fmt.Sprintf(`variant: fcos
version: 1.5.0
storage:
files:
- path: /etc/kdump.conf
overwrite: true
contents:
inline: |
nfs %s:/
path /crash
core_collector makedumpfile -l --message-level 1 -d 31
extra_bins /sbin/mount.nfs
extra_modules nfs nfsv3 nfs_layout_nfsv41_files blocklayoutdriver nfs_layout_flexfiles nfs_layout_nfsv41_files
systemd:
units:
- name: kdump.service
enabled: true
dropins:
- name: debug.conf
contents: |
[Service]
Environment="debug=1"
kernel_arguments:
should_exist:
- crashkernel=512M`,
nfs_host.MachineAddress))

opts := platform.MachineOptions{
MinMemory: 2048,
c.Fatalf("Timed out while waiting for kdump.service to be ready: %v", err)
}

kdump_machine, err := c.NewMachineWithOptions(butane, opts)
// crash the kernel
// use systemd-run because directly calling `echo c...` will always
// throw an error as the kernel immediately hangs.
_, err = c.SSH(kdump_machine, "sudo systemd-run sh -c 'sleep 5 && echo c > /proc/sysrq-trigger'")
if err != nil {
c.Fatalf("Unable to create test machine: %v", err)
c.Fatalf("failed to queue kernel crash: %v", err)
}

// XXX Refactor this
// Wait for nfs server to become active
// 1 minute should be enough to pull the container image
err = util.Retry(4, 15*time.Second, func() error {
// Wait for kdump to create vmcore dump on the remote host
err = util.Retry(5, 10*time.Second, func() error {

nfs_status, err := c.SSH(nfs_host.Machine, "systemctl is-active nfs.service")
// Look for the crash files created on the SSH machine
logs, err := c.SSH(ssh_host.Machine, "find /home/core/crash -type f -name vmcore*")

if err != nil {
return err
} else if string(nfs_status) == "inactive" {
return fmt.Errorf("nfs.service is not ready: %s.", string(nfs_status))
return fmt.Errorf("failed to search for vmcore: %w", err)
} else if logs == nil {
return fmt.Errorf("No vmcore created on remote SSH host")
}
return nil
})
if err != nil {
c.Fatalf("Timed out while waiting for nfs.service to be ready: %v", err)
c.Fatalf("Timed out while waiting for kdump to create vmcore files: %v", err)
}

testRemoteKdump(c, kdump_machine, nfs_host.Machine, "/var/nfs/crash")
}
Loading