diff --git a/libcontainer/seccomp/patchbpf/enosys_linux_test.go b/libcontainer/seccomp/patchbpf/enosys_linux_test.go new file mode 100644 index 00000000000..34515618b21 --- /dev/null +++ b/libcontainer/seccomp/patchbpf/enosys_linux_test.go @@ -0,0 +1,280 @@ +// +build linux,cgo,seccomp + +package patchbpf + +import ( + "bytes" + "encoding/binary" + "fmt" + "testing" + + "github.com/opencontainers/runc/libcontainer/configs" + + libseccomp "github.com/seccomp/libseccomp-golang" + "golang.org/x/net/bpf" +) + +type seccompData struct { + Syscall uint32 // NOTE: We assume sizeof(int) == 4. + Arch uint32 + IP uint64 + Args [6]uint64 +} + +// mockSyscallPayload creates a fake seccomp_data struct with the given data. +func mockSyscallPayload(t *testing.T, sysno libseccomp.ScmpSyscall, arch nativeArch, args ...uint64) []byte { + var buf bytes.Buffer + + data := seccompData{ + Syscall: uint32(sysno), + Arch: uint32(arch), + IP: 0xDEADBEEFCAFE, + } + + copy(data.Args[:], args) + if len(args) > 6 { + t.Fatalf("bad syscall payload: linux only supports 6-argument syscalls") + } + + // NOTE: We use BigEndian here because golang.org/x/net/bpf assumes that + // all payloads are big-endian while seccomp uses host endianness. + if err := binary.Write(&buf, binary.BigEndian, data); err != nil { + t.Fatalf("bad syscall payload: cannot write data: %v", err) + } + return buf.Bytes() +} + +// retFallthrough is returned by the mockFilter. If a the mock filter returns +// this value, it indicates "fallthrough to libseccomp-generated filter". +const retFallthrough uint32 = 0xDEADBEEF + +// mockFilter returns a BPF VM that contains a mock filter with an -ENOSYS +// stub. If the filter returns retFallthrough, the stub filter has permitted +// the syscall to pass. +func mockFilter(t *testing.T, config *configs.Seccomp) *bpf.VM { + patch, err := generatePatch(config) + if err != nil { + t.Fatalf("mock filter: generate enosys patch: %v", err) + } + + program := append(patch, bpf.RetConstant{Val: retFallthrough}) + for idx, insn := range program { + t.Logf(" [%4.1d] %s", idx, insn) + } + + vm, err := bpf.NewVM(program) + if err != nil { + t.Fatalf("mock filter: compile BPF VM: %v", err) + } + return vm +} + +// fakeConfig generates a fake libcontainer seccomp configuration. The syscalls +// are added with an action distinct from the default action. +func fakeConfig(defaultAction configs.Action, explicitSyscalls []string, arches []string) *configs.Seccomp { + config := configs.Seccomp{ + DefaultAction: defaultAction, + Architectures: arches, + } + syscallAction := configs.Allow + if syscallAction == defaultAction { + syscallAction = configs.Kill + } + for _, syscall := range explicitSyscalls { + config.Syscalls = append(config.Syscalls, &configs.Syscall{ + Name: syscall, + Action: syscallAction, + }) + } + return &config +} + +// List copied from . +var testArches = []string{ + "x86", + "amd64", + "x32", + "arm", + "arm64", + "mips", + "mips64", + "mips64n32", + "mipsel", + "mipsel64", + "mipsel64n32", + "ppc", + "ppc64", + "ppc64le", + "s390", + "s390x", +} + +func archStringToNative(arch string) (nativeArch, error) { + scmpArch, err := libseccomp.GetArchFromString(arch) + if err != nil { + return 0, fmt.Errorf("unknown architecture %q: %v", arch, err) + } + return archToNative(scmpArch) +} + +func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string) { + explicitSyscalls := []string{ + "setns", + "kcmp", + "renameat2", + "openat2", + } + + implicitSyscalls := []string{ + "clone", + "openat", + "read", + "write", + } + + futureSyscalls := []libseccomp.ScmpSyscall{1000, 7331} + + // Quick lookups for which arches are enabled. + archSet := map[string]bool{} + for _, arch := range arches { + archSet[arch] = true + } + + for _, test := range []struct { + start, end int + }{ + {0, 1}, // [setns] + {0, 2}, // [setns, process_vm_readv] + {1, 2}, // [process_vm_readv] + {1, 3}, // [process_vm_readv, renameat2, openat2] + {1, 4}, // [process_vm_readv, renameat2, openat2] + {3, 4}, // [openat2] + } { + config := fakeConfig(defaultAction, explicitSyscalls[test.start:test.end], arches) + filter := mockFilter(t, config) + + // The syscalls are in increasing order of newness, so all syscalls + // after the last allowed syscall will give -ENOSYS. + enosysStart := test.end + + for _, arch := range testArches { + type syscallTest struct { + syscall string + sysno libseccomp.ScmpSyscall + expected int + } + + scmpArch, err := libseccomp.GetArchFromString(arch) + if err != nil { + t.Fatalf("unknown libseccomp architecture %q: %v", arch, err) + } + + nativeArch, err := archToNative(scmpArch) + if err != nil { + t.Fatalf("unknown audit architecture %q: %v", arch, err) + } + + var syscallTests []syscallTest + + // Add explicit syscalls (whether they will return -ENOSYS + // depends on the filter rules). + for idx, syscall := range explicitSyscalls { + expected := int(retFallthrough) + if idx >= enosysStart { + expected = int(retErrnoEnosys) + } + sysno, err := libseccomp.GetSyscallFromNameByArch(syscall, scmpArch) + if err != nil { + t.Fatalf("unknown syscall %q on arch %q: %v", syscall, arch, err) + } + syscallTests = append(syscallTests, syscallTest{ + syscall, + sysno, + expected, + }) + } + + // Add implicit syscalls. + for _, syscall := range implicitSyscalls { + sysno, err := libseccomp.GetSyscallFromNameByArch(syscall, scmpArch) + if err != nil { + t.Fatalf("unknown syscall %q on arch %q: %v", syscall, arch, err) + } + syscallTests = append(syscallTests, syscallTest{ + sysno: sysno, + syscall: syscall, + expected: int(retFallthrough), + }) + } + + // Add future syscalls. + for _, sysno := range futureSyscalls { + baseSysno, err := libseccomp.GetSyscallFromNameByArch("openat2", scmpArch) + if err != nil { + t.Fatalf("unknown syscall 'openat2' on arch %q: %v", arch, err) + } + sysno += baseSysno + + syscallTests = append(syscallTests, syscallTest{ + sysno: sysno, + syscall: fmt.Sprintf("syscall_%#x", sysno), + expected: int(retErrnoEnosys), + }) + } + + // Test syscalls in the explicit list. + for _, test := range syscallTests { + // Override the expected value in the two special cases. + if !archSet[arch] || isAllowAction(defaultAction) { + test.expected = int(retFallthrough) + } + + payload := mockSyscallPayload(t, test.sysno, nativeArch, 0x1337, 0xF00BA5) + ret, err := filter.Run(payload) + if err != nil { + t.Fatalf("error running filter: %v", err) + } + if ret != test.expected { + t.Logf("payload: %#v", payload) + t.Errorf("filter %s(%d) %q(%d): got %#x, want %#x", arch, nativeArch, test.syscall, test.sysno, ret, test.expected) + } + } + } + } +} + +var testActions = map[string]configs.Action{ + "allow": configs.Allow, + "log": configs.Log, + "errno": configs.Errno, + "kill": configs.Kill, +} + +func TestEnosysStub_SingleArch(t *testing.T) { + for _, arch := range testArches { + arches := []string{arch} + t.Run("arch="+arch, func(t *testing.T) { + for name, action := range testActions { + t.Run("action="+name, func(t *testing.T) { + testEnosysStub(t, action, arches) + }) + } + }) + } +} + +func TestEnosysStub_MultiArch(t *testing.T) { + for end := 0; end < len(testArches); end++ { + for start := 0; start < end; start++ { + arches := testArches[start:end] + if len(arches) <= 1 { + continue + } + for name, action := range testActions { + t.Run("action="+name, func(t *testing.T) { + testEnosysStub(t, action, arches) + }) + } + } + } +}