diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index e6269bea812..78b6998c38f 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -253,7 +253,7 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error { if c.cgroupns { subsystemPath := filepath.Join(c.root, b.Destination) subsystemName := filepath.Base(b.Destination) - if err := os.MkdirAll(subsystemPath, 0o755); err != nil { + if err := utils.MkdirAllInRoot(c.root, subsystemPath, 0o755); err != nil { return err } if err := utils.WithProcfd(c.root, b.Destination, func(procfd string) error { @@ -406,15 +406,26 @@ func createMountpoint(rootfs string, m *configs.Mount, mountFd *int, source stri return "", fmt.Errorf("%w: file bind mount over rootfs", errRootfsToFile) } // Make the parent directory. - if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil { + destDir, destBase := filepath.Split(dest) + destDirFd, err := utils.MkdirAllInRootOpen(rootfs, destDir, 0o755) + if err != nil { return "", fmt.Errorf("make parent dir of file bind-mount: %w", err) } - // Make the target file. - f, err := os.OpenFile(dest, os.O_CREATE, 0o755) - if err != nil { - return "", fmt.Errorf("create target of file bind-mount: %w", err) + defer destDirFd.Close() + // Make the target file. We want to avoid opening any file that is + // already there because it could be a "bad" file like an invalid + // device or hung tty that might cause a DoS, so we use mknodat. + // destBase does not contain any "/" components, and mknodat does + // not follow trailing symlinks, so we can safely just call mknodat + // here. + if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|0o644, 0); err != nil { + // If we get EEXIST, there was already an inode there and + // we can consider that a success. + if !errors.Is(err, unix.EEXIST) { + err = &os.PathError{Op: "mknod regular file", Path: dest, Err: err} + return "", fmt.Errorf("create target of file bind-mount: %w", err) + } } - _ = f.Close() // Nothing left to do. return dest, nil } @@ -433,7 +444,7 @@ func createMountpoint(rootfs string, m *configs.Mount, mountFd *int, source stri } } - if err := os.MkdirAll(dest, 0o755); err != nil { + if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil { return "", err } return dest, nil @@ -463,7 +474,7 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error { } else if !fi.IsDir() { return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device) } - if err := os.MkdirAll(dest, 0o755); err != nil { + if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil { return err } // Selinux kernels do not support labeling of /proc or /sys. @@ -751,7 +762,7 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error { if dest == rootfs { return fmt.Errorf("%w: mknod over rootfs", errRootfsToFile) } - if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil { + if err := utils.MkdirAllInRoot(rootfs, filepath.Dir(dest), 0o755); err != nil { return err } if bind { diff --git a/libcontainer/system/linux.go b/libcontainer/system/linux.go index 16edc6ba6d9..32bab6922bd 100644 --- a/libcontainer/system/linux.go +++ b/libcontainer/system/linux.go @@ -6,6 +6,8 @@ package system import ( "os" "os/exec" + "runtime" + "strings" "unsafe" "golang.org/x/sys/unix" @@ -102,3 +104,42 @@ func GetSubreaper() (int, error) { return int(i), nil } + +func prepareAt(dir *os.File, path string) (int, string) { + if dir == nil { + return unix.AT_FDCWD, path + } + + // Rather than just filepath.Join-ing path here, do it manually so the + // error and handle correctly indicate cases like path=".." as being + // relative to the correct directory. The handle.Name() might end up being + // wrong but because this is (currently) only used in MkdirAllInRoot, that + // isn't a problem. + dirName := dir.Name() + if !strings.HasSuffix(dirName, "/") { + dirName += "/" + } + fullPath := dirName + path + + return int(dir.Fd()), fullPath +} + +func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) { + dirFd, fullPath := prepareAt(dir, path) + fd, err := unix.Openat(dirFd, path, flags, mode) + if err != nil { + return nil, &os.PathError{Op: "openat", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + return os.NewFile(uintptr(fd), fullPath), nil +} + +func Mkdirat(dir *os.File, path string, mode uint32) error { + dirFd, fullPath := prepareAt(dir, path) + err := unix.Mkdirat(dirFd, path, mode) + if err != nil { + err = &os.PathError{Op: "mkdirat", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + return err +} diff --git a/libcontainer/utils/utils_unix.go b/libcontainer/utils/utils_unix.go index 0d95a203789..460b94cef3f 100644 --- a/libcontainer/utils/utils_unix.go +++ b/libcontainer/utils/utils_unix.go @@ -4,12 +4,17 @@ package utils import ( + "errors" "fmt" "os" + "path/filepath" "strconv" "strings" _ "unsafe" // for go:linkname + "github.com/opencontainers/runc/libcontainer/system" + + securejoin "github.com/cyphar/filepath-securejoin" "golang.org/x/sys/unix" ) @@ -130,3 +135,112 @@ func IsLexicallyInRoot(root, path string) bool { } return strings.HasPrefix(path, root) } + +// MkdirAllInRootOpen attempts to make +// +// path, _ := securejoin.SecureJoin(root, unsafePath) +// os.MkdirAll(path, mode) +// os.Open(path) +// +// safer against attacks where components in the path are changed between +// SecureJoin returning and MkdirAll (or Open) being called. In particular, we +// try to detect any symlink components in the path while we are doing the +// MkdirAll. +// +// NOTE: Unlike os.MkdirAll, mode is not Go's os.FileMode, it is the unix mode +// (the suid/sgid/sticky bits are not the same as for os.FileMode). +// +// NOTE: If unsafePath is a subpath of root, we assume that you have already +// called SecureJoin and so we use the provided path verbatim without resolving +// any symlinks (this is done in a way that avoids symlink-exchange races). +// This means that the path also must not contain ".." elements, otherwise an +// error will occur. +// +// This is a somewhat less safe alternative to +// , but it should +// detect attempts to trick us into creating directories outside of the root. +// We should migrate to securejoin.MkdirAll once it is merged. +func MkdirAllInRootOpen(root, unsafePath string, mode uint32) (_ *os.File, Err error) { + // If the path is already "within" the root, use it verbatim. + fullPath := unsafePath + if !IsLexicallyInRoot(root, unsafePath) { + var err error + fullPath, err = securejoin.SecureJoin(root, unsafePath) + if err != nil { + return nil, err + } + } + subPath, err := filepath.Rel(root, fullPath) + if err != nil { + return nil, err + } + + // Check for any silly mode bits. + if mode&^0o7777 != 0 { + return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode) + } + + currentDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + if err != nil { + return nil, fmt.Errorf("open root handle: %w", err) + } + defer func() { + if Err != nil { + currentDir.Close() + } + }() + + for _, part := range strings.Split(subPath, string(filepath.Separator)) { + switch part { + case "", ".": + // Skip over no-op components. + continue + case "..": + return nil, fmt.Errorf("possible breakout detected: found %q component in SecureJoin subpath %s", part, subPath) + } + + nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) + switch { + case err == nil: + // Update the currentDir. + _ = currentDir.Close() + currentDir = nextDir + + case errors.Is(err, unix.ENOTDIR): + // This might be a symlink or some other random file. Either way, + // error out. + return nil, fmt.Errorf("cannot mkdir in %s/%s: %w", currentDir.Name(), part, unix.ENOTDIR) + + case errors.Is(err, os.ErrNotExist): + // Luckily, mkdirat will not follow trailing symlinks, so this is + // safe to do as-is. + if err := system.Mkdirat(currentDir, part, mode); err != nil { + return nil, err + } + // Open the new directory. There is a race here where an attacker + // could swap the directory with a different directory, but + // MkdirAll's fuzzy semantics mean we don't care about that. + nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) + if err != nil { + return nil, fmt.Errorf("open newly created directory: %w", err) + } + // Update the currentDir. + _ = currentDir.Close() + currentDir = nextDir + + default: + return nil, err + } + } + return currentDir, nil +} + +// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the +// returned handle, for callers that don't need to use it. +func MkdirAllInRoot(root, unsafePath string, mode uint32) error { + f, err := MkdirAllInRootOpen(root, unsafePath, mode) + if err == nil { + _ = f.Close() + } + return err +}