-
-
Notifications
You must be signed in to change notification settings - Fork 14.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
nixos/security/wrappers: fix for #98863 #201536
Changes from all commits
723cf9d
9b2d1a2
078dc83
0595c53
c01df35
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,18 +15,16 @@ | |
#include <stdint.h> | ||
#include <syscall.h> | ||
#include <byteswap.h> | ||
#include <elf.h> | ||
|
||
#define ASSERT(expr) ((expr) ? (void) 0 : assert_failure(#expr)) | ||
|
||
extern char **environ; | ||
|
||
// The WRAPPER_DIR macro is supplied at compile time so that it cannot | ||
// be changed at runtime | ||
static char *wrapper_dir = WRAPPER_DIR; | ||
|
||
// Wrapper debug variable name | ||
static char *wrapper_debug = "WRAPPER_DEBUG"; | ||
|
||
#define LOG_PREFIX "nixos-setuid-wrapper: " | ||
|
||
#define CAP_SETPCAP 8 | ||
|
||
#if __BYTE_ORDER == __BIG_ENDIAN | ||
|
@@ -35,23 +33,25 @@ static char *wrapper_debug = "WRAPPER_DEBUG"; | |
#define LE32_TO_H(x) (x) | ||
#endif | ||
|
||
static noreturn void assert_failure(const char *assertion) { | ||
fprintf(stderr, "Assertion `%s` in NixOS's wrapper.c failed.\n", assertion); | ||
fflush(stderr); | ||
abort(); | ||
} | ||
#if ELF_HEADER_SIZE == 32 | ||
typedef Elf32_Ehdr Ehdr; | ||
#elif ELF_HEADER_SIZE == 64 | ||
typedef Elf64_Ehdr Ehdr; | ||
#else | ||
#error unsupported ELF_HEADER_SIZE | ||
#endif | ||
|
||
int get_last_cap(unsigned *last_cap) { | ||
FILE* file = fopen("/proc/sys/kernel/cap_last_cap", "r"); | ||
if (file == NULL) { | ||
int saved_errno = errno; | ||
fprintf(stderr, "failed to open /proc/sys/kernel/cap_last_cap: %s\n", strerror(errno)); | ||
perror(LOG_PREFIX "failed to open /proc/sys/kernel/cap_last_cap\n"); | ||
return -saved_errno; | ||
} | ||
int res = fscanf(file, "%u", last_cap); | ||
if (res == EOF) { | ||
int saved_errno = errno; | ||
fprintf(stderr, "could not read number from /proc/sys/kernel/cap_last_cap: %s\n", strerror(errno)); | ||
perror(LOG_PREFIX "could not read number from /proc/sys/kernel/cap_last_cap: %s\n"); | ||
return -saved_errno; | ||
} | ||
fclose(file); | ||
|
@@ -61,16 +61,17 @@ int get_last_cap(unsigned *last_cap) { | |
// Given the path to this program, fetch its configured capability set | ||
// (as set by `setcap ... /path/to/file`) and raise those capabilities | ||
// into the Ambient set. | ||
static int make_caps_ambient(const char *self_path) { | ||
static int make_caps_ambient(int exe_fd) { | ||
struct vfs_ns_cap_data data = {}; | ||
int r = getxattr(self_path, "security.capability", &data, sizeof(data)); | ||
|
||
int r = fgetxattr(exe_fd, "security.capability", &data, sizeof(data)); | ||
|
||
if (r < 0) { | ||
if (errno == ENODATA) { | ||
// no capabilities set | ||
return 0; | ||
} | ||
fprintf(stderr, "cannot get capabilities for %s: %s", self_path, strerror(errno)); | ||
fprintf(stderr, LOG_PREFIX "cannot get capabilities for wrapper: %s", strerror(errno)); | ||
return 1; | ||
} | ||
|
||
|
@@ -85,7 +86,7 @@ static int make_caps_ambient(const char *self_path) { | |
size = VFS_CAP_U32_3; | ||
break; | ||
default: | ||
fprintf(stderr, "BUG! Unsupported capability version 0x%x on %s. Report to NixOS bugtracker\n", version, self_path); | ||
fprintf(stderr, LOG_PREFIX "BUG! Unsupported capability version 0x%x on wrapper. Report to NixOS bugtracker\n", version); | ||
return 1; | ||
} | ||
|
||
|
@@ -102,7 +103,7 @@ static int make_caps_ambient(const char *self_path) { | |
} | ||
|
||
if (syscall(SYS_capset, &header, &user_data) < 0) { | ||
fprintf(stderr, "failed to inherit capabilities: %s", strerror(errno)); | ||
fprintf(stderr, LOG_PREFIX "failed to inherit capabilities: %s", strerror(errno)); | ||
return 1; | ||
} | ||
unsigned last_cap; | ||
|
@@ -125,113 +126,120 @@ static int make_caps_ambient(const char *self_path) { | |
// though???? I'm preferring a strict vs. loose policy here. | ||
if (cap == CAP_SETPCAP) { | ||
if(getenv(wrapper_debug)) { | ||
fprintf(stderr, "cap_setpcap in set, skipping it\n"); | ||
fprintf(stderr, LOG_PREFIX "cap_setpcap in set, skipping it\n"); | ||
} | ||
continue; | ||
} | ||
if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, (unsigned long) cap, 0, 0)) { | ||
fprintf(stderr, "cannot raise the capability %d into the ambient set: %s\n", cap, strerror(errno)); | ||
fprintf(stderr, LOG_PREFIX "cannot raise the capability %d into the ambient set: %s\n", cap, strerror(errno)); | ||
return 1; | ||
} | ||
if (getenv(wrapper_debug)) { | ||
fprintf(stderr, "raised %d into the ambient capability set\n", cap); | ||
fprintf(stderr, LOG_PREFIX "raised %d into the ambient capability set\n", cap); | ||
} | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
int readlink_malloc(const char *p, char **ret) { | ||
size_t l = FILENAME_MAX+1; | ||
int r; | ||
ssize_t get_file_size(int exe_fd) { | ||
struct stat s; | ||
|
||
for (;;) { | ||
char *c = calloc(l, sizeof(char)); | ||
if (!c) { | ||
return -ENOMEM; | ||
} | ||
if (fstat(exe_fd, &s) == -1) { | ||
fprintf(stderr, LOG_PREFIX "failed to stat file /proc/self/exe: %s\n", strerror(errno)); | ||
return -1; | ||
} | ||
|
||
ssize_t n = readlink(p, c, l-1); | ||
if (n < 0) { | ||
r = -errno; | ||
free(c); | ||
return r; | ||
} | ||
return s.st_size; | ||
} | ||
|
||
if ((size_t) n < l-1) { | ||
c[n] = 0; | ||
*ret = c; | ||
int pread_all(int fd, void* buf, size_t size, off_t offset) { | ||
for (;;) { | ||
ssize_t nread = pread(fd, buf, size, offset); | ||
if (nread > 0) { | ||
if (nread != size) { | ||
fprintf(stderr, LOG_PREFIX "short read on /proc/self/exe: expected: %zu, got: %zd\n", size, nread); | ||
return -1; | ||
} | ||
return 0; | ||
} | ||
if (errno != EINTR) { | ||
perror(LOG_PREFIX "failed to read /proc/self/exe"); | ||
return -1; | ||
} | ||
} | ||
} | ||
|
||
ssize_t get_elf_size(int exe_fd) { | ||
Ehdr elf_header; | ||
if (pread_all(exe_fd, &elf_header, sizeof(Ehdr), 0) == -1) { | ||
return -1; | ||
}; | ||
|
||
return elf_header.e_shoff + (elf_header.e_shnum * elf_header.e_shentsize); | ||
} | ||
|
||
free(c); | ||
l *= 2; | ||
// Reads the program name from the end of the ELF structure. We assume that the | ||
// program name was appended to the program using `echo -n`. | ||
char* get_progname(int exe_fd) { | ||
Mic92 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
ssize_t file_size = get_file_size(exe_fd); | ||
if (file_size < 0) { | ||
return NULL; | ||
} | ||
|
||
ssize_t elf_size = get_elf_size(exe_fd); | ||
if (elf_size < 0) { | ||
return NULL; | ||
} | ||
|
||
if (elf_size > file_size) { | ||
fprintf(stderr, LOG_PREFIX "ELF size of /proc/self/exe is larger than file size!?\n"); | ||
return NULL; | ||
} | ||
|
||
if (elf_size == file_size) { | ||
fprintf(stderr, LOG_PREFIX "no extra bytes found for target program name found in security wrapper, did you forget to concat it?\n"); | ||
return NULL; | ||
} | ||
|
||
char *prog_name = calloc(file_size - elf_size, 1); | ||
if (!prog_name) { | ||
perror(LOG_PREFIX "cannot allocate memory for prog_name"); | ||
return NULL; | ||
} | ||
|
||
if (pread_all(exe_fd, prog_name, file_size - elf_size, elf_size) == -1) { | ||
free(prog_name); | ||
return NULL; | ||
}; | ||
|
||
return prog_name; | ||
} | ||
|
||
int main(int argc, char **argv) { | ||
ASSERT(argc >= 1); | ||
char *self_path = NULL; | ||
int self_path_size = readlink_malloc("/proc/self/exe", &self_path); | ||
if (self_path_size < 0) { | ||
fprintf(stderr, "cannot readlink /proc/self/exe: %s", strerror(-self_path_size)); | ||
} | ||
|
||
// Make sure that we are being executed from the right location, | ||
// i.e., `safe_wrapper_dir'. This is to prevent someone from creating | ||
// hard link `X' from some other location, along with a false | ||
// `X.real' file, to allow arbitrary programs from being executed | ||
// with elevated capabilities. | ||
int len = strlen(wrapper_dir); | ||
if (len > 0 && '/' == wrapper_dir[len - 1]) | ||
--len; | ||
ASSERT(!strncmp(self_path, wrapper_dir, len)); | ||
ASSERT('/' == wrapper_dir[0]); | ||
ASSERT('/' == self_path[len]); | ||
|
||
// Make *really* *really* sure that we were executed as | ||
// `self_path', and not, say, as some other setuid program. That | ||
// is, our effective uid/gid should match the uid/gid of | ||
// `self_path'. | ||
struct stat st; | ||
ASSERT(lstat(self_path, &st) != -1); | ||
|
||
ASSERT(!(st.st_mode & S_ISUID) || (st.st_uid == geteuid())); | ||
ASSERT(!(st.st_mode & S_ISGID) || (st.st_gid == getegid())); | ||
|
||
// And, of course, we shouldn't be writable. | ||
ASSERT(!(st.st_mode & (S_IWGRP | S_IWOTH))); | ||
|
||
// Read the path of the real (wrapped) program from <self>.real. | ||
char real_fn[PATH_MAX + 10]; | ||
int real_fn_size = snprintf(real_fn, sizeof(real_fn), "%s.real", self_path); | ||
ASSERT(real_fn_size < sizeof(real_fn)); | ||
|
||
int fd_self = open(real_fn, O_RDONLY); | ||
ASSERT(fd_self != -1); | ||
|
||
char source_prog[PATH_MAX]; | ||
len = read(fd_self, source_prog, PATH_MAX); | ||
ASSERT(len != -1); | ||
ASSERT(len < sizeof(source_prog)); | ||
ASSERT(len > 0); | ||
source_prog[len] = 0; | ||
|
||
close(fd_self); | ||
int exe_fd = open("/proc/self/exe", O_CLOEXEC); | ||
if (exe_fd < 0) { | ||
perror(LOG_PREFIX "cannot open /proc/self/exe"); | ||
return 1; | ||
} | ||
|
||
char *prog_name = get_progname(exe_fd); | ||
if (!prog_name) { | ||
return 1; | ||
} | ||
|
||
// Read the capabilities set on the wrapper and raise them in to | ||
// the ambient set so the program we're wrapping receives the | ||
// capabilities too! | ||
if (make_caps_ambient(self_path) != 0) { | ||
free(self_path); | ||
if (make_caps_ambient(exe_fd) != 0) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm, if we allow just-execute under no-new-priv, should we just log a warning here and go on anyway? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. According to capabilities(7) no, because we might have raised some, but not all the capabilities we wanted raised. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, wouldn't it be more consistent with the other case to go on and see if the capabilities we can get are enough anyway for the task the user wants, though? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe. However, the documentation of capabilitysets is very confusing and claims that it's somehow important not to run things with subsets of capabilities they were intended to run with. I would like to not change the behaviour here until we understand why capabilities(7) makes such statements. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmmm. Wording looks generic in the «race-condition-like-assumption-breaking-style»… There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You whoever plays with a system to make a point to be honest (i.e. you do have root to setup a silly situation with containers and setcap and seccomp and whatever else should not be mixed, and then as a non-root user you do something you are not supposed to be able to do!) It just looks like the same invariants are enforced on execve and on changing the capability sets, but I did not look carefully so I definitely missed something (the question is whether it makes any difference). If we consider things like SGID + setcap and having one but not the other applied confuses the wrapped executable, we should also fail on setuid/setgid failure, no? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
No. There is no way to partially setgid (or to setuid-but-not-setgid or v.v.). In contrast that's very much not obvious with capabilities. E:
I assume you mean execve of a binary with filecaps. If so, then even the interface is different (according to docs): there is supposedly no effective capset on files, but only a single effective bit that applies to all caps. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is no way to «partially setgid», but setcap-without-setgid might still be a partial application of intended security context. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah this exclusivity is still there! Good, I guess
Hm, this part is not that interesting (different interfaces ending up with same conditions are fine), but there is indeed a capability-by-capability operation involved in the wrapper, so partial change is possible, you are right. |
||
free(prog_name); | ||
return 1; | ||
} | ||
free(self_path); | ||
|
||
execve(source_prog, argv, environ); | ||
fprintf(stderr, "%s: cannot run `%s': %s\n", | ||
argv[0], source_prog, strerror(errno)); | ||
execve(prog_name, argv, environ); | ||
|
||
fprintf(stderr, LOG_PREFIX "cannot execute `%s': %s\n", prog_name, strerror(errno)); | ||
free(prog_name); | ||
|
||
return 1; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,30 @@ | ||
{ stdenv, linuxHeaders, parentWrapperDir, debug ? false }: | ||
{ stdenv, linuxHeaders, debug ? false }: | ||
# For testing: | ||
# $ nix-build -E 'with import <nixpkgs> {}; pkgs.callPackage ./wrapper.nix { parentWrapperDir = "/run/wrappers"; debug = true; }' | ||
|
||
let | ||
elfHeaderSize = if stdenv.is64bit then | ||
"64" | ||
else if stdenv.is32bit then | ||
"32" | ||
else throw "unknown elf header size"; | ||
in | ||
stdenv.mkDerivation { | ||
name = "security-wrapper"; | ||
name = "nixos-security-wrapper"; | ||
buildInputs = [ linuxHeaders ]; | ||
dontUnpack = true; | ||
hardeningEnable = [ "pie" ]; | ||
CFLAGS = [ | ||
''-DWRAPPER_DIR="${parentWrapperDir}"'' | ||
''-DELF_HEADER_SIZE=${elfHeaderSize}'' | ||
] ++ (if debug then [ | ||
"-Werror" "-Og" "-g" | ||
] else [ | ||
"-Wall" "-O2" | ||
]); | ||
dontStrip = debug; | ||
|
||
installPhase = '' | ||
mkdir -p $out/bin | ||
$CC $CFLAGS ${./wrapper.c} -o $out/bin/security-wrapper | ||
$CC $CFLAGS ${./wrapper.c} -o $out/bin/nixos-security-wrapper | ||
''; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is no longer strictly true, right? Maybe at least change the example?