Skip to content

Commit

Permalink
zed: use separate reaper thread and collect ZEDLETs asynchronously
Browse files Browse the repository at this point in the history
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Ahelenia Ziemiańska <[email protected]>
Closes openzfs#11807
  • Loading branch information
nabijaczleweli authored and RageLtMan committed May 31, 2021
1 parent a3d316d commit 54a5732
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 55 deletions.
6 changes: 5 additions & 1 deletion cmd/zed/zed.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ _setup_sig_handlers(void)
zed_log_die("Failed to initialize sigset");

sa.sa_flags = SA_RESTART;
sa.sa_handler = SIG_IGN;

sa.sa_handler = SIG_IGN;
if (sigaction(SIGPIPE, &sa, NULL) < 0)
zed_log_die("Failed to ignore SIGPIPE");

Expand All @@ -75,6 +75,10 @@ _setup_sig_handlers(void)
sa.sa_handler = _hup_handler;
if (sigaction(SIGHUP, &sa, NULL) < 0)
zed_log_die("Failed to register SIGHUP handler");

(void) sigaddset(&sa.sa_mask, SIGCHLD);
if (pthread_sigmask(SIG_BLOCK, &sa.sa_mask, NULL) < 0)
zed_log_die("Failed to block SIGCHLD");
}

/*
Expand Down
4 changes: 3 additions & 1 deletion cmd/zed/zed_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <libzfs.h> /* FIXME: Replace with libzfs_core. */
#include <libzfs_core.h>
#include <paths.h>
#include <stdarg.h>
#include <stdio.h>
Expand Down Expand Up @@ -96,6 +96,8 @@ zed_event_fini(struct zed_conf *zcp)
libzfs_fini(zcp->zfs_hdl);
zcp->zfs_hdl = NULL;
}

zed_exec_fini();
}

/*
Expand Down
194 changes: 147 additions & 47 deletions cmd/zed/zed_exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,52 @@
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <sys/avl.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include "zed_exec.h"
#include "zed_file.h"
#include "zed_log.h"
#include "zed_strings.h"

#define ZEVENT_FILENO 3

struct launched_process_node {
avl_node_t node;
pid_t pid;
uint64_t eid;
char *name;
};

static int
_launched_process_node_compare(const void *x1, const void *x2)
{
pid_t p1;
pid_t p2;

assert(x1 != NULL);
assert(x2 != NULL);

p1 = ((const struct launched_process_node *) x1)->pid;
p2 = ((const struct launched_process_node *) x2)->pid;

if (p1 < p2)
return (-1);
else if (p1 == p2)
return (0);
else
return (1);
}

static pthread_t _reap_children_tid = (pthread_t)-1;
static volatile boolean_t _reap_children_stop;
static avl_tree_t _launched_processes;
static pthread_mutex_t _launched_processes_lock = PTHREAD_MUTEX_INITIALIZER;

/*
* Create an environment string array for passing to execve() using the
* NAME=VALUE strings in container [zsp].
Expand Down Expand Up @@ -85,8 +120,8 @@ _zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog,
int n;
pid_t pid;
int fd;
pid_t wpid;
int status;
struct launched_process_node *node;
sigset_t mask;

assert(dir != NULL);
assert(prog != NULL);
Expand All @@ -107,6 +142,9 @@ _zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog,
prog, eid, strerror(errno));
return;
} else if (pid == 0) {
(void) sigemptyset(&mask);
(void) sigprocmask(SIG_SETMASK, &mask, NULL);

(void) umask(022);
if ((fd = open("/dev/null", O_RDWR)) != -1) {
(void) dup2(fd, STDIN_FILENO);
Expand All @@ -124,57 +162,108 @@ _zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog,
zed_log_msg(LOG_INFO, "Invoking \"%s\" eid=%llu pid=%d",
prog, eid, pid);

/* FIXME: Timeout rogue child processes with sigalarm? */

/*
* Wait for child process using WNOHANG to limit
* the time spent waiting to 10 seconds (10,000ms).
*/
for (n = 0; n < 1000; n++) {
wpid = waitpid(pid, &status, WNOHANG);
if (wpid == (pid_t)-1) {
if (errno == EINTR)
continue;
zed_log_msg(LOG_WARNING,
"Failed to wait for \"%s\" eid=%llu pid=%d",
prog, eid, pid);
break;
} else if (wpid == 0) {
struct timespec t;

/* child still running */
t.tv_sec = 0;
t.tv_nsec = 10000000; /* 10ms */
(void) nanosleep(&t, NULL);
continue;
}
node = calloc(1, sizeof (*node));
if (node) {
node->pid = pid;
node->eid = eid;
node->name = strdup(prog);
(void) pthread_mutex_lock(&_launched_processes_lock);
avl_add(&_launched_processes, node);
(void) pthread_mutex_unlock(&_launched_processes_lock);
}
}

if (WIFEXITED(status)) {
zed_log_msg(LOG_INFO,
"Finished \"%s\" eid=%llu pid=%d exit=%d",
prog, eid, pid, WEXITSTATUS(status));
} else if (WIFSIGNALED(status)) {
zed_log_msg(LOG_INFO,
"Finished \"%s\" eid=%llu pid=%d sig=%d/%s",
prog, eid, pid, WTERMSIG(status),
strsignal(WTERMSIG(status)));
static void
_nop(int sig)
{}

static void *
_reap_children(void *arg)
{
struct launched_process_node node, *pnode;
pid_t pid;
int status;
struct sigaction sa = {};

(void) sigfillset(&sa.sa_mask);
(void) sigdelset(&sa.sa_mask, SIGCHLD);
(void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL);

(void) sigemptyset(&sa.sa_mask);
sa.sa_handler = _nop;
sa.sa_flags = SA_NOCLDSTOP;
(void) sigaction(SIGCHLD, &sa, NULL);

for (_reap_children_stop = B_FALSE; !_reap_children_stop; ) {
pid = waitpid(0, &status, 0);

if (pid == (pid_t)-1) {
if (errno == ECHILD)
pause();
else if (errno != EINTR)
zed_log_msg(LOG_WARNING,
"Failed to wait for children: %s",
strerror(errno));
} else {
zed_log_msg(LOG_INFO,
"Finished \"%s\" eid=%llu pid=%d status=0x%X",
prog, eid, (unsigned int) status);
memset(&node, 0, sizeof (node));
node.pid = pid;
(void) pthread_mutex_lock(&_launched_processes_lock);
pnode = avl_find(&_launched_processes, &node, NULL);
if (pnode) {
memcpy(&node, pnode, sizeof (node));

avl_remove(&_launched_processes, pnode);
free(pnode);
}
(void) pthread_mutex_unlock(&_launched_processes_lock);

if (WIFEXITED(status)) {
zed_log_msg(LOG_INFO,
"Finished \"%s\" eid=%llu pid=%d exit=%d",
node.name, node.eid, pid,
WEXITSTATUS(status));
} else if (WIFSIGNALED(status)) {
zed_log_msg(LOG_INFO,
"Finished \"%s\" eid=%llu pid=%d sig=%d/%s",
node.name, node.eid, pid, WTERMSIG(status),
strsignal(WTERMSIG(status)));
} else {
zed_log_msg(LOG_INFO,
"Finished \"%s\" eid=%llu pid=%d "
"status=0x%X",
node.name, node.eid, (unsigned int) status);
}

free(node.name);
}
break;
}

/*
* kill child process after 10 seconds
*/
if (wpid == 0) {
zed_log_msg(LOG_WARNING, "Killing hung \"%s\" pid=%d",
prog, pid);
(void) kill(pid, SIGKILL);
(void) waitpid(pid, &status, 0);
return (NULL);
}

void
zed_exec_fini(void)
{
struct launched_process_node *node;
void *ck = NULL;

if (_reap_children_tid == (pthread_t)-1)
return;

_reap_children_stop = B_TRUE;
(void) pthread_kill(_reap_children_tid, SIGCHLD);
(void) pthread_join(_reap_children_tid, NULL);

while ((node = avl_destroy_nodes(&_launched_processes, &ck)) != NULL) {
free(node->name);
free(node);
}
avl_destroy(&_launched_processes);

(void) pthread_mutex_destroy(&_launched_processes_lock);
(void) pthread_mutex_init(&_launched_processes_lock, NULL);

_reap_children_tid = (pthread_t)-1;
}

/*
Expand Down Expand Up @@ -206,6 +295,17 @@ zed_exec_process(uint64_t eid, const char *class, const char *subclass,
if (!dir || !zedlets || !envs || zfd < 0)
return (-1);

if (_reap_children_tid == (pthread_t)-1) {
if (pthread_create(&_reap_children_tid, NULL,
_reap_children, NULL) != 0)
return (-1);
pthread_setname_np(_reap_children_tid, "reap ZEDLETs");

avl_create(&_launched_processes, _launched_process_node_compare,
sizeof (struct launched_process_node),
offsetof(struct launched_process_node, node));
}

csp = class_strings;

if (class)
Expand Down
2 changes: 2 additions & 0 deletions cmd/zed/zed_exec.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <stdint.h>
#include "zed_strings.h"

void zed_exec_fini(void);

int zed_exec_process(uint64_t eid, const char *class, const char *subclass,
const char *dir, zed_strings_t *zedlets, zed_strings_t *envs,
int zevent_fd);
Expand Down
6 changes: 0 additions & 6 deletions man/man8/zed.8.in
Original file line number Diff line number Diff line change
Expand Up @@ -231,12 +231,6 @@ Terminate the daemon.

.SH BUGS
.PP
Events are processed synchronously by a single thread. This can delay the
processing of simultaneous zevents.
.PP
ZEDLETs are killed after a maximum of ten seconds.
This can lead to a violation of a ZEDLET's atomicity assumptions.
.PP
The ownership and permissions of the \fIenabled-zedlets\fR directory (along
with all parent directories) are not checked. If any of these directories
are improperly owned or permissioned, an unprivileged user could insert a
Expand Down

0 comments on commit 54a5732

Please sign in to comment.