chiark / gitweb /
udevd: improve handling of failed worker
[elogind.git] / src / udev / udevd.c
index 99b033b0504f8cc2913677d082fb85a88a224268..ac21d511bb6bca4b4f9543d6b987b72b55419db0 100644 (file)
 #include <stdlib.h>
 #include <stdbool.h>
 #include <string.h>
-#include <ctype.h>
 #include <fcntl.h>
-#include <time.h>
 #include <getopt.h>
-#include <dirent.h>
 #include <sys/file.h>
 #include <sys/time.h>
 #include <sys/prctl.h>
 #include <sys/socket.h>
-#include <sys/un.h>
 #include <sys/signalfd.h>
 #include <sys/epoll.h>
 #include <sys/mount.h>
-#include <sys/poll.h>
 #include <sys/wait.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
 #include <sys/inotify.h>
-#include <sys/utsname.h>
 
-#include "udev.h"
-#include "udev-util.h"
-#include "rtnl-util.h"
 #include "sd-daemon.h"
+#include "rtnl-util.h"
 #include "cgroup-util.h"
 #include "dev-setup.h"
 #include "fileio.h"
+#include "selinux-util.h"
+#include "udev.h"
+#include "udev-util.h"
 
 static struct udev_rules *rules;
 static struct udev_ctrl *udev_ctrl;
@@ -88,6 +83,7 @@ struct event {
         struct udev_list_node node;
         struct udev *udev;
         struct udev_device *dev;
+        struct udev_device *dev_kernel;
         enum event_state state;
         int exitcode;
         unsigned long long int delaying_seqnum;
@@ -138,6 +134,7 @@ static inline struct worker *node_to_worker(struct udev_list_node *node) {
 static void event_queue_delete(struct event *event) {
         udev_list_node_remove(&event->node);
         udev_device_unref(event->dev);
+        udev_device_unref(event->dev_kernel);
         free(event);
 }
 
@@ -157,7 +154,7 @@ static void worker_unref(struct worker *worker) {
         worker->refcount--;
         if (worker->refcount > 0)
                 return;
-        log_debug("worker [%u] cleaned up", worker->pid);
+        log_debug("worker ["PID_FMT"] cleaned up", worker->pid);
         worker_cleanup(worker);
 }
 
@@ -220,14 +217,14 @@ static void worker_new(struct event *event) {
                 sigfillset(&mask);
                 fd_signal = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
                 if (fd_signal < 0) {
-                        log_error("error creating signalfd %m");
+                        log_error_errno(errno, "error creating signalfd %m");
                         rc = 2;
                         goto out;
                 }
 
                 fd_ep = epoll_create1(EPOLL_CLOEXEC);
                 if (fd_ep < 0) {
-                        log_error("error creating epoll fd: %m");
+                        log_error_errno(errno, "error creating epoll fd: %m");
                         rc = 3;
                         goto out;
                 }
@@ -243,7 +240,7 @@ static void worker_new(struct event *event) {
 
                 if (epoll_ctl(fd_ep, EPOLL_CTL_ADD, fd_signal, &ep_signal) < 0 ||
                     epoll_ctl(fd_ep, EPOLL_CTL_ADD, fd_monitor, &ep_monitor) < 0) {
-                        log_error("fail to add fds to epoll: %m");
+                        log_error_errno(errno, "fail to add fds to epoll: %m");
                         rc = 4;
                         goto out;
                 }
@@ -293,7 +290,7 @@ static void worker_new(struct event *event) {
                                 if (d) {
                                         fd_lock = open(udev_device_get_devnode(d), O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK);
                                         if (fd_lock >= 0 && flock(fd_lock, LOCK_SH|LOCK_NB) < 0) {
-                                                log_debug("Unable to flock(%s), skipping event handling: %m", udev_device_get_devnode(d));
+                                                log_debug_errno(errno, "Unable to flock(%s), skipping event handling: %m", udev_device_get_devnode(d));
                                                 err = -EWOULDBLOCK;
                                                 fd_lock = safe_close(fd_lock);
                                                 goto skip;
@@ -315,8 +312,9 @@ static void worker_new(struct event *event) {
                                                arg_event_timeout_usec, arg_event_timeout_warn_usec,
                                                &sigmask_orig);
 
-                        /* in case rtnl was initialized */
-                        rtnl = sd_rtnl_ref(udev_event->rtnl);
+                        if (udev_event->rtnl)
+                                /* in case rtnl was initialized */
+                                rtnl = sd_rtnl_ref(udev_event->rtnl);
 
                         /* apply/restore inotify watch */
                         if (udev_event->inotify_watch) {
@@ -358,7 +356,7 @@ skip:
                                 if (fdcount < 0) {
                                         if (errno == EINTR)
                                                 continue;
-                                        log_error("failed to poll: %m");
+                                        log_error_errno(errno, "failed to poll: %m");
                                         goto out;
                                 }
 
@@ -398,7 +396,7 @@ out:
                 udev_monitor_unref(worker_monitor);
                 event->state = EVENT_QUEUED;
                 free(worker);
-                log_error("fork of child failed: %m");
+                log_error_errno(errno, "fork of child failed: %m");
                 break;
         default:
                 /* close monitor, but keep address around */
@@ -412,7 +410,7 @@ out:
                 event->state = EVENT_RUNNING;
                 udev_list_node_append(&worker->node, &worker_list);
                 children++;
-                log_debug("seq %llu forked new worker [%u]", udev_device_get_seqnum(event->dev), pid);
+                log_debug("seq %llu forked new worker ["PID_FMT"]", udev_device_get_seqnum(event->dev), pid);
                 break;
         }
 }
@@ -429,7 +427,8 @@ static void event_run(struct event *event) {
 
                 count = udev_monitor_send_device(monitor, worker->monitor, event->dev);
                 if (count < 0) {
-                        log_error("worker [%u] did not accept message %zi (%m), kill it", worker->pid, count);
+                        log_error_errno(errno, "worker ["PID_FMT"] did not accept message %zi (%m), kill it",
+                                        worker->pid, count);
                         kill(worker->pid, SIGKILL);
                         worker->state = WORKER_KILLED;
                         continue;
@@ -462,6 +461,8 @@ static int event_queue_insert(struct udev_device *dev) {
 
         event->udev = udev_device_get_udev(dev);
         event->dev = dev;
+        event->dev_kernel = udev_device_shallow_clone(dev);
+        udev_device_copy_properties(event->dev_kernel, dev);
         event->seqnum = udev_device_get_seqnum(dev);
         event->devpath = udev_device_get_devpath(dev);
         event->devpath_len = strlen(event->devpath);
@@ -815,41 +816,34 @@ static int synthesize_change(struct udev_device *dev) {
 }
 
 static int handle_inotify(struct udev *udev) {
-        int nbytes, pos;
-        char *buf;
-        struct inotify_event *ev;
-        int r;
+        union inotify_event_buffer buffer;
+        struct inotify_event *e;
+        ssize_t l;
 
-        r = ioctl(fd_inotify, FIONREAD, &nbytes);
-        if (r < 0 || nbytes <= 0)
-                return -errno;
+        l = read(fd_inotify, &buffer, sizeof(buffer));
+        if (l < 0) {
+                if (errno == EAGAIN || errno == EINTR)
+                        return 0;
 
-        buf = malloc(nbytes);
-        if (!buf) {
-                log_error("error getting buffer for inotify");
-                return -ENOMEM;
+                return log_error_errno(errno, "Failed to read inotify fd: %m");
         }
 
-        nbytes = read(fd_inotify, buf, nbytes);
-
-        for (pos = 0; pos < nbytes; pos += sizeof(struct inotify_event) + ev->len) {
+        FOREACH_INOTIFY_EVENT(e, buffer, l) {
                 struct udev_device *dev;
 
-                ev = (struct inotify_event *)(buf + pos);
-                dev = udev_watch_lookup(udev, ev->wd);
+                dev = udev_watch_lookup(udev, e->wd);
                 if (!dev)
                         continue;
 
-                log_debug("inotify event: %x for %s", ev->mask, udev_device_get_devnode(dev));
-                if (ev->mask & IN_CLOSE_WRITE)
+                log_debug("inotify event: %x for %s", e->mask, udev_device_get_devnode(dev));
+                if (e->mask & IN_CLOSE_WRITE)
                         synthesize_change(dev);
-                else if (ev->mask & IN_IGNORED)
+                else if (e->mask & IN_IGNORED)
                         udev_watch_end(udev, dev);
 
                 udev_device_unref(dev);
         }
 
-        free(buf);
         return 0;
 }
 
@@ -874,28 +868,33 @@ static void handle_signal(struct udev *udev, int signo) {
 
                                 if (worker->pid != pid)
                                         continue;
-                                log_debug("worker [%u] exit", pid);
+                                log_debug("worker ["PID_FMT"] exit", pid);
 
                                 if (WIFEXITED(status)) {
                                         if (WEXITSTATUS(status) != 0)
-                                                log_error("worker [%u] exit with return code %i",
+                                                log_error("worker ["PID_FMT"] exit with return code %i",
                                                           pid, WEXITSTATUS(status));
                                 } else if (WIFSIGNALED(status)) {
-                                        log_error("worker [%u] terminated by signal %i (%s)",
+                                        log_error("worker ["PID_FMT"] terminated by signal %i (%s)",
                                                   pid, WTERMSIG(status), strsignal(WTERMSIG(status)));
                                 } else if (WIFSTOPPED(status)) {
-                                        log_error("worker [%u] stopped", pid);
+                                        log_error("worker ["PID_FMT"] stopped", pid);
                                 } else if (WIFCONTINUED(status)) {
-                                        log_error("worker [%u] continued", pid);
+                                        log_error("worker ["PID_FMT"] continued", pid);
                                 } else {
-                                        log_error("worker [%u] exit with status 0x%04x", pid, status);
+                                        log_error("worker ["PID_FMT"] exit with status 0x%04x", pid, status);
                                 }
 
                                 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
                                         if (worker->event) {
-                                                log_error("worker [%u] failed while handling '%s'",
+                                                log_error("worker ["PID_FMT"] failed while handling '%s'",
                                                           pid, worker->event->devpath);
                                                 worker->event->exitcode = -32;
+                                                /* delete state from disk */
+                                                udev_device_delete_db(worker->event->dev);
+                                                udev_device_tag_index(worker->event->dev, NULL, false);
+                                                /* forward kernel event without amending it */
+                                                udev_monitor_send_device(monitor, NULL, worker->event->dev_kernel);
                                                 event_queue_delete(worker->event);
 
                                                 /* drop reference taken for state 'running' */
@@ -913,6 +912,20 @@ static void handle_signal(struct udev *udev, int signo) {
         }
 }
 
+static void event_queue_update(void) {
+        int r;
+
+        if (!udev_list_node_is_empty(&event_list)) {
+                r = touch("/run/udev/queue");
+                if (r < 0)
+                        log_warning_errno(r, "could not touch /run/udev/queue: %m");
+        } else {
+                r = unlink("/run/udev/queue");
+                if (r < 0 && errno != ENOENT)
+                        log_warning("could not unlink /run/udev/queue: %m");
+        }
+}
+
 static int systemd_fds(struct udev *udev, int *rctrl, int *rnetlink) {
         int ctrl = -1, netlink = -1;
         int fd, n;
@@ -962,7 +975,7 @@ static void kernel_cmdline_options(struct udev *udev) {
 
         r = proc_cmdline(&line);
         if (r < 0) {
-                log_warning_errno(-r, "Failed to read /proc/cmdline, ignoring: %m");
+                log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
                 return;
         }
 
@@ -1009,14 +1022,15 @@ static void kernel_cmdline_options(struct udev *udev) {
 static void help(void) {
         printf("%s [OPTIONS...]\n\n"
                "Manages devices.\n\n"
-               "  --daemon\n"
-               "  --debug\n"
-               "  --children-max=<maximum number of workers>\n"
-               "  --exec-delay=<seconds to wait before executing RUN=>\n"
-               "  --event-timeout=<seconds to wait before terminating an event>\n"
-               "  --resolve-names=early|late|never\n"
-               "  --version\n"
-               "  --help\n"
+               "  -h --help                   Print this message\n"
+               "     --version                Print version of the program\n"
+               "     --daemon                 Detach and run in the background\n"
+               "     --debug                  Enable debug output\n"
+               "     --children-max=INT       Set maximum number of workers\n"
+               "     --exec-delay=SECONDS     Seconds to wait before executing RUN=\n"
+               "     --event-timeout=SECONDS  Seconds to wait before terminating an event\n"
+               "     --resolve-names=early|late|never\n"
+               "                              When to resolve users and groups\n"
                , program_invocation_short_name);
 }
 
@@ -1135,14 +1149,14 @@ int main(int argc, char *argv[]) {
 
         r = mac_selinux_init("/dev");
         if (r < 0) {
-                log_error_errno(-r, "could not initialize labelling: %m");
+                log_error_errno(r, "could not initialize labelling: %m");
                 goto exit;
         }
 
         /* set umask before creating any file/directory */
         r = chdir("/");
         if (r < 0) {
-                log_error("could not change dir to /: %m");
+                log_error_errno(errno, "could not change dir to /: %m");
                 goto exit;
         }
 
@@ -1152,7 +1166,7 @@ int main(int argc, char *argv[]) {
 
         r = mkdir("/run/udev", 0755);
         if (r < 0 && errno != EEXIST) {
-                log_error("could not create /run/udev: %m");
+                log_error_errno(errno, "could not create /run/udev: %m");
                 goto exit;
         }
 
@@ -1211,6 +1225,8 @@ int main(int argc, char *argv[]) {
                         goto exit;
                 }
                 fd_netlink = udev_monitor_get_fd(monitor);
+
+                udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024);
         }
 
         if (udev_monitor_enable_receiving(monitor) < 0) {
@@ -1225,9 +1241,7 @@ int main(int argc, char *argv[]) {
                 goto exit;
         }
 
-        udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024);
-
-        log_info("starting version " VERSION "\n");
+        log_info("starting version " VERSION);
 
         udev_builtin_init(udev);
 
@@ -1239,7 +1253,7 @@ int main(int argc, char *argv[]) {
 
         rc = udev_rules_apply_static_dev_perms(rules);
         if (rc < 0)
-                log_error_errno(-rc, "failed to apply permissions on static device nodes - %m");
+                log_error_errno(rc, "failed to apply permissions on static device nodes - %m");
 
         if (arg_daemonize) {
                 pid_t pid;
@@ -1249,7 +1263,7 @@ int main(int argc, char *argv[]) {
                 case 0:
                         break;
                 case -1:
-                        log_error("fork of daemon failed: %m");
+                        log_error_errno(errno, "fork of daemon failed: %m");
                         rc = 4;
                         goto exit;
                 default:
@@ -1312,7 +1326,7 @@ int main(int argc, char *argv[]) {
 
         fd_ep = epoll_create1(EPOLL_CLOEXEC);
         if (fd_ep < 0) {
-                log_error("error creating epoll fd: %m");
+                log_error_errno(errno, "error creating epoll fd: %m");
                 goto exit;
         }
         if (epoll_ctl(fd_ep, EPOLL_CTL_ADD, fd_ctrl, &ep_ctrl) < 0 ||
@@ -1320,7 +1334,7 @@ int main(int argc, char *argv[]) {
             epoll_ctl(fd_ep, EPOLL_CTL_ADD, fd_signal, &ep_signal) < 0 ||
             epoll_ctl(fd_ep, EPOLL_CTL_ADD, fd_netlink, &ep_netlink) < 0 ||
             epoll_ctl(fd_ep, EPOLL_CTL_ADD, fd_worker, &ep_worker) < 0) {
-                log_error("fail to add fds to epoll: %m");
+                log_error_errno(errno, "fail to add fds to epoll: %m");
                 goto exit;
         }
 
@@ -1372,15 +1386,7 @@ int main(int argc, char *argv[]) {
                 }
 
                 /* tell settle that we are busy or idle */
-                if (!udev_list_node_is_empty(&event_list)) {
-                        int fd;
-
-                        fd = open("/run/udev/queue", O_WRONLY|O_CREAT|O_CLOEXEC|O_TRUNC|O_NOFOLLOW, 0444);
-                        if (fd >= 0)
-                                close(fd);
-                } else {
-                        unlink("/run/udev/queue");
-                }
+                event_queue_update();
 
                 fdcount = epoll_wait(fd_ep, ev, ELEMENTSOF(ev), timeout);
                 if (fdcount < 0)
@@ -1413,18 +1419,14 @@ int main(int argc, char *argv[]) {
 
                                 if ((ts - worker->event_start_usec) > arg_event_timeout_warn_usec) {
                                         if ((ts - worker->event_start_usec) > arg_event_timeout_usec) {
-                                                log_error("worker [%u] %s timeout; kill it", worker->pid, worker->event->devpath);
+                                                log_error("worker ["PID_FMT"] %s timeout; kill it", worker->pid, worker->event->devpath);
                                                 kill(worker->pid, SIGKILL);
                                                 worker->state = WORKER_KILLED;
 
-                                                /* drop reference taken for state 'running' */
-                                                worker_unref(worker);
                                                 log_error("seq %llu '%s' killed", udev_device_get_seqnum(worker->event->dev), worker->event->devpath);
                                                 worker->event->exitcode = -64;
-                                                event_queue_delete(worker->event);
-                                                worker->event = NULL;
                                         } else if (!worker->event_warned) {
-                                                log_warning("worker [%u] %s is taking a long time", worker->pid, worker->event->devpath);
+                                                log_warning("worker ["PID_FMT"] %s is taking a long time", worker->pid, worker->event->devpath);
                                                 worker->event_warned = true;
                                         }
                                 }
@@ -1505,6 +1507,11 @@ int main(int argc, char *argv[]) {
                 if (is_inotify)
                         handle_inotify(udev);
 
+                /* tell settle that we are busy or idle; this needs to happen before
+                 * the PING handling
+                 */
+                event_queue_update();
+
                 /*
                  * This needs to be after the inotify handling, to make sure,
                  * that the ping is sent back after the possibly generated