chiark / gitweb /
udevd: fix bogus mkdir invocation
[elogind.git] / src / udev / udevd.c
index 35478c19caa51b2324032e03e025fbf53fd621c1..a5478a754ed8ab270bf1e0a4c9f533a95ae4b834 100644 (file)
@@ -46,6 +46,8 @@
 
 #include "udev.h"
 #include "sd-daemon.h"
+#include "cgroup-util.h"
+#include "dev-setup.h"
 
 static bool debug;
 
@@ -72,6 +74,7 @@ static int exec_delay;
 static sigset_t sigmask_orig;
 static UDEV_LIST(event_list);
 static UDEV_LIST(worker_list);
+char *udev_cgroup;
 static bool udev_exit;
 
 enum event_state {
@@ -96,13 +99,9 @@ struct event {
         int ifindex;
 };
 
-static struct event *node_to_event(struct udev_list_node *node)
+static inline struct event *node_to_event(struct udev_list_node *node)
 {
-        char *event;
-
-        event = (char *)node;
-        event -= offsetof(struct event, node);
-        return (struct event *)event;
+        return container_of(node, struct event, node);
 }
 
 static void event_queue_cleanup(struct udev *udev, enum event_state type);
@@ -131,13 +130,9 @@ struct worker_message {
         int exitcode;
 };
 
-static struct worker *node_to_worker(struct udev_list_node *node)
+static inline struct worker *node_to_worker(struct udev_list_node *node)
 {
-        char *worker;
-
-        worker = (char *)node;
-        worker -= offsetof(struct worker, node);
-        return (struct worker *)worker;
+        return container_of(node, struct worker, node);
 }
 
 static void event_queue_delete(struct event *event, bool export)
@@ -267,6 +262,9 @@ static void worker_new(struct event *event)
                 /* request TERM signal if parent exits */
                 prctl(PR_SET_PDEATHSIG, SIGTERM);
 
+                /* reset OOM score, we only protect the main daemon */
+                write_one_line_file("/proc/self/oom_score_adj", "0");
+
                 for (;;) {
                         struct udev_event *udev_event;
                         struct worker_message msg;
@@ -325,11 +323,10 @@ static void worker_new(struct event *event)
                                 int fdcount;
                                 int i;
 
-                                fdcount = epoll_wait(fd_ep, ev, ARRAY_SIZE(ev), -1);
+                                fdcount = epoll_wait(fd_ep, ev, ELEMENTSOF(ev), -1);
                                 if (fdcount < 0) {
                                         if (errno == EINTR)
                                                 continue;
-                                        err = -errno;
                                         log_error("failed to poll: %m\n");
                                         goto out;
                                 }
@@ -453,22 +450,13 @@ static int event_queue_insert(struct udev_device *dev)
         return 0;
 }
 
-static void worker_kill(struct udev *udev, int retain)
+static void worker_kill(struct udev *udev)
 {
         struct udev_list_node *loop;
-        int max;
-
-        if (children <= retain)
-                return;
-
-        max = children - retain;
 
         udev_list_node_foreach(loop, &worker_list) {
                 struct worker *worker = node_to_worker(loop);
 
-                if (max-- <= 0)
-                        break;
-
                 if (worker->state == WORKER_KILLED)
                         continue;
 
@@ -636,7 +624,7 @@ static struct udev_ctrl_connection *handle_ctrl_msg(struct udev_ctrl *uctrl)
                 log_debug("udevd message (SET_LOG_PRIORITY) received, log_priority=%i\n", i);
                 log_set_max_level(i);
                 udev_set_log_priority(udev, i);
-                worker_kill(udev, 0);
+                worker_kill(udev);
         }
 
         if (udev_ctrl_get_stop_exec_queue(ctrl_msg) > 0) {
@@ -678,7 +666,7 @@ static struct udev_ctrl_connection *handle_ctrl_msg(struct udev_ctrl *uctrl)
                         }
                         free(key);
                 }
-                worker_kill(udev, 0);
+                worker_kill(udev);
         }
 
         i = udev_ctrl_get_set_children_max(ctrl_msg);
@@ -818,7 +806,7 @@ static void static_dev_create_from_modules(struct udev *udev)
         FILE *f;
 
         uname(&kernel);
-        util_strscpyl(modules, sizeof(modules), "/lib/modules/", kernel.release, "/modules.devname", NULL);
+        util_strscpyl(modules, sizeof(modules), ROOTPREFIX "/lib/modules/", kernel.release, "/modules.devname", NULL);
         f = fopen(modules, "r");
         if (f == NULL)
                 return;
@@ -864,54 +852,18 @@ static void static_dev_create_from_modules(struct udev *udev)
                 else
                         continue;
 
-                util_strscpyl(filename, sizeof(filename), udev_get_dev_path(udev), "/", devname, NULL);
-                util_create_path_selinux(udev, filename);
-                udev_selinux_setfscreatecon(udev, filename, mode);
+                util_strscpyl(filename, sizeof(filename), "/dev/", devname, NULL);
+                mkdir_parents_label(filename, 0755);
+                label_context_set(filename, mode);
                 log_debug("mknod '%s' %c%u:%u\n", filename, type, maj, min);
                 if (mknod(filename, mode, makedev(maj, min)) < 0 && errno == EEXIST)
                         utimensat(AT_FDCWD, filename, NULL, 0);
-                udev_selinux_resetfscreatecon(udev);
+                label_context_clear();
         }
 
         fclose(f);
 }
 
-/* needed for standalone udev operations */
-static void static_dev_create_links(struct udev *udev)
-{
-        DIR *dir;
-
-        dir = opendir(udev_get_dev_path(udev));
-        if (dir == NULL)
-                return;
-
-        struct stdlinks {
-                const char *link;
-                const char *target;
-        };
-        static const struct stdlinks stdlinks[] = {
-                { "core", "/proc/kcore" },
-                { "fd", "/proc/self/fd" },
-                { "stdin", "/proc/self/fd/0" },
-                { "stdout", "/proc/self/fd/1" },
-                { "stderr", "/proc/self/fd/2" },
-        };
-        unsigned int i;
-
-        for (i = 0; i < ARRAY_SIZE(stdlinks); i++) {
-                struct stat sb;
-
-                if (stat(stdlinks[i].target, &sb) == 0) {
-                        udev_selinux_setfscreateconat(udev, dirfd(dir), stdlinks[i].link, S_IFLNK);
-                        if (symlinkat(stdlinks[i].target, dirfd(dir), stdlinks[i].link) < 0 && errno == EEXIST)
-                                utimensat(dirfd(dir), stdlinks[i].link, NULL, AT_SYMLINK_NOFOLLOW);
-                        udev_selinux_resetfscreatecon(udev);
-                }
-        }
-
-        closedir(dir);
-}
-
 static int mem_size_mb(void)
 {
         FILE *f;
@@ -943,22 +895,20 @@ static int convert_db(struct udev *udev)
         struct udev_list_entry *list_entry;
 
         /* current database */
-        util_strscpyl(filename, sizeof(filename), udev_get_run_path(udev), "/data", NULL);
-        if (access(filename, F_OK) >= 0)
+        if (access("/run/udev/data", F_OK) >= 0)
                 return 0;
 
         /* make sure we do not get here again */
-        util_create_path(udev, filename);
-        mkdir(filename, 0755);
+        mkdir_p("/run/udev/data", 0755);
 
         /* old database */
-        util_strscpyl(filename, sizeof(filename), udev_get_dev_path(udev), "/.udev/db", NULL);
+        util_strscpyl(filename, sizeof(filename), "/dev/.udev/db", NULL);
         if (access(filename, F_OK) < 0)
                 return 0;
 
         f = fopen("/dev/kmsg", "w");
         if (f != NULL) {
-                fprintf(f, "<30>udevd[%u]: converting old udev database\n", getpid());
+                fprintf(f, "<30>systemd-udevd[%u]: converting old udev database\n", getpid());
                 fclose(f);
         }
 
@@ -985,7 +935,7 @@ static int convert_db(struct udev *udev)
 
                         /* find database in old location */
                         id = udev_device_get_id_filename(device);
-                        util_strscpyl(from, sizeof(from), udev_get_dev_path(udev), "/.udev/db/", id, NULL);
+                        util_strscpyl(from, sizeof(from), "/dev/.udev/db/", id, NULL);
                         if (lstat(from, &stats) == 0) {
                                 if (!have_db) {
                                         udev_device_read_db(device, from);
@@ -995,9 +945,8 @@ static int convert_db(struct udev *udev)
                         }
 
                         /* find old database with $subsys:$sysname name */
-                        util_strscpyl(from, sizeof(from), udev_get_dev_path(udev),
-                                     "/.udev/db/", udev_device_get_subsystem(device), ":",
-                                     udev_device_get_sysname(device), NULL);
+                        util_strscpyl(from, sizeof(from), "/dev/.udev/db/",
+                                      udev_device_get_subsystem(device), ":", udev_device_get_sysname(device), NULL);
                         if (lstat(from, &stats) == 0) {
                                 if (!have_db) {
                                         udev_device_read_db(device, from);
@@ -1008,7 +957,7 @@ static int convert_db(struct udev *udev)
 
                         /* find old database with the encoded devpath name */
                         util_path_encode(udev_device_get_devpath(device), devpath, sizeof(devpath));
-                        util_strscpyl(from, sizeof(from), udev_get_dev_path(udev), "/.udev/db/", devpath, NULL);
+                        util_strscpyl(from, sizeof(from), "/dev/.udev/db/", devpath, NULL);
                         if (lstat(from, &stats) == 0) {
                                 if (!have_db) {
                                         udev_device_read_db(device, from);
@@ -1063,34 +1012,46 @@ static int systemd_fds(struct udev *udev, int *rctrl, int *rnetlink)
         return 0;
 }
 
-static bool check_rules_timestamp(struct udev *udev)
+/*
+ * read the kernel commandline, in case we need to get into debug mode
+ *   udev.log-priority=<level>              syslog priority
+ *   udev.children-max=<number of workers>  events are fully serialized if set to 1
+ *   udev.exec-delay=<number of seconds>    delay execution of every executed program
+ */
+static void kernel_cmdline_options(struct udev *udev)
 {
-        char **p;
-        unsigned long long *stamp_usec;
-        int i, n;
-        bool changed = false;
+        char *line, *w, *state;
+        size_t l;
 
-        n = udev_get_rules_path(udev, &p, &stamp_usec);
-        for (i = 0; i < n; i++) {
-                struct stat stats;
+        if (read_one_line_file("/proc/cmdline", &line) < 0)
+                return;
 
-                if (stat(p[i], &stats) < 0)
-                        continue;
+        FOREACH_WORD_QUOTED(w, l, line, state) {
+                char *s, *opt;
 
-                if (stamp_usec[i] == ts_usec(&stats.st_mtim))
-                        continue;
+                s = strndup(w, l);
+                if (!s)
+                        break;
 
-                /* first check */
-                if (stamp_usec[i] != 0) {
-                        log_debug("reload - timestamp of '%s' changed\n", p[i]);
-                        changed = true;
-                }
+                /* accept the same options for the initrd, prefixed with "rd." */
+                if (in_initrd() && startswith(s, "rd."))
+                        opt = s + 3;
+                else
+                        opt = s;
+
+                if (startswith(opt, "udev.log-priority="))
+                        udev_set_log_priority(udev, util_log_priority(opt + 18));
 
-                /* update timestamp */
-                stamp_usec[i] = ts_usec(&stats.st_mtim);
+                if (startswith(opt, "udev.children-max="))
+                        children_max = strtoul(opt + 18, NULL, 0);
+
+                if (startswith(opt, "udev.exec-delay="))
+                        exec_delay = strtoul(opt + 16, NULL, 0);
+
+                free(s);
         }
 
-        return changed;
+        free(line);
 }
 
 int main(int argc, char *argv[])
@@ -1115,7 +1076,6 @@ int main(int argc, char *argv[])
         int fd_worker = -1;
         struct epoll_event ep_ctrl, ep_inotify, ep_signal, ep_netlink, ep_worker;
         struct udev_ctrl_connection *ctrl_conn = NULL;
-        char **s;
         int rc = 1;
 
         udev = udev_new();
@@ -1126,7 +1086,7 @@ int main(int argc, char *argv[])
         log_parse_environment();
         udev_set_log_fn(udev, udev_main_log);
         log_debug("version %s\n", VERSION);
-        udev_selinux_init(udev);
+        label_init("/dev");
 
         for (;;) {
                 int option;
@@ -1147,8 +1107,8 @@ int main(int argc, char *argv[])
                         break;
                 case 'D':
                         debug = true;
-                        if (udev_get_log_priority(udev) < LOG_INFO)
-                                udev_set_log_priority(udev, LOG_INFO);
+                        log_set_max_level(LOG_DEBUG);
+                        udev_set_log_priority(udev, LOG_INFO);
                         break;
                 case 'N':
                         if (strcmp (optarg, "early") == 0) {
@@ -1182,39 +1142,7 @@ int main(int argc, char *argv[])
                 }
         }
 
-        /*
-         * read the kernel commandline, in case we need to get into debug mode
-         *   udev.log-priority=<level>              syslog priority
-         *   udev.children-max=<number of workers>  events are fully serialized if set to 1
-         *
-         */
-        f = fopen("/proc/cmdline", "r");
-        if (f != NULL) {
-                char cmdline[4096];
-
-                if (fgets(cmdline, sizeof(cmdline), f) != NULL) {
-                        char *pos;
-
-                        pos = strstr(cmdline, "udev.log-priority=");
-                        if (pos != NULL) {
-                                pos += strlen("udev.log-priority=");
-                                udev_set_log_priority(udev, util_log_priority(pos));
-                        }
-
-                        pos = strstr(cmdline, "udev.children-max=");
-                        if (pos != NULL) {
-                                pos += strlen("udev.children-max=");
-                                children_max = strtoul(pos, NULL, 0);
-                        }
-
-                        pos = strstr(cmdline, "udev.exec-delay=");
-                        if (pos != NULL) {
-                                pos += strlen("udev.exec-delay=");
-                                exec_delay = strtoul(pos, NULL, 0);
-                        }
-                }
-                fclose(f);
-        }
+        kernel_cmdline_options(udev);
 
         if (getuid() != 0) {
                 fprintf(stderr, "root privileges required\n");
@@ -1226,11 +1154,9 @@ int main(int argc, char *argv[])
         chdir("/");
         umask(022);
 
-        /* /run/udev */
-        mkdir(udev_get_run_path(udev), 0755);
+        mkdir("/run/udev", 0755);
 
-        /* create standard links, copy static nodes, create nodes from modules */
-        static_dev_create_links(udev);
+        dev_setup();
         static_dev_create_from_modules(udev);
 
         /* before opening new files, make sure std{in,out,err} fds are in a sane state */
@@ -1266,6 +1192,10 @@ int main(int argc, char *argv[])
                         rc = 3;
                         goto exit;
                 }
+
+                /* get our own cgroup, we regularly kill everything udev has left behind */
+                if (cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 0, &udev_cgroup) < 0)
+                        udev_cgroup = NULL;
         } else {
                 /* open control and netlink socket */
                 udev_ctrl = udev_ctrl_new(udev);
@@ -1312,7 +1242,6 @@ int main(int argc, char *argv[])
 
         if (daemonize) {
                 pid_t pid;
-                int fd;
 
                 pid = fork();
                 switch (pid) {
@@ -1329,28 +1258,14 @@ int main(int argc, char *argv[])
 
                 setsid();
 
-                fd = open("/proc/self/oom_score_adj", O_RDWR);
-                if (fd < 0) {
-                        /* Fallback to old interface */
-                        fd = open("/proc/self/oom_adj", O_RDWR);
-                        if (fd < 0) {
-                                log_error("error disabling OOM: %m\n");
-                        } else {
-                                /* OOM_DISABLE == -17 */
-                                write(fd, "-17", 3);
-                                close(fd);
-                        }
-                } else {
-                        write(fd, "-1000", 5);
-                        close(fd);
-                }
+                write_one_line_file("/proc/self/oom_score_adj", "-1000");
         } else {
                 sd_notify(1, "READY=1");
         }
 
         f = fopen("/dev/kmsg", "w");
         if (f != NULL) {
-                fprintf(f, "<30>udevd[%u]: starting version " VERSION "\n", getpid());
+                fprintf(f, "<30>systemd-udevd[%u]: starting version " VERSION "\n", getpid());
                 fclose(f);
         }
 
@@ -1445,9 +1360,9 @@ int main(int argc, char *argv[])
 
                 /* set value depending on the amount of RAM */
                 if (memsize > 0)
-                        children_max = 128 + (memsize / 8);
+                        children_max = 16 + (memsize / 8);
                 else
-                        children_max = 128;
+                        children_max = 16;
         }
         log_debug("set children_max to %u\n", children_max);
 
@@ -1483,7 +1398,7 @@ int main(int argc, char *argv[])
 
                         /* discard queued events and kill workers */
                         event_queue_cleanup(udev, EVENT_QUEUED);
-                        worker_kill(udev, 0);
+                        worker_kill(udev);
 
                         /* exit after all has cleaned up */
                         if (udev_list_node_is_empty(&event_list) && udev_list_node_is_empty(&worker_list))
@@ -1491,14 +1406,18 @@ int main(int argc, char *argv[])
 
                         /* timeout at exit for workers to finish */
                         timeout = 30 * 1000;
-                } else if (udev_list_node_is_empty(&event_list) && children <= 2) {
+                } else if (udev_list_node_is_empty(&event_list) && !children) {
                         /* we are idle */
                         timeout = -1;
+
+                        /* cleanup possible left-over processes in our cgroup */
+                        if (udev_cgroup)
+                                cg_kill(SYSTEMD_CGROUP_CONTROLLER, udev_cgroup, SIGKILL, false, true, NULL);
                 } else {
                         /* kill idle or hanging workers */
                         timeout = 3 * 1000;
                 }
-                fdcount = epoll_wait(fd_ep, ev, ARRAY_SIZE(ev), timeout);
+                fdcount = epoll_wait(fd_ep, ev, ELEMENTSOF(ev), timeout);
                 if (fdcount < 0)
                         continue;
 
@@ -1514,7 +1433,7 @@ int main(int argc, char *argv[])
                         /* kill idle workers */
                         if (udev_list_node_is_empty(&event_list)) {
                                 log_debug("cleanup idle workers\n");
-                                worker_kill(udev, 2);
+                                worker_kill(udev);
                         }
 
                         /* check for hanging events */
@@ -1525,7 +1444,7 @@ int main(int argc, char *argv[])
                                         continue;
 
                                 if ((now_usec() - worker->event_start_usec) > 30 * 1000 * 1000) {
-                                        log_error("worker [%u] timeout, kill it\n", worker->pid,
+                                        log_error("worker [%u] %s timeout; kill it\n", worker->pid,
                                             worker->event ? worker->event->devpath : "<idle>");
                                         kill(worker->pid, SIGKILL);
                                         worker->state = WORKER_KILLED;
@@ -1559,7 +1478,7 @@ int main(int argc, char *argv[])
 
                 /* check for changed config, every 3 seconds at most */
                 if ((now_usec() - last_usec) > 3 * 1000 * 1000) {
-                        if (check_rules_timestamp(udev))
+                        if (udev_rules_check_timestamp(rules))
                                 reload = true;
                         if (udev_builtin_validate(udev))
                                 reload = true;
@@ -1569,7 +1488,7 @@ int main(int argc, char *argv[])
 
                 /* reload requested, HUP signal received, rules changed, builtin changed */
                 if (reload) {
-                        worker_kill(udev, 0);
+                        worker_kill(udev);
                         rules = udev_rules_unref(rules);
                         udev_builtin_exit(udev);
                         reload = 0;
@@ -1649,7 +1568,7 @@ exit_daemonize:
         udev_queue_export_unref(udev_queue_export);
         udev_ctrl_connection_unref(ctrl_conn);
         udev_ctrl_unref(udev_ctrl);
-        udev_selinux_exit(udev);
+        label_finish();
         udev_unref(udev);
         log_close();
         return rc;