chiark / gitweb /
udev: limit minimum worker count to 16
[elogind.git] / src / udev / udevd.c
index fc33ff01170b7878a3e0e40479f7e68011b65d7a..8ad1eccac14a573eef077bdc844d64a3053248db 100644 (file)
@@ -46,6 +46,8 @@
 
 #include "udev.h"
 #include "sd-daemon.h"
+#include "cgroup-util.h"
+#include "dev-setup.h"
 
 static bool debug;
 
@@ -72,6 +74,7 @@ static int exec_delay;
 static sigset_t sigmask_orig;
 static UDEV_LIST(event_list);
 static UDEV_LIST(worker_list);
+char *udev_cgroup;
 static bool udev_exit;
 
 enum event_state {
@@ -96,13 +99,9 @@ struct event {
         int ifindex;
 };
 
-static struct event *node_to_event(struct udev_list_node *node)
+static inline struct event *node_to_event(struct udev_list_node *node)
 {
-        char *event;
-
-        event = (char *)node;
-        event -= offsetof(struct event, node);
-        return (struct event *)event;
+        return container_of(node, struct event, node);
 }
 
 static void event_queue_cleanup(struct udev *udev, enum event_state type);
@@ -131,13 +130,9 @@ struct worker_message {
         int exitcode;
 };
 
-static struct worker *node_to_worker(struct udev_list_node *node)
+static inline struct worker *node_to_worker(struct udev_list_node *node)
 {
-        char *worker;
-
-        worker = (char *)node;
-        worker -= offsetof(struct worker, node);
-        return (struct worker *)worker;
+        return container_of(node, struct worker, node);
 }
 
 static void event_queue_delete(struct event *event, bool export)
@@ -325,11 +320,10 @@ static void worker_new(struct event *event)
                                 int fdcount;
                                 int i;
 
-                                fdcount = epoll_wait(fd_ep, ev, ARRAY_SIZE(ev), -1);
+                                fdcount = epoll_wait(fd_ep, ev, ELEMENTSOF(ev), -1);
                                 if (fdcount < 0) {
                                         if (errno == EINTR)
                                                 continue;
-                                        err = -errno;
                                         log_error("failed to poll: %m\n");
                                         goto out;
                                 }
@@ -453,22 +447,13 @@ static int event_queue_insert(struct udev_device *dev)
         return 0;
 }
 
-static void worker_kill(struct udev *udev, int retain)
+static void worker_kill(struct udev *udev)
 {
         struct udev_list_node *loop;
-        int max;
-
-        if (children <= retain)
-                return;
-
-        max = children - retain;
 
         udev_list_node_foreach(loop, &worker_list) {
                 struct worker *worker = node_to_worker(loop);
 
-                if (max-- <= 0)
-                        break;
-
                 if (worker->state == WORKER_KILLED)
                         continue;
 
@@ -636,7 +621,7 @@ static struct udev_ctrl_connection *handle_ctrl_msg(struct udev_ctrl *uctrl)
                 log_debug("udevd message (SET_LOG_PRIORITY) received, log_priority=%i\n", i);
                 log_set_max_level(i);
                 udev_set_log_priority(udev, i);
-                worker_kill(udev, 0);
+                worker_kill(udev);
         }
 
         if (udev_ctrl_get_stop_exec_queue(ctrl_msg) > 0) {
@@ -678,7 +663,7 @@ static struct udev_ctrl_connection *handle_ctrl_msg(struct udev_ctrl *uctrl)
                         }
                         free(key);
                 }
-                worker_kill(udev, 0);
+                worker_kill(udev);
         }
 
         i = udev_ctrl_get_set_children_max(ctrl_msg);
@@ -864,53 +849,18 @@ static void static_dev_create_from_modules(struct udev *udev)
                 else
                         continue;
 
-                util_strscpyl(filename, sizeof(filename), udev_get_dev_path(udev), "/", devname, NULL);
-                util_create_path_selinux(udev, filename);
-                udev_selinux_setfscreatecon(udev, filename, mode);
+                util_strscpyl(filename, sizeof(filename), "/dev/", devname, NULL);
+                mkdir_parents_label(filename, 0755);
+                label_context_set(filename, mode);
                 log_debug("mknod '%s' %c%u:%u\n", filename, type, maj, min);
                 if (mknod(filename, mode, makedev(maj, min)) < 0 && errno == EEXIST)
                         utimensat(AT_FDCWD, filename, NULL, 0);
-                udev_selinux_resetfscreatecon(udev);
+                label_context_clear();
         }
 
         fclose(f);
 }
 
-/* needed for standalone udev operations */
-static void static_dev_create_links(struct udev *udev)
-{
-        DIR *dir;
-        struct stdlinks {
-                const char *link;
-                const char *target;
-        };
-        static const struct stdlinks stdlinks[] = {
-                { "core", "/proc/kcore" },
-                { "fd", "/proc/self/fd" },
-                { "stdin", "/proc/self/fd/0" },
-                { "stdout", "/proc/self/fd/1" },
-                { "stderr", "/proc/self/fd/2" },
-        };
-        unsigned int i;
-
-        dir = opendir(udev_get_dev_path(udev));
-        if (dir == NULL)
-                return;
-
-        for (i = 0; i < ARRAY_SIZE(stdlinks); i++) {
-                struct stat sb;
-
-                if (stat(stdlinks[i].target, &sb) == 0) {
-                        udev_selinux_setfscreateconat(udev, dirfd(dir), stdlinks[i].link, S_IFLNK);
-                        if (symlinkat(stdlinks[i].target, dirfd(dir), stdlinks[i].link) < 0 && errno == EEXIST)
-                                utimensat(dirfd(dir), stdlinks[i].link, NULL, AT_SYMLINK_NOFOLLOW);
-                        udev_selinux_resetfscreatecon(udev);
-                }
-        }
-
-        closedir(dir);
-}
-
 static int mem_size_mb(void)
 {
         FILE *f;
@@ -942,16 +892,15 @@ static int convert_db(struct udev *udev)
         struct udev_list_entry *list_entry;
 
         /* current database */
-        util_strscpyl(filename, sizeof(filename), udev_get_run_path(udev), "/data", NULL);
-        if (access(filename, F_OK) >= 0)
+        if (access("/run/udev/data", F_OK) >= 0)
                 return 0;
 
         /* make sure we do not get here again */
-        util_create_path(udev, filename);
+        mkdir_parents("/run/udev/data", 0755);
         mkdir(filename, 0755);
 
         /* old database */
-        util_strscpyl(filename, sizeof(filename), udev_get_dev_path(udev), "/.udev/db", NULL);
+        util_strscpyl(filename, sizeof(filename), "/dev/.udev/db", NULL);
         if (access(filename, F_OK) < 0)
                 return 0;
 
@@ -984,7 +933,7 @@ static int convert_db(struct udev *udev)
 
                         /* find database in old location */
                         id = udev_device_get_id_filename(device);
-                        util_strscpyl(from, sizeof(from), udev_get_dev_path(udev), "/.udev/db/", id, NULL);
+                        util_strscpyl(from, sizeof(from), "/dev/.udev/db/", id, NULL);
                         if (lstat(from, &stats) == 0) {
                                 if (!have_db) {
                                         udev_device_read_db(device, from);
@@ -994,9 +943,8 @@ static int convert_db(struct udev *udev)
                         }
 
                         /* find old database with $subsys:$sysname name */
-                        util_strscpyl(from, sizeof(from), udev_get_dev_path(udev),
-                                     "/.udev/db/", udev_device_get_subsystem(device), ":",
-                                     udev_device_get_sysname(device), NULL);
+                        util_strscpyl(from, sizeof(from), "/dev/.udev/db/",
+                                      udev_device_get_subsystem(device), ":", udev_device_get_sysname(device), NULL);
                         if (lstat(from, &stats) == 0) {
                                 if (!have_db) {
                                         udev_device_read_db(device, from);
@@ -1007,7 +955,7 @@ static int convert_db(struct udev *udev)
 
                         /* find old database with the encoded devpath name */
                         util_path_encode(udev_device_get_devpath(device), devpath, sizeof(devpath));
-                        util_strscpyl(from, sizeof(from), udev_get_dev_path(udev), "/.udev/db/", devpath, NULL);
+                        util_strscpyl(from, sizeof(from), "/dev/.udev/db/", devpath, NULL);
                         if (lstat(from, &stats) == 0) {
                                 if (!have_db) {
                                         udev_device_read_db(device, from);
@@ -1062,36 +1010,6 @@ static int systemd_fds(struct udev *udev, int *rctrl, int *rnetlink)
         return 0;
 }
 
-static bool check_rules_timestamp(struct udev *udev)
-{
-        char **p;
-        unsigned long long *stamp_usec;
-        int i, n;
-        bool changed = false;
-
-        n = udev_get_rules_path(udev, &p, &stamp_usec);
-        for (i = 0; i < n; i++) {
-                struct stat stats;
-
-                if (stat(p[i], &stats) < 0)
-                        continue;
-
-                if (stamp_usec[i] == ts_usec(&stats.st_mtim))
-                        continue;
-
-                /* first check */
-                if (stamp_usec[i] != 0) {
-                        log_debug("reload - timestamp of '%s' changed\n", p[i]);
-                        changed = true;
-                }
-
-                /* update timestamp */
-                stamp_usec[i] = ts_usec(&stats.st_mtim);
-        }
-
-        return changed;
-}
-
 int main(int argc, char *argv[])
 {
         struct udev *udev;
@@ -1124,7 +1042,7 @@ int main(int argc, char *argv[])
         log_parse_environment();
         udev_set_log_fn(udev, udev_main_log);
         log_debug("version %s\n", VERSION);
-        udev_selinux_init(udev);
+        label_init("/dev");
 
         for (;;) {
                 int option;
@@ -1145,8 +1063,8 @@ int main(int argc, char *argv[])
                         break;
                 case 'D':
                         debug = true;
-                        if (udev_get_log_priority(udev) < LOG_INFO)
-                                udev_set_log_priority(udev, LOG_INFO);
+                        log_set_max_level(LOG_DEBUG);
+                        udev_set_log_priority(udev, LOG_INFO);
                         break;
                 case 'N':
                         if (strcmp (optarg, "early") == 0) {
@@ -1224,11 +1142,9 @@ int main(int argc, char *argv[])
         chdir("/");
         umask(022);
 
-        /* /run/udev */
-        mkdir(udev_get_run_path(udev), 0755);
+        mkdir("/run/udev", 0755);
 
-        /* create standard links, copy static nodes, create nodes from modules */
-        static_dev_create_links(udev);
+        dev_setup();
         static_dev_create_from_modules(udev);
 
         /* before opening new files, make sure std{in,out,err} fds are in a sane state */
@@ -1264,6 +1180,10 @@ int main(int argc, char *argv[])
                         rc = 3;
                         goto exit;
                 }
+
+                /* get our own cgroup, we regularly kill everything udev has left behind */
+                if (cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 0, &udev_cgroup) < 0)
+                        udev_cgroup = NULL;
         } else {
                 /* open control and netlink socket */
                 udev_ctrl = udev_ctrl_new(udev);
@@ -1327,18 +1247,8 @@ int main(int argc, char *argv[])
 
                 setsid();
 
-                fd = open("/proc/self/oom_score_adj", O_RDWR);
-                if (fd < 0) {
-                        /* Fallback to old interface */
-                        fd = open("/proc/self/oom_adj", O_RDWR);
-                        if (fd < 0) {
-                                log_error("error disabling OOM: %m\n");
-                        } else {
-                                /* OOM_DISABLE == -17 */
-                                write(fd, "-17", 3);
-                                close(fd);
-                        }
-                } else {
+                fd = open("/proc/self/oom_score_adj", O_RDWR|O_CLOEXEC);
+                if (fd >= 0) {
                         write(fd, "-1000", 5);
                         close(fd);
                 }
@@ -1443,9 +1353,9 @@ int main(int argc, char *argv[])
 
                 /* set value depending on the amount of RAM */
                 if (memsize > 0)
-                        children_max = 128 + (memsize / 8);
+                        children_max = 16 + (memsize / 8);
                 else
-                        children_max = 128;
+                        children_max = 16;
         }
         log_debug("set children_max to %u\n", children_max);
 
@@ -1481,7 +1391,7 @@ int main(int argc, char *argv[])
 
                         /* discard queued events and kill workers */
                         event_queue_cleanup(udev, EVENT_QUEUED);
-                        worker_kill(udev, 0);
+                        worker_kill(udev);
 
                         /* exit after all has cleaned up */
                         if (udev_list_node_is_empty(&event_list) && udev_list_node_is_empty(&worker_list))
@@ -1489,14 +1399,18 @@ int main(int argc, char *argv[])
 
                         /* timeout at exit for workers to finish */
                         timeout = 30 * 1000;
-                } else if (udev_list_node_is_empty(&event_list) && children <= 2) {
+                } else if (udev_list_node_is_empty(&event_list) && !children) {
                         /* we are idle */
                         timeout = -1;
+
+                        /* cleanup possible left-over processes in our cgroup */
+                        if (udev_cgroup)
+                                cg_kill(SYSTEMD_CGROUP_CONTROLLER, udev_cgroup, SIGKILL, false, true, NULL);
                 } else {
                         /* kill idle or hanging workers */
                         timeout = 3 * 1000;
                 }
-                fdcount = epoll_wait(fd_ep, ev, ARRAY_SIZE(ev), timeout);
+                fdcount = epoll_wait(fd_ep, ev, ELEMENTSOF(ev), timeout);
                 if (fdcount < 0)
                         continue;
 
@@ -1512,7 +1426,7 @@ int main(int argc, char *argv[])
                         /* kill idle workers */
                         if (udev_list_node_is_empty(&event_list)) {
                                 log_debug("cleanup idle workers\n");
-                                worker_kill(udev, 2);
+                                worker_kill(udev);
                         }
 
                         /* check for hanging events */
@@ -1557,7 +1471,7 @@ int main(int argc, char *argv[])
 
                 /* check for changed config, every 3 seconds at most */
                 if ((now_usec() - last_usec) > 3 * 1000 * 1000) {
-                        if (check_rules_timestamp(udev))
+                        if (udev_rules_check_timestamp(rules))
                                 reload = true;
                         if (udev_builtin_validate(udev))
                                 reload = true;
@@ -1567,7 +1481,7 @@ int main(int argc, char *argv[])
 
                 /* reload requested, HUP signal received, rules changed, builtin changed */
                 if (reload) {
-                        worker_kill(udev, 0);
+                        worker_kill(udev);
                         rules = udev_rules_unref(rules);
                         udev_builtin_exit(udev);
                         reload = 0;
@@ -1647,7 +1561,7 @@ exit_daemonize:
         udev_queue_export_unref(udev_queue_export);
         udev_ctrl_connection_unref(ctrl_conn);
         udev_ctrl_unref(udev_ctrl);
-        udev_selinux_exit(udev);
+        label_finish();
         udev_unref(udev);
         log_close();
         return rc;