chiark / gitweb /
tmpfiles: separate a generic item glob processing function
[elogind.git] / src / readahead-collect.c
index 0637c91de30dacd2178524a6d3fc4435550d60d9..eac11e7e5c160477f6c8ba61fd4d2394a67b6b97 100644 (file)
@@ -41,6 +41,7 @@
 #include <sys/ioctl.h>
 #include <sys/vfs.h>
 #include <getopt.h>
+#include <sys/inotify.h>
 
 #include "missing.h"
 #include "util.h"
 #include "sd-daemon.h"
 #include "ioprio.h"
 #include "readahead-common.h"
+#include "virt.h"
 
 /* fixme:
  *
  * - detect ssd on btrfs/lvm...
  * - read ahead directories
- * - sd_readahead_cancel
  * - gzip?
  * - remount rw?
+ * - handle files where nothing is in mincore
  * - does ioprio_set work with fadvise()?
  */
 
@@ -63,6 +65,12 @@ static unsigned arg_files_max = 16*1024;
 static off_t arg_file_size_max = READAHEAD_FILE_SIZE_MAX;
 static usec_t arg_timeout = 2*USEC_PER_MINUTE;
 
+static ReadaheadShared *shared = NULL;
+
+/* Avoid collisions with the NULL pointer */
+#define SECTOR_TO_PTR(s) ULONG_TO_PTR((s)+1)
+#define PTR_TO_SECTOR(p) (PTR_TO_ULONG(p)-1)
+
 static int btrfs_defrag(int fd) {
         struct btrfs_ioctl_vol_args data;
 
@@ -85,6 +93,13 @@ static int pack_file(FILE *pack, const char *fn, bool on_btrfs) {
         assert(fn);
 
         if ((fd = open(fn, O_RDONLY|O_CLOEXEC|O_NOATIME|O_NOCTTY|O_NOFOLLOW)) < 0) {
+
+                if (errno == ENOENT)
+                        return 0;
+
+                if (errno == EPERM || errno == EACCES)
+                        return 0;
+
                 log_warning("open(%s) failed: %m", fn);
                 r = -errno;
                 goto finish;
@@ -105,9 +120,10 @@ static int pack_file(FILE *pack, const char *fn, bool on_btrfs) {
                 goto finish;
         }
 
-        pages = l / PAGE_SIZE;
+        pages = l / page_size();
 
         vec = alloca(pages);
+        memset(vec, 0, pages);
         if (mincore(start, l, vec) < 0) {
                 log_warning("mincore(%s) failed: %m", fn);
                 r = -errno;
@@ -199,12 +215,13 @@ static int qsort_compare(const void *a, const void *b) {
 
 static int collect(const char *root) {
         enum {
-                FD_FANOTIFY,
+                FD_FANOTIFY,  /* Get the actual fs events */
                 FD_SIGNAL,
+                FD_INOTIFY,   /* We get notifications to quit early via this fd */
                 _FD_MAX
         };
         struct pollfd pollfd[_FD_MAX];
-        int fanotify_fd = -1, signal_fd = -1, r = 0;
+        int fanotify_fd = -1, signal_fd = -1, inotify_fd = -1, r = 0;
         pid_t my_pid;
         Hashmap *files = NULL;
         Iterator i;
@@ -251,6 +268,11 @@ static int collect(const char *root) {
                 goto finish;
         }
 
+        if ((inotify_fd = open_inotify()) < 0) {
+                r = inotify_fd;
+                goto finish;
+        }
+
         not_after = now(CLOCK_MONOTONIC) + arg_timeout;
 
         my_pid = getpid();
@@ -260,6 +282,8 @@ static int collect(const char *root) {
         pollfd[FD_FANOTIFY].events = POLLIN;
         pollfd[FD_SIGNAL].fd = signal_fd;
         pollfd[FD_SIGNAL].events = POLLIN;
+        pollfd[FD_INOTIFY].fd = inotify_fd;
+        pollfd[FD_INOTIFY].events = POLLIN;
 
         sd_notify(0,
                   "READY=1\n"
@@ -267,6 +291,17 @@ static int collect(const char *root) {
 
         log_debug("Collecting...");
 
+        if (access("/run/systemd/readahead/cancel", F_OK) >= 0) {
+                log_debug("Collection canceled");
+                r = -ECANCELED;
+                goto finish;
+        }
+
+        if (access("/run/systemd/readahead/done", F_OK) >= 0) {
+                log_debug("Got termination request");
+                goto done;
+        }
+
         for (;;) {
                 union {
                         struct fanotify_event_metadata metadata;
@@ -298,60 +333,117 @@ static int collect(const char *root) {
                         goto finish;
                 }
 
-                if (pollfd[FD_SIGNAL].revents != 0)
-                        break;
-
                 if (h == 0) {
                         log_debug("Reached maximum collection time, ending collection.");
                         break;
                 }
 
+                if (pollfd[FD_SIGNAL].revents) {
+                        log_debug("Got signal.");
+                        break;
+                }
+
+                if (pollfd[FD_INOTIFY].revents) {
+                        uint8_t inotify_buffer[sizeof(struct inotify_event) + FILENAME_MAX];
+                        struct inotify_event *e;
+
+                        if ((n = read(inotify_fd, &inotify_buffer, sizeof(inotify_buffer))) < 0) {
+                                if (errno == EINTR || errno == EAGAIN)
+                                        continue;
+
+                                log_error("Failed to read inotify event: %m");
+                                r = -errno;
+                                goto finish;
+                        }
+
+                        e = (struct inotify_event*) inotify_buffer;
+                        while (n > 0) {
+                                size_t step;
+
+                                if ((e->mask & IN_CREATE) && streq(e->name, "cancel")) {
+                                        log_debug("Collection canceled");
+                                        r = -ECANCELED;
+                                        goto finish;
+                                }
+
+                                if ((e->mask & IN_CREATE) && streq(e->name, "done")) {
+                                        log_debug("Got termination request");
+                                        goto done;
+                                }
+
+                                step = sizeof(struct inotify_event) + e->len;
+                                assert(step <= (size_t) n);
+
+                                e = (struct inotify_event*) ((uint8_t*) e + step);
+                                n -= step;
+                        }
+                }
+
                 if ((n = read(fanotify_fd, &data, sizeof(data))) < 0) {
 
                         if (errno == EINTR || errno == EAGAIN)
                                 continue;
 
+                        /* fanotify sometimes returns EACCES on read()
+                         * where it shouldn't. For now let's just
+                         * ignore it here (which is safe), but
+                         * eventually this should be
+                         * dropped when the kernel is fixed.
+                         *
+                         * https://bugzilla.redhat.com/show_bug.cgi?id=707577 */
+                        if (errno == EACCES)
+                                continue;
+
                         log_error("Failed to read event: %m");
                         r = -errno;
                         goto finish;
                 }
 
                 for (m = &data.metadata; FAN_EVENT_OK(m, n); m = FAN_EVENT_NEXT(m, n)) {
+                        char fn[PATH_MAX];
+                        int k;
 
-                        if (m->pid != my_pid && m->fd >= 0) {
-                                char fn[PATH_MAX];
-                                int k;
+                        if (m->fd < 0)
+                                goto next_iteration;
 
-                                snprintf(fn, sizeof(fn), "/proc/self/fd/%i", m->fd);
-                                char_array_0(fn);
+                        if (m->pid == my_pid)
+                                goto next_iteration;
 
-                                if ((k = readlink_malloc(fn, &p)) >= 0) {
+                        __sync_synchronize();
+                        if (m->pid == shared->replay)
+                                goto next_iteration;
 
-                                        if (startswith(p, "/tmp") ||
-                                            hashmap_get(files, p))
-                                                /* Not interesting, or
-                                                 * already read */
-                                                free(p);
-                                        else {
-                                                unsigned long ul;
+                        snprintf(fn, sizeof(fn), "/proc/self/fd/%i", m->fd);
+                        char_array_0(fn);
 
-                                                ul = fd_first_block(m->fd);
+                        if ((k = readlink_malloc(fn, &p)) >= 0) {
+                                if (startswith(p, "/tmp") ||
+                                    endswith(p, " (deleted)") ||
+                                    hashmap_get(files, p))
+                                        /* Not interesting, or
+                                         * already read */
+                                        free(p);
+                                else {
+                                        unsigned long ul;
 
-                                                if ((k = hashmap_put(files, p, ULONG_TO_PTR(ul))) < 0) {
-                                                        log_warning("set_put() failed: %s", strerror(-k));
-                                                        free(p);
-                                                }
+                                        ul = fd_first_block(m->fd);
+
+                                        if ((k = hashmap_put(files, p, SECTOR_TO_PTR(ul))) < 0) {
+                                                log_warning("set_put() failed: %s", strerror(-k));
+                                                free(p);
                                         }
+                                }
 
-                                } else
-                                        log_warning("readlink(%s) failed: %s", fn, strerror(-k));
-                        }
+                        } else
+                                log_warning("readlink(%s) failed: %s", fn, strerror(-k));
 
+                next_iteration:
                         if (m->fd)
                                 close_nointr_nofail(m->fd);
                 }
         }
 
+done:
         if (fanotify_fd >= 0) {
                 close_nointr_nofail(fanotify_fd);
                 fanotify_fd = -1;
@@ -359,10 +451,10 @@ static int collect(const char *root) {
 
         log_debug("Writing Pack File...");
 
-        on_ssd = fs_on_ssd(root);
+        on_ssd = fs_on_ssd(root) > 0;
         log_debug("On SSD: %s", yes_no(on_ssd));
 
-        on_btrfs = statfs(root, &sfs) >= 0 && sfs.f_type == BTRFS_SUPER_MAGIC;
+        on_btrfs = statfs(root, &sfs) >= 0 && (long) sfs.f_type == (long) BTRFS_SUPER_MAGIC;
         log_debug("On btrfs: %s", yes_no(on_btrfs));
 
         asprintf(&pack_fn, "%s/.readahead", root);
@@ -409,7 +501,7 @@ static int collect(const char *root) {
                 j = ordered;
                 HASHMAP_FOREACH_KEY(q, p, files, i) {
                         j->path = p;
-                        j->block = PTR_TO_ULONG(q);
+                        j->block = PTR_TO_SECTOR(q);
                         j++;
                 }
 
@@ -451,6 +543,9 @@ finish:
         if (signal_fd >= 0)
                 close_nointr_nofail(signal_fd);
 
+        if (inotify_fd >= 0)
+                close_nointr_nofail(inotify_fd);
+
         if (pack) {
                 fclose(pack);
                 unlink(pack_fn_new);
@@ -460,7 +555,7 @@ finish:
         free(pack_fn);
 
         while ((p = hashmap_steal_first_key(files)))
-                free(q);
+                free(p);
 
         hashmap_free(files);
 
@@ -556,20 +651,41 @@ static int parse_argv(int argc, char *argv[]) {
 
 int main(int argc, char *argv[]) {
         int r;
+        const char *root;
 
         log_set_target(LOG_TARGET_SYSLOG_OR_KMSG);
         log_parse_environment();
         log_open();
 
+        umask(0022);
+
         if ((r = parse_argv(argc, argv)) <= 0)
                 return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
 
+        root = optind < argc ? argv[optind] : "/";
+
+        if (fs_on_read_only(root) > 0) {
+                log_info("Disabling readahead collector due to read-only media.");
+                return 0;
+        }
+
         if (!enough_ram()) {
                 log_info("Disabling readahead collector due to low memory.");
                 return 0;
         }
 
-        if (collect(optind < argc ? argv[optind] : "/") < 0)
+        if (detect_virtualization(NULL) > 0) {
+                log_info("Disabling readahead collector due to execution in virtualized environment.");
+                return 0;
+        }
+
+        if (!(shared = shared_get()))
+                return 1;
+
+        shared->collect = getpid();
+        __sync_synchronize();
+
+        if (collect(root) < 0)
                 return 1;
 
         return 0;