chiark / gitweb /
readahead: add interface to sd-daemon.[ch] to control readahead
authorLennart Poettering <lennart@poettering.net>
Sun, 26 Sep 2010 13:50:14 +0000 (15:50 +0200)
committerLennart Poettering <lennart@poettering.net>
Sun, 26 Sep 2010 13:50:14 +0000 (15:50 +0200)
Makefile.am
fixme
man/sd_readahead.xml [new file with mode: 0644]
man/systemd-notify.xml
src/notify.c
src/readahead-collect.c
src/readahead-common.c
src/readahead-common.h
src/readahead-replay.c
src/sd-daemon.c
src/sd-daemon.h

index 2cd3deb..70a6c19 100644 (file)
@@ -421,6 +421,7 @@ MANPAGES = \
        man/systemd-cgls.1 \
        man/systemd-notify.1 \
        man/sd_notify.3 \
+       man/sd_readahead.3 \
        man/sd_booted.3 \
        man/sd_listen_fds.3 \
        man/sd_is_fifo.3 \
diff --git a/fixme b/fixme
index 16aabda..7e025c4 100644 (file)
--- a/fixme
+++ b/fixme
 
 * readahead() vs. fadvise() vs. ioprio
 
+* unneeded
+
+* properly handle multiple inotify events per read() in path.c and util.c
+
 External:
 
 * place /etc/inittab with explaining blurb.
diff --git a/man/sd_readahead.xml b/man/sd_readahead.xml
new file mode 100644 (file)
index 0000000..178f907
--- /dev/null
@@ -0,0 +1,178 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+        "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<!--
+  This file is part of systemd.
+
+  Copyright 2010 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+-->
+
+<refentry id="sd_readahead">
+
+        <refentryinfo>
+                <title>sd_readahead</title>
+                <productname>systemd</productname>
+
+                <authorgroup>
+                        <author>
+                                <contrib>Developer</contrib>
+                                <firstname>Lennart</firstname>
+                                <surname>Poettering</surname>
+                                <email>lennart@poettering.net</email>
+                        </author>
+                </authorgroup>
+        </refentryinfo>
+
+        <refmeta>
+                <refentrytitle>sd_readahead</refentrytitle>
+                <manvolnum>3</manvolnum>
+        </refmeta>
+
+        <refnamediv>
+                <refname>sd_readahead</refname>
+                <refpurpose>Control ongoing disk read-ahead operations</refpurpose>
+        </refnamediv>
+
+        <refsynopsisdiv>
+                <funcsynopsis>
+                        <funcsynopsisinfo>#include "sd-daemon.h"</funcsynopsisinfo>
+
+                        <funcprototype>
+                                <funcdef>int <function>sd_readahead</function></funcdef>
+                                <paramdef>const char *<parameter>action</parameter></paramdef>
+                        </funcprototype>
+                </funcsynopsis>
+        </refsynopsisdiv>
+
+        <refsect1>
+                <title>Description</title>
+                <para><function>sd_readahead()</function> may be
+                called by programs involved with early boot-up to
+                control ongoing disk read-ahead operations. It may be
+                used to terminate read-ahead operations in case an
+                uncommon disk access pattern is to be expected and
+                hence read-ahead replay or collection is unlikely to
+                have the desired speed-up effect on the current or
+                future boot-ups.</para>
+
+                <para>The <parameter>action</parameter> should be one
+                of the following strings:</para>
+
+                <variablelist>
+                        <varlistentry>
+                                <term>cancel</term>
+
+                                <listitem><para>Terminates read-ahead
+                                data collection, and drops all
+                                read-ahead data collected during this
+                                boot-up.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
+                                <term>done</term>
+
+                                <listitem><para>Terminates read-ahead
+                                data collection, but keeps all
+                                read-ahead data collected during this
+                                boot-up around for use during
+                                subsequent boot-ups.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
+                                <term>noreplay</term>
+
+                                <listitem><para>Terminates read-ahead
+                                replay.</para></listitem>
+                        </varlistentry>
+
+                </variablelist>
+
+        </refsect1>
+
+        <refsect1>
+                <title>Return Value</title>
+
+                <para>On failure, this call returns a negative
+                errno-style error code. It is generally recommended to
+                ignore the return value of this call.</para>
+        </refsect1>
+
+        <refsect1>
+                <title>Notes</title>
+
+                <para>This function is provided by the reference
+                implementation of APIs for new-style daemons and
+                distributed with the systemd package. The algorithm
+                it implements is simple, and can easily be
+                reimplemented in daemons if it is important to support
+                this interface without using the reference
+                implementation.</para>
+
+                <para>Internally, this function creates a file in
+                <filename>/dev/.systemd/readahead/</filename> which is
+                then used as flag file to notify the read-ahead
+                subsystem.</para>
+
+                <para>For details about the algorithm, check the
+                liberally licensed reference implementation sources:
+                <ulink url="http://cgit.freedesktop.org/systemd/tree/src/sd-daemon.c"/>
+                and <ulink
+                url="http://cgit.freedesktop.org/systemd/tree/src/sd-daemon.h"/>.</para>
+
+                <para><function>sd_readahead()</function> is
+                implemented in the reference implementation's drop-in
+                <filename>sd-daemon.c</filename> and
+                <filename>sd-daemon.h</filename> files. It is
+                recommended that applications consuming this API copy
+                the implementation into their source tree. For more
+                details about the reference implementation see
+                <citerefentry><refentrytitle>sd_daemon</refentrytitle><manvolnum>7</manvolnum></citerefentry>.</para>
+
+                <para>If -DDISABLE_SYSTEMD is set during compilation
+                this function will always return 0 and otherwise
+                become a NOP.</para>
+        </refsect1>
+
+        <refsect1>
+                <title>Examples</title>
+
+                <example>
+                        <title>Cancelling all read-ahead operations</title>
+
+                        <para>During boots where SELinux has to
+                        relabel the file system hierarchy, it will
+                        create a large amount of disk accesses that
+                        are not necessary during normal boots. Hence
+                        it is a good idea to disable both read-ahead replay and read-ahead collection.
+                        </para>
+
+                        <programlisting>sd_readahead("cancel");
+sd_readahead("noreplay");</programlisting>
+                </example>
+
+        </refsect1>
+
+        <refsect1>
+                <title>See Also</title>
+                <para>
+                        <citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
+                        <citerefentry><refentrytitle>sd_daemon</refentrytitle><manvolnum>7</manvolnum></citerefentry>,
+                        <citerefentry><refentrytitle>daemon</refentrytitle><manvolnum>7</manvolnum></citerefentry>
+                </para>
+        </refsect1>
+
+</refentry>
index 5286418..59d6b2f 100644 (file)
                                 semantics of this option see
                                 <citerefentry><refentrytitle>sd_booted</refentrytitle><manvolnum>3</manvolnum></citerefentry>.</para></listitem>
                         </varlistentry>
+
+                        <varlistentry>
+                                <term><option>--readahead=</option></term>
+
+                                <listitem><para>Controls disk
+                                read-ahead operations. The argument
+                                must be a string, and either "cancel",
+                                "done" or "noreplay". For details
+                                about the semantics of this option see
+                                <citerefentry><refentrytitle>sd_readahead</refentrytitle><manvolnum>3</manvolnum></citerefentry>.</para></listitem>
+                        </varlistentry>
                 </variablelist>
 
         </refsect1>
index 28cfe23..61c4b0f 100644 (file)
@@ -36,16 +36,18 @@ static bool arg_ready = false;
 static pid_t arg_pid = 0;
 static const char *arg_status = NULL;
 static bool arg_booted = false;
+static const char *arg_readahead = NULL;
 
 static int help(void) {
 
         printf("%s [OPTIONS...] [VARIABLE=VALUE...]\n\n"
                "Notify the init system about service status updates.\n\n"
-               "  -h --help         Show this help\n"
-               "     --ready        Inform the init system about service start-up completion\n"
-               "     --pid[=PID]    Set main pid of daemon\n"
-               "     --status=TEXT  Set status text\n"
-               "     --booted       Returns 0 if the system was booted up with systemd, non-zero otherwise\n",
+               "  -h --help             Show this help\n"
+               "     --ready            Inform the init system about service start-up completion\n"
+               "     --pid[=PID]        Set main pid of daemon\n"
+               "     --status=TEXT      Set status text\n"
+               "     --booted           Returns 0 if the system was booted up with systemd, non-zero otherwise\n"
+               "     --readahead=ACTION Controls read-ahead operations\n",
                program_invocation_short_name);
 
         return 0;
@@ -57,16 +59,18 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_READY = 0x100,
                 ARG_PID,
                 ARG_STATUS,
-                ARG_BOOTED
+                ARG_BOOTED,
+                ARG_READAHEAD
         };
 
         static const struct option options[] = {
-                { "help",      no_argument,       NULL, 'h'         },
-                { "ready",     no_argument,       NULL, ARG_READY   },
-                { "pid",       optional_argument, NULL, ARG_PID     },
-                { "status",    required_argument, NULL, ARG_STATUS  },
-                { "booted",    no_argument,       NULL, ARG_BOOTED  },
-                { NULL,        0,                 NULL, 0           }
+                { "help",      no_argument,       NULL, 'h'           },
+                { "ready",     no_argument,       NULL, ARG_READY     },
+                { "pid",       optional_argument, NULL, ARG_PID       },
+                { "status",    required_argument, NULL, ARG_STATUS    },
+                { "booted",    no_argument,       NULL, ARG_BOOTED    },
+                { "readahead", required_argument, NULL, ARG_READAHEAD },
+                { NULL,        0,                 NULL, 0             }
         };
 
         int c;
@@ -106,6 +110,10 @@ static int parse_argv(int argc, char *argv[]) {
                         arg_booted = true;
                         break;
 
+                case ARG_READAHEAD:
+                        arg_readahead = optarg;
+                        break;
+
                 case '?':
                         return -EINVAL;
 
@@ -119,7 +127,8 @@ static int parse_argv(int argc, char *argv[]) {
             !arg_ready &&
             !arg_status &&
             !arg_pid &&
-            !arg_booted) {
+            !arg_booted &&
+            !arg_readahead) {
                 help();
                 return -EINVAL;
         }
@@ -144,6 +153,13 @@ int main(int argc, char* argv[]) {
         if (arg_booted)
                 return sd_booted() <= 0;
 
+        if (arg_readahead) {
+                if ((r = sd_readahead(arg_readahead)) < 0) {
+                        log_error("Failed to issue read-ahead control command: %s", strerror(-r));
+                        goto finish;
+                }
+        }
+
         if (arg_ready)
                 our_env[i++] = (char*) "READY=1";
 
index 817b958..aa136ce 100644 (file)
@@ -41,6 +41,7 @@
 #include <sys/ioctl.h>
 #include <sys/vfs.h>
 #include <getopt.h>
+#include <sys/inotify.h>
 
 #include "missing.h"
 #include "util.h"
@@ -56,6 +57,7 @@
  * - sd_readahead_cancel
  * - gzip?
  * - remount rw?
+ * - handle files where nothing is in mincore
  * - does ioprio_set work with fadvise()?
  */
 
@@ -199,12 +201,13 @@ static int qsort_compare(const void *a, const void *b) {
 
 static int collect(const char *root) {
         enum {
-                FD_FANOTIFY,
+                FD_FANOTIFY,  /* Get the actualy fs events */
                 FD_SIGNAL,
+                FD_INOTIFY,   /* We get notifications to quit early via this fd */
                 _FD_MAX
         };
         struct pollfd pollfd[_FD_MAX];
-        int fanotify_fd = -1, signal_fd = -1, r = 0;
+        int fanotify_fd = -1, signal_fd = -1, inotify_fd = -1, r = 0;
         pid_t my_pid;
         Hashmap *files = NULL;
         Iterator i;
@@ -251,6 +254,11 @@ static int collect(const char *root) {
                 goto finish;
         }
 
+        if ((inotify_fd = open_inotify()) < 0) {
+                r = inotify_fd;
+                goto finish;
+        }
+
         not_after = now(CLOCK_MONOTONIC) + arg_timeout;
 
         my_pid = getpid();
@@ -260,6 +268,8 @@ static int collect(const char *root) {
         pollfd[FD_FANOTIFY].events = POLLIN;
         pollfd[FD_SIGNAL].fd = signal_fd;
         pollfd[FD_SIGNAL].events = POLLIN;
+        pollfd[FD_INOTIFY].fd = inotify_fd;
+        pollfd[FD_INOTIFY].events = POLLIN;
 
         sd_notify(0,
                   "READY=1\n"
@@ -267,6 +277,17 @@ static int collect(const char *root) {
 
         log_debug("Collecting...");
 
+        if (access("/dev/.systemd/readahead/cancel", F_OK) >= 0) {
+                log_debug("Collection canceled");
+                r = -ECANCELED;
+                goto finish;
+        }
+
+        if (access("/dev/.systemd/readahead/done", F_OK) >= 0) {
+                log_debug("Got termination request");
+                goto done;
+        }
+
         for (;;) {
                 union {
                         struct fanotify_event_metadata metadata;
@@ -298,14 +319,52 @@ static int collect(const char *root) {
                         goto finish;
                 }
 
-                if (pollfd[FD_SIGNAL].revents != 0)
-                        break;
-
                 if (h == 0) {
                         log_debug("Reached maximum collection time, ending collection.");
                         break;
                 }
 
+                if (pollfd[FD_SIGNAL].revents) {
+                        log_debug("Got signal.");
+                        break;
+                }
+
+                if (pollfd[FD_INOTIFY].revents) {
+                        uint8_t inotify_buffer[sizeof(struct inotify_event) + FILENAME_MAX];
+                        struct inotify_event *e;
+
+                        if ((n = read(inotify_fd, &inotify_buffer, sizeof(inotify_buffer))) < 0) {
+                                if (errno == EINTR || errno == EAGAIN)
+                                        continue;
+
+                                log_error("Failed to read inotify event: %m");
+                                r = -errno;
+                                goto finish;
+                        }
+
+                        e = (struct inotify_event*) inotify_buffer;
+                        while (n > 0) {
+                                size_t step;
+
+                                if ((e->mask & IN_CREATE) && streq(e->name, "cancel")) {
+                                        log_debug("Collection canceled");
+                                        r = -ECANCELED;
+                                        goto finish;
+                                }
+
+                                if ((e->mask & IN_CREATE) && streq(e->name, "done")) {
+                                        log_debug("Got termination request");
+                                        goto done;
+                                }
+
+                                step = sizeof(struct inotify_event) + e->len;
+                                assert(step <= (size_t) n);
+
+                                e = (struct inotify_event*) ((uint8_t*) e + step);
+                                n -= step;
+                        }
+                }
+
                 if ((n = read(fanotify_fd, &data, sizeof(data))) < 0) {
 
                         if (errno == EINTR || errno == EAGAIN)
@@ -352,6 +411,7 @@ static int collect(const char *root) {
                 }
         }
 
+done:
         if (fanotify_fd >= 0) {
                 close_nointr_nofail(fanotify_fd);
                 fanotify_fd = -1;
@@ -451,6 +511,9 @@ finish:
         if (signal_fd >= 0)
                 close_nointr_nofail(signal_fd);
 
+        if (inotify_fd >= 0)
+                close_nointr_nofail(inotify_fd);
+
         if (pack) {
                 fclose(pack);
                 unlink(pack_fn_new);
index a1016a3..a2f6f17 100644 (file)
@@ -24,6 +24,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/sysinfo.h>
+#include <sys/inotify.h>
 
 #include "log.h"
 #include "readahead-common.h"
@@ -116,3 +117,23 @@ bool enough_ram(void) {
                                                * with at least 128MB
                                                * memory */
 }
+
+/* Creates a non-blocking, close-on-exec inotify descriptor that watches
+ * /dev/.systemd/readahead for newly created entries (IN_CREATE).
+ *
+ * Returns the inotify file descriptor on success, or a negative errno-style
+ * error code if the descriptor or the watch cannot be set up. On failure no
+ * descriptor is left open. */
+int open_inotify(void) {
+        int fd, saved_errno;
+
+        if ((fd = inotify_init1(IN_CLOEXEC|IN_NONBLOCK)) < 0) {
+                /* Capture errno first: logging may overwrite it. */
+                saved_errno = errno;
+                log_error("Failed to create inotify handle: %m");
+                return -saved_errno;
+        }
+
+        /* Best effort: the directories usually exist already; a real
+         * problem shows up as a failure to add the watch below. */
+        mkdir("/dev/.systemd", 0755);
+        mkdir("/dev/.systemd/readahead", 0755);
+
+        if (inotify_add_watch(fd, "/dev/.systemd/readahead", IN_CREATE) < 0) {
+                /* Capture errno before logging and closing, either of
+                 * which may overwrite it. */
+                saved_errno = errno;
+                log_error("Failed to watch /dev/.systemd/readahead: %m");
+                close_nointr_nofail(fd);
+                return -saved_errno;
+        }
+
+        return fd;
+}
index c7fd713..3f64f29 100644 (file)
@@ -32,4 +32,6 @@ int fs_on_ssd(const char *p);
 
 bool enough_ram(void);
 
+int open_inotify(void);
+
 #endif
index d4ddf26..a5a2936 100644 (file)
@@ -33,6 +33,7 @@
 #include <sys/stat.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <sys/inotify.h>
 
 #include "missing.h"
 #include "util.h"
@@ -119,6 +120,7 @@ static int replay(const char *root) {
         char *pack_fn = NULL, c;
         bool on_ssd, ready = false;
         int prio;
+        int inotify_fd = -1;
 
         assert(root);
 
@@ -141,6 +143,11 @@ static int replay(const char *root) {
                 goto finish;
         }
 
+        if ((inotify_fd = open_inotify()) < 0) {
+                r = inotify_fd;
+                goto finish;
+        }
+
         if (!(fgets(line, sizeof(line), pack))) {
                 log_error("Premature end of pack file.");
                 r = -EIO;
@@ -177,8 +184,40 @@ static int replay(const char *root) {
 
         log_debug("Replaying...");
 
+        if (access("/dev/.systemd/readahead/noreplay", F_OK) >= 0) {
+                log_debug("Got termination request");
+                goto done;
+        }
+
         while (!feof(pack) && !ferror(pack)) {
+                uint8_t inotify_buffer[sizeof(struct inotify_event) + FILENAME_MAX];
                 int k;
+                ssize_t n;
+
+                if ((n = read(inotify_fd, &inotify_buffer, sizeof(inotify_buffer))) < 0) {
+                        if (errno != EINTR && errno != EAGAIN) {
+                                log_error("Failed to read inotify event: %m");
+                                r = -errno;
+                                goto finish;
+                        }
+                } else {
+                        struct inotify_event *e = (struct inotify_event*) inotify_buffer;
+
+                        while (n > 0) {
+                                size_t step;
+
+                                if ((e->mask & IN_CREATE) && streq(e->name, "noreplay")) {
+                                        log_debug("Got termination request");
+                                        goto done;
+                                }
+
+                                step = sizeof(struct inotify_event) + e->len;
+                                assert(step <= (size_t) n);
+
+                                e = (struct inotify_event*) ((uint8_t*) e + step);
+                                n -= step;
+                        }
+                }
 
                 if ((k = unpack_file(pack)) < 0) {
                         r = k;
@@ -193,6 +232,7 @@ static int replay(const char *root) {
                 }
         }
 
+done:
         if (!ready)
                 sd_notify(0, "READY=1");
 
@@ -208,6 +248,9 @@ finish:
         if (pack)
                 fclose(pack);
 
+        if (inotify_fd >= 0)
+                close_nointr_nofail(inotify_fd);
+
         free(pack_fn);
 
         return r;
index 9c23b91..316fccc 100644 (file)
@@ -433,3 +433,41 @@ int sd_booted(void) {
         return a.st_dev != b.st_dev;
 #endif
 }
+
+/* Creates the flag file at path if it does not exist yet, after trying to
+ * create /dev/.systemd and /dev/.systemd/readahead. The file is opened
+ * without truncation, so an existing file keeps its contents.
+ *
+ * Returns 0 on success, or a negative errno-style error code if the file
+ * cannot be created or closed. If DISABLE_SYSTEMD is defined or the target
+ * is not Linux this is a no-op that returns 0. */
+static int touch(const char *path) {
+
+#if !defined(DISABLE_SYSTEMD) && defined(__linux__)
+        int fd;
+
+        /* Best effort: the directories usually exist already, and a real
+         * problem surfaces as an open() failure below. */
+        mkdir("/dev/.systemd", 0755);
+        mkdir("/dev/.systemd/readahead", 0755);
+
+        if ((fd = open(path, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY, 0666)) < 0)
+                return -errno;
+
+        /* Close exactly once. On Linux the descriptor is released even
+         * when close() fails with EINTR, so retrying could hit a
+         * descriptor that was reused in the meantime. The file has been
+         * created at this point, hence EINTR is not treated as an error. */
+        if (close(fd) < 0 && errno != EINTR)
+                return -errno;
+
+#endif
+        return 0;
+}
+
+/* Maps a read-ahead control action onto its flag file below
+ * /dev/.systemd/readahead and creates that file via touch().
+ *
+ * Accepted actions are "cancel", "done" and "noreplay". Returns -EINVAL
+ * for a NULL or unrecognized action, otherwise whatever touch() returns. */
+int sd_readahead(const char *action) {
+        const char *flag_file = NULL;
+
+        if (action == NULL)
+                return -EINVAL;
+
+        if (!strcmp(action, "cancel"))
+                flag_file = "/dev/.systemd/readahead/cancel";
+        else if (!strcmp(action, "done"))
+                flag_file = "/dev/.systemd/readahead/done";
+        else if (!strcmp(action, "noreplay"))
+                flag_file = "/dev/.systemd/readahead/noreplay";
+
+        /* Anything that did not match one of the known actions is invalid. */
+        if (flag_file == NULL)
+                return -EINVAL;
+
+        return touch(flag_file);
+}
index 008a44c..2fbfe95 100644 (file)
@@ -254,6 +254,16 @@ int sd_notifyf(int unset_environment, const char *format, ...) _sd_printf_attr_(
 */
 int sd_booted(void) _sd_hidden_;
 
+/*
+  Controls ongoing disk read-ahead operations during boot-up. The argument
+  must be a string, and either "cancel", "done" or "noreplay".
+
+  cancel = terminate read-ahead data collection, drop collected information
+  done = terminate read-ahead data collection, keep collected information
+  noreplay = terminate read-ahead replay
+
+  Returns 0 on success, or a negative errno-style error code on failure.
+  -EINVAL is returned if the argument is NULL or is not one of the strings
+  listed above.
+*/
+int sd_readahead(const char *action);
+
 #ifdef __cplusplus
 }
 #endif