chiark / gitweb /
bootchart: it's not OK to return -1 from a main program
[elogind.git] / src / bootchart / bootchart.c
index 052e6370c98ebe83a8e76853b13e2aefaba87111..8ef5ad18a6ac742609fa83b1cd84098e6a77efe1 100644 (file)
@@ -3,7 +3,7 @@
 /***
   This file is part of systemd.
 
-  Copyright (C) 2009-2013 Intel Coproration
+  Copyright (C) 2009-2013 Intel Corporation
 
   Authors:
     Auke Kok <auke-jan.h.kok@intel.com>
@@ -48,6 +48,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <stdbool.h>
+#include "systemd/sd-journal.h"
 
 #include "util.h"
 #include "fileio.h"
 #include "store.h"
 #include "svg.h"
 #include "bootchart.h"
+#include "list.h"
 
 double graph_start;
 double log_start;
-double sampletime[MAXSAMPLES];
 struct ps_struct *ps_first;
-struct block_stat_struct blockstat[MAXSAMPLES];
-int entropy_avail[MAXSAMPLES];
-struct cpu_stat_struct cpustat[MAXCPUS];
 int pscount;
 int cpus;
 double interval;
@@ -74,21 +72,31 @@ int overrun = 0;
 static int exiting = 0;
 int sysfd=-1;
 
+#define DEFAULT_SAMPLES_LEN 500
+#define DEFAULT_HZ 25.0
+#define DEFAULT_SCALE_X 100.0 /* 100px = 1sec */
+#define DEFAULT_SCALE_Y 20.0  /* 16px = 1 process bar */
+#define DEFAULT_INIT "/sbin/init"
+#define DEFAULT_OUTPUT "/run/log"
+
 /* graph defaults */
 bool arg_entropy = false;
 bool initcall = true;
 bool arg_relative = false;
 bool arg_filter = true;
 bool arg_show_cmdline = false;
+bool arg_show_cgroup = false;
 bool arg_pss = false;
 int samples;
-int arg_samples_len = 500; /* we record len+1 (1 start sample) */
-double arg_hz = 25.0;   /* 20 seconds log time */
-double arg_scale_x = 100.0; /* 100px = 1sec */
-double arg_scale_y = 20.0;  /* 16px = 1 process bar */
+int arg_samples_len = DEFAULT_SAMPLES_LEN; /* we record len+1 (1 start sample) */
+double arg_hz = DEFAULT_HZ;
+double arg_scale_x = DEFAULT_SCALE_X;
+double arg_scale_y = DEFAULT_SCALE_Y;
+static struct list_sample_data *sampledata;
+struct list_sample_data *head;
 
-char arg_init_path[PATH_MAX] = "/sbin/init";
-char arg_output_path[PATH_MAX] = "/run/log";
+char arg_init_path[PATH_MAX] = DEFAULT_INIT;
+char arg_output_path[PATH_MAX] = DEFAULT_OUTPUT;
 
 static void signal_handler(int sig) {
         if (sig++)
@@ -98,6 +106,8 @@ static void signal_handler(int sig) {
 
 #define BOOTCHART_CONF "/etc/systemd/bootchart.conf"
 
+#define BOOTCHART_MAX (16*1024*1024)
+
 static void parse_conf(void) {
         char *init = NULL, *output = NULL;
         const ConfigTableItem items[] = {
@@ -111,19 +121,14 @@ static void parse_conf(void) {
                 { "Bootchart", "PlotEntropyGraph", config_parse_bool,   0, &arg_entropy     },
                 { "Bootchart", "ScaleX",           config_parse_double, 0, &arg_scale_x     },
                 { "Bootchart", "ScaleY",           config_parse_double, 0, &arg_scale_y     },
+                { "Bootchart", "ControlGroup",     config_parse_bool,   0, &arg_show_cgroup },
                 { NULL, NULL, NULL, 0, NULL }
         };
-        _cleanup_fclose_ FILE *f;
-        int r;
 
-        f = fopen(BOOTCHART_CONF, "re");
-        if (!f)
-                return;
-
-        r = config_parse(BOOTCHART_CONF, f,
-                         NULL, config_item_table_lookup, (void*) items, true, NULL);
-        if (r < 0)
-                log_warning("Failed to parse configuration file: %s", strerror(-r));
+        config_parse(NULL, BOOTCHART_CONF, NULL,
+                     NULL,
+                     config_item_table_lookup, items,
+                     true, false, true, NULL);
 
         if (init != NULL)
                 strscpy(arg_init_path, sizeof(arg_init_path), init);
@@ -131,28 +136,58 @@ static void parse_conf(void) {
                 strscpy(arg_output_path, sizeof(arg_output_path), output);
 }
 
-static int parse_args(int argc, char *argv[]) {
-        static struct option options[] = {
-                {"rel",       no_argument,        NULL,  'r'},
-                {"freq",      required_argument,  NULL,  'f'},
-                {"samples",   required_argument,  NULL,  'n'},
-                {"pss",       no_argument,        NULL,  'p'},
-                {"output",    required_argument,  NULL,  'o'},
-                {"init",      required_argument,  NULL,  'i'},
-                {"no-filter", no_argument,        NULL,  'F'},
-                {"cmdline",   no_argument,        NULL,  'C'},
-                {"help",      no_argument,        NULL,  'h'},
-                {"scale-x",   required_argument,  NULL,  'x'},
-                {"scale-y",   required_argument,  NULL,  'y'},
-                {"entropy",   no_argument,        NULL,  'e'},
-                {NULL, 0, NULL, 0}
+static void help(void) { /* Print the usage text to stdout; the bracketed values are the DEFAULT_* constants, not the current arg_* settings. */
+        fprintf(stdout,
+                "Usage: %s [OPTIONS]\n\n"
+                "Options:\n"
+                "  -r, --rel             Record time relative to recording\n"
+                "  -f, --freq=FREQ       Sample frequency [%g]\n"
+                "  -n, --samples=N       Stop sampling at [%d] samples\n"
+                "  -x, --scale-x=N       Scale the graph horizontally [%g] \n"
+                "  -y, --scale-y=N       Scale the graph vertically [%g] \n"
+                "  -p, --pss             Enable PSS graph (CPU intensive)\n"
+                "  -e, --entropy         Enable the entropy_avail graph\n"
+                "  -o, --output=PATH     Path to output files [%s]\n"
+                "  -i, --init=PATH       Path to init executable [%s]\n"
+                "  -F, --no-filter       Disable filtering of unimportant or ephemeral processes\n"
+                "  -C, --cmdline         Display full command lines with arguments\n"
+                "  -c, --control-group   Display process control group\n"
+                "  -h, --help            Display this message\n\n"
+                "See bootchart.conf for more information.\n",
+                program_invocation_short_name, /* arguments below follow the order of the %-conversions above */
+                DEFAULT_HZ,
+                DEFAULT_SAMPLES_LEN,
+                DEFAULT_SCALE_X,
+                DEFAULT_SCALE_Y,
+                DEFAULT_OUTPUT,
+                DEFAULT_INIT);
+}
+
+static int parse_argv(int argc, char *argv[]) {
+        static const struct option options[] = {
+                {"rel",           no_argument,        NULL,  'r'},
+                {"freq",          required_argument,  NULL,  'f'},
+                {"samples",       required_argument,  NULL,  'n'},
+                {"pss",           no_argument,        NULL,  'p'},
+                {"output",        required_argument,  NULL,  'o'},
+                {"init",          required_argument,  NULL,  'i'},
+                {"no-filter",     no_argument,        NULL,  'F'},
+                {"cmdline",       no_argument,        NULL,  'C'},
+                {"control-group", no_argument,        NULL,  'c'},
+                {"help",          no_argument,        NULL,  'h'},
+                {"scale-x",       required_argument,  NULL,  'x'},
+                {"scale-y",       required_argument,  NULL,  'y'},
+                {"entropy",       no_argument,        NULL,  'e'},
+                {}
         };
-        int c;
+        int c, r;
 
-        while ((c = getopt_long(argc, argv, "erpf:n:o:i:FChx:y:", options, NULL)) >= 0) {
-                int r;
+        if (getpid() == 1)
+                opterr = 0;
 
+        while ((c = getopt_long(argc, argv, "erpf:n:o:i:FCchx:y:", options, NULL)) >= 0)
                 switch (c) {
+
                 case 'r':
                         arg_relative = true;
                         break;
@@ -168,6 +203,9 @@ static int parse_args(int argc, char *argv[]) {
                 case 'C':
                         arg_show_cmdline = true;
                         break;
+                case 'c':
+                        arg_show_cgroup = true;
+                        break;
                 case 'n':
                         r = safe_atoi(optarg, &arg_samples_len);
                         if (r < 0)
@@ -201,57 +239,90 @@ static int parse_args(int argc, char *argv[]) {
                         arg_entropy = true;
                         break;
                 case 'h':
-                        fprintf(stderr, "Usage: %s [OPTIONS]\n", argv[0]);
-                        fprintf(stderr, " --rel,       -r          Record time relative to recording\n");
-                        fprintf(stderr, " --freq,      -f f        Sample frequency [%f]\n", arg_hz);
-                        fprintf(stderr, " --samples,   -n N        Stop sampling at [%d] samples\n", arg_samples_len);
-                        fprintf(stderr, " --scale-x,   -x N        Scale the graph horizontally [%f] \n", arg_scale_x);
-                        fprintf(stderr, " --scale-y,   -y N        Scale the graph vertically [%f] \n", arg_scale_y);
-                        fprintf(stderr, " --pss,       -p          Enable PSS graph (CPU intensive)\n");
-                        fprintf(stderr, " --entropy,   -e          Enable the entropy_avail graph\n");
-                        fprintf(stderr, " --output,    -o [PATH]   Path to output files [%s]\n", arg_output_path);
-                        fprintf(stderr, " --init,      -i [PATH]   Path to init executable [%s]\n", arg_init_path);
-                        fprintf(stderr, " --no-filter, -F          Disable filtering of processes from the graph\n");
-                        fprintf(stderr, "                          that are of less importance or short-lived\n");
-                        fprintf(stderr, " --cmdline,   -C          Display the full command line with arguments\n");
-                        fprintf(stderr, "                          of processes, instead of only the process name\n");
-                        fprintf(stderr, " --help,      -h          Display this message\n");
-                        fprintf(stderr, "See bootchart.conf for more information.\n");
-                        exit (EXIT_SUCCESS);
-                        break;
+                        help();
+                        return 0;
+                case '?':
+                        if (getpid() != 1)
+                                return -EINVAL;
+                        else
+                                return 0;
                 default:
-                        break;
+                        assert_not_reached("Unhandled option code.");
                 }
-        }
 
-        if (arg_samples_len > MAXSAMPLES) {
-                fprintf(stderr, "Error: samples exceeds maximum\n");
+        if (arg_hz <= 0) {
+                log_error("Frequency needs to be > 0");
                 return -EINVAL;
         }
 
-        if (arg_hz <= 0.0) {
-                fprintf(stderr, "Error: Frequency needs to be > 0\n");
-                return -EINVAL;
+        return 1;
+}
+
+static void do_journal_append(char *file) { /* Submit a journal entry announcing `file`, with up to BOOTCHART_MAX bytes of its contents attached as the BOOTCHART= field. */
+        struct iovec iovec[5];
+        int r, f, j = 0;
+        ssize_t n;
+        _cleanup_free_ char *bootchart_file = NULL, *bootchart_message = NULL,
+                *p = NULL;
+
+        bootchart_file = strappend("BOOTCHART_FILE=", file);
+        if (bootchart_file) /* the field is skipped when strappend() returned NULL */
+                IOVEC_SET_STRING(iovec[j++], bootchart_file);
+
+        IOVEC_SET_STRING(iovec[j++], "MESSAGE_ID=9f26aa562cf440c2b16c773d0479b518");
+        IOVEC_SET_STRING(iovec[j++], "PRIORITY=7");
+        bootchart_message = strjoin("MESSAGE=Bootchart created: ", file, NULL);
+        if (bootchart_message)
+                IOVEC_SET_STRING(iovec[j++], bootchart_message);
+
+        p = malloc(10 + BOOTCHART_MAX); /* 10-byte "BOOTCHART=" prefix plus up to BOOTCHART_MAX bytes of file data (was 9 +, one byte short) */
+        if (!p) {
+                log_oom();
+                return;
         }
 
-        return 0;
+        memcpy(p, "BOOTCHART=", 10); /* copies the prefix without a terminating NUL */
+
+        f = open(file, O_RDONLY|O_CLOEXEC);
+        if (f < 0) {
+                log_error("Failed to read bootchart data: %m");
+                return;
+        }
+        n = loop_read(f, p + 10, BOOTCHART_MAX, false); /* file data is stored directly after the prefix */
+        if (n < 0) {
+                log_error("Failed to read bootchart data: %s", strerror(-n));
+                close(f);
+                return;
+        }
+        close(f);
+
+        iovec[j].iov_base = p;
+        iovec[j].iov_len = 10 + n; /* explicit length: prefix plus the n bytes actually read */
+        j++;
+
+        r = sd_journal_sendv(iovec, j);
+        if (r < 0)
+                log_error("Failed to send bootchart: %s", strerror(-r));
 }
 
 int main(int argc, char *argv[]) {
         _cleanup_free_ char *build = NULL;
-        struct sigaction sig;
+        struct sigaction sig = {
+                .sa_handler = signal_handler,
+        };
         struct ps_struct *ps;
         char output_file[PATH_MAX];
         char datestr[200];
         time_t t = 0;
         int r;
         struct rlimit rlim;
+        bool has_procfs = false;
 
         parse_conf();
 
-        r = parse_args(argc, argv);
-        if (r < 0)
-                return EXIT_FAILURE;
+        r = parse_argv(argc, argv);
+        if (r <= 0)
+                return r == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
 
         /*
          * If the kernel executed us through init=/usr/lib/systemd/systemd-bootchart, then
@@ -272,23 +343,33 @@ int main(int argc, char *argv[]) {
         (void) setrlimit(RLIMIT_NOFILE, &rlim);
 
         /* start with empty ps LL */
-        ps_first = calloc(1, sizeof(struct ps_struct));
+        ps_first = new0(struct ps_struct, 1);
         if (!ps_first) {
-                perror("calloc(ps_struct)");
-                exit(EXIT_FAILURE);
+                log_oom();
+                return EXIT_FAILURE;
         }
 
         /* handle TERM/INT nicely */
-        memset(&sig, 0, sizeof(struct sigaction));
-        sig.sa_handler = signal_handler;
         sigaction(SIGHUP, &sig, NULL);
 
         interval = (1.0 / arg_hz) * 1000000000.0;
 
         log_uptime();
 
+        if (graph_start < 0.0) {
+                fprintf(stderr,
+                        "Failed to setup graph start time.\n\nThe system uptime "
+                        "probably includes time that the system was suspended. "
+                        "Use --rel to bypass this issue.\n");
+                exit (EXIT_FAILURE);
+        }
+
+        has_procfs = access("/proc/vmstat", F_OK) == 0;
+
+        LIST_HEAD_INIT(head);
+
         /* main program loop */
-        while (!exiting) {
+        for (samples = 0; !exiting && samples < arg_samples_len; samples++) {
                 int res;
                 double sample_stop;
                 struct timespec req;
@@ -297,32 +378,39 @@ int main(int argc, char *argv[]) {
                 double elapsed;
                 double timeleft;
 
-                sampletime[samples] = gettime_ns();
+                sampledata = new0(struct list_sample_data, 1);
+                if (sampledata == NULL) {
+                        log_oom();
+                        return EXIT_FAILURE;
+                }
+
+                sampledata->sampletime = gettime_ns();
+                sampledata->counter = samples;
 
                 if (!of && (access(arg_output_path, R_OK|W_OK|X_OK) == 0)) {
                         t = time(NULL);
                         strftime(datestr, sizeof(datestr), "%Y%m%d-%H%M", localtime(&t));
                         snprintf(output_file, PATH_MAX, "%s/bootchart-%s.svg", arg_output_path, datestr);
-                        of = fopen(output_file, "w");
+                        of = fopen(output_file, "we");
                 }
 
                 if (sysfd < 0)
-                        sysfd = open("/sys", O_RDONLY);
+                        sysfd = open("/sys", O_RDONLY|O_CLOEXEC);
 
-                if (!build)
-                        parse_env_file("/etc/os-release", NEWLINE,
-                                       "PRETTY_NAME", &build,
-                                       NULL);
+                if (!build) {
+                        if (parse_env_file("/etc/os-release", NEWLINE, "PRETTY_NAME", &build, NULL) == -ENOENT)
+                                parse_env_file("/usr/lib/os-release", NEWLINE, "PRETTY_NAME", &build, NULL);
+                }
 
-                /* wait for /proc to become available, discarding samples */
-                if (!(graph_start > 0.0))
-                        log_uptime();
+                if (has_procfs)
+                        log_sample(samples, &sampledata);
                 else
-                        log_sample(samples);
+                        /* wait for /proc to become available, discarding samples */
+                        has_procfs = access("/proc/vmstat", F_OK) == 0;
 
                 sample_stop = gettime_ns();
 
-                elapsed = (sample_stop - sampletime[samples]) * 1000000000.0;
+                elapsed = (sample_stop - sampledata->sampletime) * 1000000000.0;
                 timeleft = interval - elapsed;
 
                 newint_s = (time_t)(timeleft / 1000000000.0);
@@ -334,7 +422,7 @@ int main(int argc, char *argv[]) {
                  * we'll lose all the missed samples and overrun our total
                  * time
                  */
-                if ((newint_ns > 0) || (newint_s > 0)) {
+                if (newint_ns > 0 || newint_s > 0) {
                         req.tv_sec = newint_s;
                         req.tv_nsec = newint_ns;
 
@@ -344,20 +432,15 @@ int main(int argc, char *argv[]) {
                                         /* caught signal, probably HUP! */
                                         break;
                                 }
-                                perror("nanosleep()");
-                                exit (EXIT_FAILURE);
+                                log_error("nanosleep() failed: %m");
+                                exit(EXIT_FAILURE);
                         }
                 } else {
                         overrun++;
                         /* calculate how many samples we lost and scrap them */
-                        arg_samples_len = arg_samples_len + ((int)(newint_ns / interval));
+                        arg_samples_len -= (int)(newint_ns / interval);
                 }
-
-                samples++;
-
-                if (samples > arg_samples_len)
-                        break;
-
+                LIST_PREPEND(link, head, sampledata);
         }
 
         /* do some cleanup, close fd's */
@@ -376,7 +459,7 @@ int main(int argc, char *argv[]) {
                 t = time(NULL);
                 strftime(datestr, sizeof(datestr), "%Y%m%d-%H%M", localtime(&t));
                 snprintf(output_file, PATH_MAX, "%s/bootchart-%s.svg", arg_output_path, datestr);
-                of = fopen(output_file, "w");
+                of = fopen(output_file, "we");
         }
 
         if (!of) {
@@ -388,6 +471,8 @@ int main(int argc, char *argv[]) {
 
         fprintf(stderr, "systemd-bootchart wrote %s\n", output_file);
 
+        do_journal_append(output_file);
+
         if (of)
                 fclose(of);
 
@@ -396,16 +481,34 @@ int main(int argc, char *argv[]) {
                 close(sysfd);
 
         /* nitpic cleanups */
-        ps = ps_first;
+        ps = ps_first->next_ps;
         while (ps->next_ps) {
-                struct ps_struct *old = ps;
+                struct ps_struct *old;
+
+                old = ps;
+                old->sample = ps->first;
                 ps = ps->next_ps;
+                while (old->sample->next) {
+                        struct ps_sched_struct *oldsample = old->sample;
+
+                        old->sample = old->sample->next;
+                        free(oldsample);
+                }
+                free(old->cgroup);
                 free(old->sample);
                 free(old);
         }
+        free(ps->cgroup);
         free(ps->sample);
         free(ps);
 
+        sampledata = head;
+        while (sampledata->link_prev) {
+                struct list_sample_data *old_sampledata = sampledata;
+                sampledata = sampledata->link_prev;
+                free(old_sampledata);
+        }
+        free(sampledata);
         /* don't complain when overrun once, happens most commonly on 1st sample */
         if (overrun > 1)
                 fprintf(stderr, "systemd-boochart: Warning: sample time overrun %i times\n", overrun);