chiark / gitweb /
5683025b1970df3f16da846326f70c2713c68fb8
[elogind.git] / src / bootchart / bootchart.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright (C) 2009-2013 Intel Corporation
7
8   Authors:
9     Auke Kok <auke-jan.h.kok@intel.com>
10
11   systemd is free software; you can redistribute it and/or modify it
12   under the terms of the GNU Lesser General Public License as published by
13   the Free Software Foundation; either version 2.1 of the License, or
14   (at your option) any later version.
15
16   systemd is distributed in the hope that it will be useful, but
17   WITHOUT ANY WARRANTY; without even the implied warranty of
18   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19   Lesser General Public License for more details.
20
21   You should have received a copy of the GNU Lesser General Public License
22   along with systemd; If not, see <http://www.gnu.org/licenses/>.
23  ***/
24
25 /***
26
27   Many thanks to those who contributed ideas and code:
28   - Ziga Mahkovec - Original bootchart author
29   - Anders Norgaard - PyBootchartgui
30   - Michael Meeks - bootchart2
31   - Scott James Remnant - Ubuntu C-based logger
32   - Arjan van der Ven - for the idea to merge bootgraph.pl functionality
33
34  ***/
35
36 #include <sys/time.h>
37 #include <sys/types.h>
38 #include <sys/resource.h>
39 #include <sys/stat.h>
40 #include <stdio.h>
41 #include <signal.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <unistd.h>
45 #include <time.h>
46 #include <getopt.h>
47 #include <limits.h>
48 #include <errno.h>
49 #include <fcntl.h>
50 #include <stdbool.h>
51 #include "systemd/sd-journal.h"
52
53 #include "util.h"
54 #include "fileio.h"
55 #include "macro.h"
56 #include "conf-parser.h"
57 #include "strxcpyx.h"
58 #include "path-util.h"
59 #include "store.h"
60 #include "svg.h"
61 #include "bootchart.h"
62 #include "list.h"
63
/*
 * Program-wide state. Everything without 'static' has external linkage so
 * that other translation units can reach it (presumably the sampling and SVG
 * code declared in store.h / svg.h / bootchart.h - confirm there).
 */
double graph_start;          /* main() refuses to run if this is < 0 after log_uptime() */
double log_start;            /* not referenced in this file */
struct ps_struct *ps_first;  /* dummy head of the process list, allocated in main() */
int pscount;                 /* not referenced in this file */
int cpus;                    /* not referenced in this file */
double interval;             /* nanoseconds between two samples: 1e9 / arg_hz */
FILE *of = NULL;             /* output stream of the SVG chart, opened in main() */
int overrun = 0;             /* number of samples that used up their whole timeslice */
/*
 * Written by signal_handler() and polled by the sampling loop. An object
 * shared between a signal handler and normal code must be of type
 * 'volatile sig_atomic_t' to be accessed safely.
 */
static volatile sig_atomic_t exiting = 0;
int sysfd=-1;                /* fd of the open /sys directory, -1 while not open */

#define DEFAULT_SAMPLES_LEN 500
#define DEFAULT_HZ 25.0
#define DEFAULT_SCALE_X 100.0 /* 100px = 1sec */
#define DEFAULT_SCALE_Y 20.0  /* 20px = 1 process bar */
#define DEFAULT_INIT "/sbin/init"
#define DEFAULT_OUTPUT "/run/log"

/* graph defaults */
bool arg_entropy = false;
bool initcall = true;
bool arg_relative = false;
bool arg_filter = true;
bool arg_show_cmdline = false;
bool arg_show_cgroup = false;
bool arg_pss = false;
int samples;                 /* index of the sample currently being taken */
int arg_samples_len = DEFAULT_SAMPLES_LEN; /* we record len+1 (1 start sample) */
double arg_hz = DEFAULT_HZ;
double arg_scale_x = DEFAULT_SCALE_X;
double arg_scale_y = DEFAULT_SCALE_Y;
static struct list_sample_data *sampledata; /* node of the sample in progress */
struct list_sample_data *head;              /* list of all recorded samples */

/* Overridable through bootchart.conf (Init=/Output=) and -i/-o. */
char arg_init_path[PATH_MAX] = DEFAULT_INIT;
char arg_output_path[PATH_MAX] = DEFAULT_OUTPUT;
100
101 static void signal_handler(int sig) {
102         if (sig++)
103                 sig--;
104         exiting = 1;
105 }
106
107 #define BOOTCHART_CONF "/etc/systemd/bootchart.conf"
108
109 #define BOOTCHART_MAX (16*1024*1024)
110
111 static void parse_conf(void) {
112         char *init = NULL, *output = NULL;
113         const ConfigTableItem items[] = {
114                 { "Bootchart", "Samples",          config_parse_int,    0, &arg_samples_len },
115                 { "Bootchart", "Frequency",        config_parse_double, 0, &arg_hz          },
116                 { "Bootchart", "Relative",         config_parse_bool,   0, &arg_relative    },
117                 { "Bootchart", "Filter",           config_parse_bool,   0, &arg_filter      },
118                 { "Bootchart", "Output",           config_parse_path,   0, &output          },
119                 { "Bootchart", "Init",             config_parse_path,   0, &init            },
120                 { "Bootchart", "PlotMemoryUsage",  config_parse_bool,   0, &arg_pss         },
121                 { "Bootchart", "PlotEntropyGraph", config_parse_bool,   0, &arg_entropy     },
122                 { "Bootchart", "ScaleX",           config_parse_double, 0, &arg_scale_x     },
123                 { "Bootchart", "ScaleY",           config_parse_double, 0, &arg_scale_y     },
124                 { "Bootchart", "ControlGroup",     config_parse_bool,   0, &arg_show_cgroup },
125                 { NULL, NULL, NULL, 0, NULL }
126         };
127
128         config_parse(NULL, BOOTCHART_CONF, NULL,
129                      NULL,
130                      config_item_table_lookup, items,
131                      true, false, true, NULL);
132
133         if (init != NULL)
134                 strscpy(arg_init_path, sizeof(arg_init_path), init);
135         if (output != NULL)
136                 strscpy(arg_output_path, sizeof(arg_output_path), output);
137 }
138
139 static void help(void) {
140         fprintf(stdout,
141                 "Usage: %s [OPTIONS]\n\n"
142                 "Options:\n"
143                 "  -r, --rel             Record time relative to recording\n"
144                 "  -f, --freq=FREQ       Sample frequency [%g]\n"
145                 "  -n, --samples=N       Stop sampling at [%d] samples\n"
146                 "  -x, --scale-x=N       Scale the graph horizontally [%g] \n"
147                 "  -y, --scale-y=N       Scale the graph vertically [%g] \n"
148                 "  -p, --pss             Enable PSS graph (CPU intensive)\n"
149                 "  -e, --entropy         Enable the entropy_avail graph\n"
150                 "  -o, --output=PATH     Path to output files [%s]\n"
151                 "  -i, --init=PATH       Path to init executable [%s]\n"
152                 "  -F, --no-filter       Disable filtering of unimportant or ephemeral processes\n"
153                 "  -C, --cmdline         Display full command lines with arguments\n"
154                 "  -c, --control-group   Display process control group\n"
155                 "  -h, --help            Display this message\n\n"
156                 "See bootchart.conf for more information.\n",
157                 program_invocation_short_name,
158                 DEFAULT_HZ,
159                 DEFAULT_SAMPLES_LEN,
160                 DEFAULT_SCALE_X,
161                 DEFAULT_SCALE_Y,
162                 DEFAULT_OUTPUT,
163                 DEFAULT_INIT);
164 }
165
166 static int parse_args(int argc, char *argv[]) {
167         static const struct option options[] = {
168                 {"rel",           no_argument,        NULL,  'r'},
169                 {"freq",          required_argument,  NULL,  'f'},
170                 {"samples",       required_argument,  NULL,  'n'},
171                 {"pss",           no_argument,        NULL,  'p'},
172                 {"output",        required_argument,  NULL,  'o'},
173                 {"init",          required_argument,  NULL,  'i'},
174                 {"no-filter",     no_argument,        NULL,  'F'},
175                 {"cmdline",       no_argument,        NULL,  'C'},
176                 {"control-group", no_argument,        NULL,  'c'},
177                 {"help",          no_argument,        NULL,  'h'},
178                 {"scale-x",       required_argument,  NULL,  'x'},
179                 {"scale-y",       required_argument,  NULL,  'y'},
180                 {"entropy",       no_argument,        NULL,  'e'},
181                 {}
182         };
183         int c;
184
185         while ((c = getopt_long(argc, argv, "erpf:n:o:i:FCchx:y:", options, NULL)) >= 0) {
186                 int r;
187
188                 switch (c) {
189                 case 'r':
190                         arg_relative = true;
191                         break;
192                 case 'f':
193                         r = safe_atod(optarg, &arg_hz);
194                         if (r < 0)
195                                 log_warning("failed to parse --freq/-f argument '%s': %s",
196                                             optarg, strerror(-r));
197                         break;
198                 case 'F':
199                         arg_filter = false;
200                         break;
201                 case 'C':
202                         arg_show_cmdline = true;
203                         break;
204                 case 'c':
205                         arg_show_cgroup = true;
206                         break;
207                 case 'n':
208                         r = safe_atoi(optarg, &arg_samples_len);
209                         if (r < 0)
210                                 log_warning("failed to parse --samples/-n argument '%s': %s",
211                                             optarg, strerror(-r));
212                         break;
213                 case 'o':
214                         path_kill_slashes(optarg);
215                         strscpy(arg_output_path, sizeof(arg_output_path), optarg);
216                         break;
217                 case 'i':
218                         path_kill_slashes(optarg);
219                         strscpy(arg_init_path, sizeof(arg_init_path), optarg);
220                         break;
221                 case 'p':
222                         arg_pss = true;
223                         break;
224                 case 'x':
225                         r = safe_atod(optarg, &arg_scale_x);
226                         if (r < 0)
227                                 log_warning("failed to parse --scale-x/-x argument '%s': %s",
228                                             optarg, strerror(-r));
229                         break;
230                 case 'y':
231                         r = safe_atod(optarg, &arg_scale_y);
232                         if (r < 0)
233                                 log_warning("failed to parse --scale-y/-y argument '%s': %s",
234                                             optarg, strerror(-r));
235                         break;
236                 case 'e':
237                         arg_entropy = true;
238                         break;
239                 case 'h':
240                         help();
241                         exit (EXIT_SUCCESS);
242                 default:
243                         break;
244                 }
245         }
246
247         if (arg_hz <= 0.0) {
248                 fprintf(stderr, "Error: Frequency needs to be > 0\n");
249                 return -EINVAL;
250         }
251
252         return 0;
253 }
254
255 static void do_journal_append(char *file) {
256         struct iovec iovec[5];
257         int r, f, j = 0;
258         ssize_t n;
259         _cleanup_free_ char *bootchart_file = NULL, *bootchart_message = NULL,
260                 *p = NULL;
261
262         bootchart_file = strappend("BOOTCHART_FILE=", file);
263         if (bootchart_file)
264                 IOVEC_SET_STRING(iovec[j++], bootchart_file);
265
266         IOVEC_SET_STRING(iovec[j++], "MESSAGE_ID=9f26aa562cf440c2b16c773d0479b518");
267         IOVEC_SET_STRING(iovec[j++], "PRIORITY=7");
268         bootchart_message = strjoin("MESSAGE=Bootchart created: ", file, NULL);
269         if (bootchart_message)
270                 IOVEC_SET_STRING(iovec[j++], bootchart_message);
271
272         p = malloc(9 + BOOTCHART_MAX);
273         if (!p) {
274                 log_oom();
275                 return;
276         }
277
278         memcpy(p, "BOOTCHART=", 10);
279
280         f = open(file, O_RDONLY|O_CLOEXEC);
281         if (f < 0) {
282                 log_error("Failed to read bootchart data: %m");
283                 return;
284         }
285         n = loop_read(f, p + 10, BOOTCHART_MAX, false);
286         if (n < 0) {
287                 log_error("Failed to read bootchart data: %s", strerror(-n));
288                 close(f);
289                 return;
290         }
291         close(f);
292
293         iovec[j].iov_base = p;
294         iovec[j].iov_len = 10 + n;
295         j++;
296
297         r = sd_journal_sendv(iovec, j);
298         if (r < 0)
299                 log_error("Failed to send bootchart: %s", strerror(-r));
300 }
301
302 int main(int argc, char *argv[]) {
303         _cleanup_free_ char *build = NULL;
304         struct sigaction sig = {
305                 .sa_handler = signal_handler,
306         };
307         struct ps_struct *ps;
308         char output_file[PATH_MAX];
309         char datestr[200];
310         time_t t = 0;
311         int r;
312         struct rlimit rlim;
313         bool has_procfs = false;
314
315         parse_conf();
316
317         r = parse_args(argc, argv);
318         if (r < 0)
319                 return EXIT_FAILURE;
320
321         /*
322          * If the kernel executed us through init=/usr/lib/systemd/systemd-bootchart, then
323          * fork:
324          * - parent execs executable specified via init_path[] (/sbin/init by default) as pid=1
325          * - child logs data
326          */
327         if (getpid() == 1) {
328                 if (fork()) {
329                         /* parent */
330                         execl(arg_init_path, arg_init_path, NULL);
331                 }
332         }
333         argv[0][0] = '@';
334
335         rlim.rlim_cur = 4096;
336         rlim.rlim_max = 4096;
337         (void) setrlimit(RLIMIT_NOFILE, &rlim);
338
339         /* start with empty ps LL */
340         ps_first = new0(struct ps_struct, 1);
341         if (!ps_first) {
342                 log_oom();
343                 return EXIT_FAILURE;
344         }
345
346         /* handle TERM/INT nicely */
347         sigaction(SIGHUP, &sig, NULL);
348
349         interval = (1.0 / arg_hz) * 1000000000.0;
350
351         log_uptime();
352
353         if (graph_start < 0.0) {
354                 fprintf(stderr,
355                         "Failed to setup graph start time.\n\nThe system uptime "
356                         "probably includes time that the system was suspended. "
357                         "Use --rel to bypass this issue.\n");
358                 exit (EXIT_FAILURE);
359         }
360
361         has_procfs = access("/proc/vmstat", F_OK) == 0;
362
363         LIST_HEAD_INIT(head);
364
365         /* main program loop */
366         for (samples = 0; !exiting && samples < arg_samples_len; samples++) {
367                 int res;
368                 double sample_stop;
369                 struct timespec req;
370                 time_t newint_s;
371                 long newint_ns;
372                 double elapsed;
373                 double timeleft;
374
375                 sampledata = new0(struct list_sample_data, 1);
376                 if (sampledata == NULL) {
377                         log_error("Failed to allocate memory for a node: %m");
378                         return -1;
379                 }
380
381                 sampledata->sampletime = gettime_ns();
382                 sampledata->counter = samples;
383
384                 if (!of && (access(arg_output_path, R_OK|W_OK|X_OK) == 0)) {
385                         t = time(NULL);
386                         strftime(datestr, sizeof(datestr), "%Y%m%d-%H%M", localtime(&t));
387                         snprintf(output_file, PATH_MAX, "%s/bootchart-%s.svg", arg_output_path, datestr);
388                         of = fopen(output_file, "we");
389                 }
390
391                 if (sysfd < 0)
392                         sysfd = open("/sys", O_RDONLY|O_CLOEXEC);
393
394                 if (!build) {
395                         if (parse_env_file("/etc/os-release", NEWLINE, "PRETTY_NAME", &build, NULL) == -ENOENT)
396                                 parse_env_file("/usr/lib/os-release", NEWLINE, "PRETTY_NAME", &build, NULL);
397                 }
398
399                 if (has_procfs)
400                         log_sample(samples, &sampledata);
401                 else
402                         /* wait for /proc to become available, discarding samples */
403                         has_procfs = access("/proc/vmstat", F_OK) == 0;
404
405                 sample_stop = gettime_ns();
406
407                 elapsed = (sample_stop - sampledata->sampletime) * 1000000000.0;
408                 timeleft = interval - elapsed;
409
410                 newint_s = (time_t)(timeleft / 1000000000.0);
411                 newint_ns = (long)(timeleft - (newint_s * 1000000000.0));
412
413                 /*
414                  * check if we have not consumed our entire timeslice. If we
415                  * do, don't sleep and take a new sample right away.
416                  * we'll lose all the missed samples and overrun our total
417                  * time
418                  */
419                 if (newint_ns > 0 || newint_s > 0) {
420                         req.tv_sec = newint_s;
421                         req.tv_nsec = newint_ns;
422
423                         res = nanosleep(&req, NULL);
424                         if (res) {
425                                 if (errno == EINTR) {
426                                         /* caught signal, probably HUP! */
427                                         break;
428                                 }
429                                 log_error("nanosleep() failed: %m");
430                                 exit(EXIT_FAILURE);
431                         }
432                 } else {
433                         overrun++;
434                         /* calculate how many samples we lost and scrap them */
435                         arg_samples_len -= (int)(newint_ns / interval);
436                 }
437                 LIST_PREPEND(link, head, sampledata);
438         }
439
440         /* do some cleanup, close fd's */
441         ps = ps_first;
442         while (ps->next_ps) {
443                 ps = ps->next_ps;
444                 if (ps->schedstat)
445                         close(ps->schedstat);
446                 if (ps->sched)
447                         close(ps->sched);
448                 if (ps->smaps)
449                         fclose(ps->smaps);
450         }
451
452         if (!of) {
453                 t = time(NULL);
454                 strftime(datestr, sizeof(datestr), "%Y%m%d-%H%M", localtime(&t));
455                 snprintf(output_file, PATH_MAX, "%s/bootchart-%s.svg", arg_output_path, datestr);
456                 of = fopen(output_file, "we");
457         }
458
459         if (!of) {
460                 fprintf(stderr, "opening output file '%s': %m\n", output_file);
461                 exit (EXIT_FAILURE);
462         }
463
464         svg_do(build);
465
466         fprintf(stderr, "systemd-bootchart wrote %s\n", output_file);
467
468         do_journal_append(output_file);
469
470         if (of)
471                 fclose(of);
472
473         closedir(proc);
474         if (sysfd >= 0)
475                 close(sysfd);
476
477         /* nitpic cleanups */
478         ps = ps_first->next_ps;
479         while (ps->next_ps) {
480                 struct ps_struct *old;
481
482                 old = ps;
483                 old->sample = ps->first;
484                 ps = ps->next_ps;
485                 while (old->sample->next) {
486                         struct ps_sched_struct *oldsample = old->sample;
487
488                         old->sample = old->sample->next;
489                         free(oldsample);
490                 }
491                 free(old->cgroup);
492                 free(old->sample);
493                 free(old);
494         }
495         free(ps->cgroup);
496         free(ps->sample);
497         free(ps);
498
499         sampledata = head;
500         while (sampledata->link_prev) {
501                 struct list_sample_data *old_sampledata = sampledata;
502                 sampledata = sampledata->link_prev;
503                 free(old_sampledata);
504         }
505         free(sampledata);
506         /* don't complain when overrun once, happens most commonly on 1st sample */
507         if (overrun > 1)
508                 fprintf(stderr, "systemd-boochart: Warning: sample time overrun %i times\n", overrun);
509
510         return 0;
511 }