chiark / gitweb /
607ec42690ad23bec9f746bbb8e8d0f9acd12601
[elogind.git] / src / bootchart / bootchart.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright (C) 2009-2013 Intel Corporation
7
8   Authors:
9     Auke Kok <auke-jan.h.kok@intel.com>
10
11   systemd is free software; you can redistribute it and/or modify it
12   under the terms of the GNU Lesser General Public License as published by
13   the Free Software Foundation; either version 2.1 of the License, or
14   (at your option) any later version.
15
16   systemd is distributed in the hope that it will be useful, but
17   WITHOUT ANY WARRANTY; without even the implied warranty of
18   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19   Lesser General Public License for more details.
20
21   You should have received a copy of the GNU Lesser General Public License
22   along with systemd; If not, see <http://www.gnu.org/licenses/>.
23  ***/
24
25 /***
26
27   Many thanks to those who contributed ideas and code:
28   - Ziga Mahkovec - Original bootchart author
29   - Anders Norgaard - PyBootchartgui
30   - Michael Meeks - bootchart2
31   - Scott James Remnant - Ubuntu C-based logger
32   - Arjan van der Ven - for the idea to merge bootgraph.pl functionality
33
34  ***/
35
36 #include <sys/resource.h>
37 #include <stdio.h>
38 #include <signal.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <time.h>
43 #include <getopt.h>
44 #include <limits.h>
45 #include <errno.h>
46 #include <fcntl.h>
47 #include <stdbool.h>
48 #include "systemd/sd-journal.h"
49
50 #include "util.h"
51 #include "fileio.h"
52 #include "macro.h"
53 #include "conf-parser.h"
54 #include "strxcpyx.h"
55 #include "path-util.h"
56 #include "store.h"
57 #include "svg.h"
58 #include "bootchart.h"
59 #include "list.h"
60
/*
 * Process-wide state. Everything except `exiting` has external linkage;
 * presumably the sampling and SVG code access these through bootchart.h
 * (not visible from this file -- verify before changing any type).
 */
double graph_start;             /* main() aborts if this is negative after log_uptime() */
double log_start;
struct ps_struct *ps_first;     /* dummy list head allocated in main(); real entries start at ->next_ps */
int pscount;
int cpus;
double interval;                /* sampling period in nanoseconds, derived from arg_hz in main() */
FILE *of = NULL;                /* output SVG stream, opened by main() */
int overrun = 0;                /* number of samples that used up their whole time slice */

/*
 * Set by signal_handler() and polled by the sampling loop. An object shared
 * between a signal handler and normal code must be volatile sig_atomic_t;
 * with a plain int the compiler may legally cache the value across loop
 * iterations.
 */
static volatile sig_atomic_t exiting = 0;

int sysfd = -1;                 /* descriptor for /sys, opened on the first sample */
71
/* Built-in defaults, also printed by help() */
#define DEFAULT_SAMPLES_LEN 500
#define DEFAULT_HZ 25.0
#define DEFAULT_SCALE_X 100.0 /* 100px = 1sec */
#define DEFAULT_SCALE_Y 20.0  /* 20px = 1 process bar */
#define DEFAULT_INIT ROOTLIBEXECDIR "/systemd"
#define DEFAULT_OUTPUT "/run/log"
78
79 /* graph defaults */
80 bool arg_entropy = false;
81 bool initcall = true;
82 bool arg_relative = false;
83 bool arg_filter = true;
84 bool arg_show_cmdline = false;
85 bool arg_show_cgroup = false;
86 bool arg_pss = false;
87 bool arg_percpu = false;
88 int samples;
89 int arg_samples_len = DEFAULT_SAMPLES_LEN; /* we record len+1 (1 start sample) */
90 double arg_hz = DEFAULT_HZ;
91 double arg_scale_x = DEFAULT_SCALE_X;
92 double arg_scale_y = DEFAULT_SCALE_Y;
93 static struct list_sample_data *sampledata;
94 struct list_sample_data *head;
95
96 char arg_init_path[PATH_MAX] = DEFAULT_INIT;
97 char arg_output_path[PATH_MAX] = DEFAULT_OUTPUT;
98
99 static void signal_handler(int sig) {
100         if (sig++)
101                 sig--;
102         exiting = 1;
103 }
104
105 #define BOOTCHART_CONF "/etc/systemd/bootchart.conf"
106
107 #define BOOTCHART_MAX (16*1024*1024)
108
109 static void parse_conf(void) {
110         char *init = NULL, *output = NULL;
111         const ConfigTableItem items[] = {
112                 { "Bootchart", "Samples",          config_parse_int,    0, &arg_samples_len },
113                 { "Bootchart", "Frequency",        config_parse_double, 0, &arg_hz          },
114                 { "Bootchart", "Relative",         config_parse_bool,   0, &arg_relative    },
115                 { "Bootchart", "Filter",           config_parse_bool,   0, &arg_filter      },
116                 { "Bootchart", "Output",           config_parse_path,   0, &output          },
117                 { "Bootchart", "Init",             config_parse_path,   0, &init            },
118                 { "Bootchart", "PlotMemoryUsage",  config_parse_bool,   0, &arg_pss         },
119                 { "Bootchart", "PlotEntropyGraph", config_parse_bool,   0, &arg_entropy     },
120                 { "Bootchart", "ScaleX",           config_parse_double, 0, &arg_scale_x     },
121                 { "Bootchart", "ScaleY",           config_parse_double, 0, &arg_scale_y     },
122                 { "Bootchart", "ControlGroup",     config_parse_bool,   0, &arg_show_cgroup },
123                 { "Bootchart", "PerCPU",           config_parse_bool,   0, &arg_percpu      },
124                 { NULL, NULL, NULL, 0, NULL }
125         };
126
127         config_parse_many(BOOTCHART_CONF,
128                           CONF_DIRS_NULSTR("systemd/bootchart.conf"),
129                           NULL, config_item_table_lookup, items, true, NULL);
130
131         if (init != NULL)
132                 strscpy(arg_init_path, sizeof(arg_init_path), init);
133         if (output != NULL)
134                 strscpy(arg_output_path, sizeof(arg_output_path), output);
135 }
136
137 static void help(void) {
138         fprintf(stdout,
139                 "Usage: %s [OPTIONS]\n\n"
140                 "Options:\n"
141                 "  -r, --rel             Record time relative to recording\n"
142                 "  -f, --freq=FREQ       Sample frequency [%g]\n"
143                 "  -n, --samples=N       Stop sampling at [%d] samples\n"
144                 "  -x, --scale-x=N       Scale the graph horizontally [%g] \n"
145                 "  -y, --scale-y=N       Scale the graph vertically [%g] \n"
146                 "  -p, --pss             Enable PSS graph (CPU intensive)\n"
147                 "  -e, --entropy         Enable the entropy_avail graph\n"
148                 "  -o, --output=PATH     Path to output files [%s]\n"
149                 "  -i, --init=PATH       Path to init executable [%s]\n"
150                 "  -F, --no-filter       Disable filtering of unimportant or ephemeral processes\n"
151                 "  -C, --cmdline         Display full command lines with arguments\n"
152                 "  -c, --control-group   Display process control group\n"
153                 "      --per-cpu         Draw each CPU utilization and wait bar also\n"
154                 "  -h, --help            Display this message\n\n"
155                 "See bootchart.conf for more information.\n",
156                 program_invocation_short_name,
157                 DEFAULT_HZ,
158                 DEFAULT_SAMPLES_LEN,
159                 DEFAULT_SCALE_X,
160                 DEFAULT_SCALE_Y,
161                 DEFAULT_OUTPUT,
162                 DEFAULT_INIT);
163 }
164
165 static int parse_argv(int argc, char *argv[]) {
166
167         enum {
168                 ARG_PERCPU = 0x100,
169         };
170
171         static const struct option options[] = {
172                 {"rel",           no_argument,        NULL,  'r'       },
173                 {"freq",          required_argument,  NULL,  'f'       },
174                 {"samples",       required_argument,  NULL,  'n'       },
175                 {"pss",           no_argument,        NULL,  'p'       },
176                 {"output",        required_argument,  NULL,  'o'       },
177                 {"init",          required_argument,  NULL,  'i'       },
178                 {"no-filter",     no_argument,        NULL,  'F'       },
179                 {"cmdline",       no_argument,        NULL,  'C'       },
180                 {"control-group", no_argument,        NULL,  'c'       },
181                 {"help",          no_argument,        NULL,  'h'       },
182                 {"scale-x",       required_argument,  NULL,  'x'       },
183                 {"scale-y",       required_argument,  NULL,  'y'       },
184                 {"entropy",       no_argument,        NULL,  'e'       },
185                 {"per-cpu",       no_argument,        NULL,  ARG_PERCPU},
186                 {}
187         };
188         int c, r;
189
190         if (getpid() == 1)
191                 opterr = 0;
192
193         while ((c = getopt_long(argc, argv, "erpf:n:o:i:FCchx:y:", options, NULL)) >= 0)
194                 switch (c) {
195
196                 case 'r':
197                         arg_relative = true;
198                         break;
199                 case 'f':
200                         r = safe_atod(optarg, &arg_hz);
201                         if (r < 0)
202                                 log_warning_errno(r, "failed to parse --freq/-f argument '%s': %m",
203                                                   optarg);
204                         break;
205                 case 'F':
206                         arg_filter = false;
207                         break;
208                 case 'C':
209                         arg_show_cmdline = true;
210                         break;
211                 case 'c':
212                         arg_show_cgroup = true;
213                         break;
214                 case 'n':
215                         r = safe_atoi(optarg, &arg_samples_len);
216                         if (r < 0)
217                                 log_warning_errno(r, "failed to parse --samples/-n argument '%s': %m",
218                                                   optarg);
219                         break;
220                 case 'o':
221                         path_kill_slashes(optarg);
222                         strscpy(arg_output_path, sizeof(arg_output_path), optarg);
223                         break;
224                 case 'i':
225                         path_kill_slashes(optarg);
226                         strscpy(arg_init_path, sizeof(arg_init_path), optarg);
227                         break;
228                 case 'p':
229                         arg_pss = true;
230                         break;
231                 case 'x':
232                         r = safe_atod(optarg, &arg_scale_x);
233                         if (r < 0)
234                                 log_warning_errno(r, "failed to parse --scale-x/-x argument '%s': %m",
235                                                   optarg);
236                         break;
237                 case 'y':
238                         r = safe_atod(optarg, &arg_scale_y);
239                         if (r < 0)
240                                 log_warning_errno(r, "failed to parse --scale-y/-y argument '%s': %m",
241                                                   optarg);
242                         break;
243                 case 'e':
244                         arg_entropy = true;
245                         break;
246                 case ARG_PERCPU:
247                         arg_percpu = true;
248                         break;
249                 case 'h':
250                         help();
251                         return 0;
252                 case '?':
253                         if (getpid() != 1)
254                                 return -EINVAL;
255                         else
256                                 return 0;
257                 default:
258                         assert_not_reached("Unhandled option code.");
259                 }
260
261         if (arg_hz <= 0) {
262                 log_error("Frequency needs to be > 0");
263                 return -EINVAL;
264         }
265
266         return 1;
267 }
268
269 static void do_journal_append(char *file) {
270         struct iovec iovec[5];
271         int r, f, j = 0;
272         ssize_t n;
273         _cleanup_free_ char *bootchart_file = NULL, *bootchart_message = NULL,
274                 *p = NULL;
275
276         bootchart_file = strappend("BOOTCHART_FILE=", file);
277         if (bootchart_file)
278                 IOVEC_SET_STRING(iovec[j++], bootchart_file);
279
280         IOVEC_SET_STRING(iovec[j++], "MESSAGE_ID=9f26aa562cf440c2b16c773d0479b518");
281         IOVEC_SET_STRING(iovec[j++], "PRIORITY=7");
282         bootchart_message = strjoin("MESSAGE=Bootchart created: ", file, NULL);
283         if (bootchart_message)
284                 IOVEC_SET_STRING(iovec[j++], bootchart_message);
285
286         p = malloc(9 + BOOTCHART_MAX);
287         if (!p) {
288                 log_oom();
289                 return;
290         }
291
292         memcpy(p, "BOOTCHART=", 10);
293
294         f = open(file, O_RDONLY|O_CLOEXEC);
295         if (f < 0) {
296                 log_error_errno(errno, "Failed to read bootchart data: %m");
297                 return;
298         }
299         n = loop_read(f, p + 10, BOOTCHART_MAX, false);
300         if (n < 0) {
301                 log_error_errno(n, "Failed to read bootchart data: %m");
302                 close(f);
303                 return;
304         }
305         close(f);
306
307         iovec[j].iov_base = p;
308         iovec[j].iov_len = 10 + n;
309         j++;
310
311         r = sd_journal_sendv(iovec, j);
312         if (r < 0)
313                 log_error_errno(r, "Failed to send bootchart: %m");
314 }
315
316 int main(int argc, char *argv[]) {
317         _cleanup_free_ char *build = NULL;
318         struct sigaction sig = {
319                 .sa_handler = signal_handler,
320         };
321         struct ps_struct *ps;
322         char output_file[PATH_MAX];
323         char datestr[200];
324         time_t t = 0;
325         int r;
326         struct rlimit rlim;
327         bool has_procfs = false;
328
329         parse_conf();
330
331         r = parse_argv(argc, argv);
332         if (r <= 0)
333                 return r == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
334
335         /*
336          * If the kernel executed us through init=/usr/lib/systemd/systemd-bootchart, then
337          * fork:
338          * - parent execs executable specified via init_path[] (/usr/lib/systemd/systemd by default) as pid=1
339          * - child logs data
340          */
341         if (getpid() == 1) {
342                 if (fork()) {
343                         /* parent */
344                         execl(arg_init_path, arg_init_path, NULL);
345                 }
346         }
347         argv[0][0] = '@';
348
349         rlim.rlim_cur = 4096;
350         rlim.rlim_max = 4096;
351         (void) setrlimit(RLIMIT_NOFILE, &rlim);
352
353         /* start with empty ps LL */
354         ps_first = new0(struct ps_struct, 1);
355         if (!ps_first) {
356                 log_oom();
357                 return EXIT_FAILURE;
358         }
359
360         /* handle TERM/INT nicely */
361         sigaction(SIGHUP, &sig, NULL);
362
363         interval = (1.0 / arg_hz) * 1000000000.0;
364
365         log_uptime();
366
367         if (graph_start < 0.0) {
368                 fprintf(stderr,
369                         "Failed to setup graph start time.\n\nThe system uptime "
370                         "probably includes time that the system was suspended. "
371                         "Use --rel to bypass this issue.\n");
372                 exit (EXIT_FAILURE);
373         }
374
375         has_procfs = access("/proc/vmstat", F_OK) == 0;
376
377         LIST_HEAD_INIT(head);
378
379         /* main program loop */
380         for (samples = 0; !exiting && samples < arg_samples_len; samples++) {
381                 int res;
382                 double sample_stop;
383                 struct timespec req;
384                 time_t newint_s;
385                 long newint_ns;
386                 double elapsed;
387                 double timeleft;
388
389                 sampledata = new0(struct list_sample_data, 1);
390                 if (sampledata == NULL) {
391                         log_oom();
392                         return EXIT_FAILURE;
393                 }
394
395                 sampledata->sampletime = gettime_ns();
396                 sampledata->counter = samples;
397
398                 if (!of && (access(arg_output_path, R_OK|W_OK|X_OK) == 0)) {
399                         t = time(NULL);
400                         r = strftime(datestr, sizeof(datestr), "%Y%m%d-%H%M", localtime(&t));
401                         assert_se(r > 0);
402
403                         snprintf(output_file, PATH_MAX, "%s/bootchart-%s.svg", arg_output_path, datestr);
404                         of = fopen(output_file, "we");
405                 }
406
407                 if (sysfd < 0)
408                         sysfd = open("/sys", O_RDONLY|O_CLOEXEC);
409
410                 if (!build) {
411                         if (parse_env_file("/etc/os-release", NEWLINE, "PRETTY_NAME", &build, NULL) == -ENOENT)
412                                 parse_env_file("/usr/lib/os-release", NEWLINE, "PRETTY_NAME", &build, NULL);
413                 }
414
415                 if (has_procfs)
416                         log_sample(samples, &sampledata);
417                 else
418                         /* wait for /proc to become available, discarding samples */
419                         has_procfs = access("/proc/vmstat", F_OK) == 0;
420
421                 sample_stop = gettime_ns();
422
423                 elapsed = (sample_stop - sampledata->sampletime) * 1000000000.0;
424                 timeleft = interval - elapsed;
425
426                 newint_s = (time_t)(timeleft / 1000000000.0);
427                 newint_ns = (long)(timeleft - (newint_s * 1000000000.0));
428
429                 /*
430                  * check if we have not consumed our entire timeslice. If we
431                  * do, don't sleep and take a new sample right away.
432                  * we'll lose all the missed samples and overrun our total
433                  * time
434                  */
435                 if (newint_ns > 0 || newint_s > 0) {
436                         req.tv_sec = newint_s;
437                         req.tv_nsec = newint_ns;
438
439                         res = nanosleep(&req, NULL);
440                         if (res) {
441                                 if (errno == EINTR) {
442                                         /* caught signal, probably HUP! */
443                                         break;
444                                 }
445                                 log_error_errno(errno, "nanosleep() failed: %m");
446                                 exit(EXIT_FAILURE);
447                         }
448                 } else {
449                         overrun++;
450                         /* calculate how many samples we lost and scrap them */
451                         arg_samples_len -= (int)(newint_ns / interval);
452                 }
453                 LIST_PREPEND(link, head, sampledata);
454         }
455
456         /* do some cleanup, close fd's */
457         ps = ps_first;
458         while (ps->next_ps) {
459                 ps = ps->next_ps;
460                 if (ps->schedstat)
461                         close(ps->schedstat);
462                 if (ps->sched)
463                         close(ps->sched);
464                 if (ps->smaps)
465                         fclose(ps->smaps);
466         }
467
468         if (!of) {
469                 t = time(NULL);
470                 r = strftime(datestr, sizeof(datestr), "%Y%m%d-%H%M", localtime(&t));
471                 assert_se(r > 0);
472
473                 snprintf(output_file, PATH_MAX, "%s/bootchart-%s.svg", arg_output_path, datestr);
474                 of = fopen(output_file, "we");
475         }
476
477         if (!of) {
478                 fprintf(stderr, "opening output file '%s': %m\n", output_file);
479                 exit (EXIT_FAILURE);
480         }
481
482         svg_do(strna(build));
483
484         fprintf(stderr, "systemd-bootchart wrote %s\n", output_file);
485
486         do_journal_append(output_file);
487
488         if (of)
489                 fclose(of);
490
491         closedir(proc);
492         if (sysfd >= 0)
493                 close(sysfd);
494
495         /* nitpic cleanups */
496         ps = ps_first->next_ps;
497         while (ps->next_ps) {
498                 struct ps_struct *old;
499
500                 old = ps;
501                 old->sample = ps->first;
502                 ps = ps->next_ps;
503                 while (old->sample->next) {
504                         struct ps_sched_struct *oldsample = old->sample;
505
506                         old->sample = old->sample->next;
507                         free(oldsample);
508                 }
509                 free(old->cgroup);
510                 free(old->sample);
511                 free(old);
512         }
513         free(ps->cgroup);
514         free(ps->sample);
515         free(ps);
516
517         sampledata = head;
518         while (sampledata->link_prev) {
519                 struct list_sample_data *old_sampledata = sampledata;
520                 sampledata = sampledata->link_prev;
521                 free(old_sampledata);
522         }
523         free(sampledata);
524         /* don't complain when overrun once, happens most commonly on 1st sample */
525         if (overrun > 1)
526                 fprintf(stderr, "systemd-boochart: Warning: sample time overrun %i times\n", overrun);
527
528         return 0;
529 }