chiark / gitweb /
bootchart: clean up control flow logic
[elogind.git] / src / bootchart / bootchart.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright (C) 2009-2013 Intel Corporation
7
8   Authors:
9     Auke Kok <auke-jan.h.kok@intel.com>
10
11   systemd is free software; you can redistribute it and/or modify it
12   under the terms of the GNU Lesser General Public License as published by
13   the Free Software Foundation; either version 2.1 of the License, or
14   (at your option) any later version.
15
16   systemd is distributed in the hope that it will be useful, but
17   WITHOUT ANY WARRANTY; without even the implied warranty of
18   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19   Lesser General Public License for more details.
20
21   You should have received a copy of the GNU Lesser General Public License
22   along with systemd; If not, see <http://www.gnu.org/licenses/>.
23  ***/
24
25 /***
26
27   Many thanks to those who contributed ideas and code:
28   - Ziga Mahkovec - Original bootchart author
29   - Anders Norgaard - PyBootchartgui
30   - Michael Meeks - bootchart2
31   - Scott James Remnant - Ubuntu C-based logger
32   - Arjan van der Ven - for the idea to merge bootgraph.pl functionality
33
34  ***/
35
36 #include <sys/resource.h>
37 #include <stdio.h>
38 #include <signal.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <time.h>
43 #include <getopt.h>
44 #include <limits.h>
45 #include <errno.h>
46 #include <fcntl.h>
47 #include <stdbool.h>
48 #include "systemd/sd-journal.h"
49
50 #include "util.h"
51 #include "fileio.h"
52 #include "macro.h"
53 #include "conf-parser.h"
54 #include "strxcpyx.h"
55 #include "path-util.h"
56 #include "store.h"
57 #include "svg.h"
58 #include "bootchart.h"
59 #include "list.h"
60
/*
 * Shared runtime state. Everything except `exiting` has external linkage;
 * the variables not referenced in this file are presumably consumed by the
 * other bootchart compilation units (store/svg) -- verify there.
 */
double graph_start;             /* main() aborts if this is negative after log_uptime() */
double log_start;
struct ps_struct *ps_first;     /* sentinel head of the linked list of process entries */
int pscount;
int cpus;
double interval;                /* sampling period in nanoseconds, derived from arg_hz */
FILE *of = NULL;                /* SVG output stream */
int overrun = 0;                /* number of samples that used up their whole time slice */
/* written from the signal handler, hence volatile sig_atomic_t rather than int */
static volatile sig_atomic_t exiting = 0;
int sysfd=-1;                   /* descriptor for /sys, opened lazily by the sampling loop */
71
/* Built-in defaults; help() prints these and the arg_* variables below start out with them. */
#define DEFAULT_SAMPLES_LEN 500
#define DEFAULT_HZ 25.0
#define DEFAULT_SCALE_X 100.0 /* 100px = 1sec */
/* NOTE(review): the value is 20.0 but the comment says 16px -- confirm which one is intended */
#define DEFAULT_SCALE_Y 20.0  /* 16px = 1 process bar */
#define DEFAULT_INIT ROOTLIBEXECDIR "/systemd"
#define DEFAULT_OUTPUT "/run/log"

/* graph defaults */
/* These settings are filled in by parse_conf() and then overridden by parse_argv(). */
bool arg_entropy = false;
bool initcall = true;           /* not referenced in this file; presumably read by the SVG code */
bool arg_relative = false;
bool arg_filter = true;
bool arg_show_cmdline = false;
bool arg_show_cgroup = false;
bool arg_pss = false;
bool arg_percpu = false;
int samples;                    /* index of the sample currently being taken by main() */
int arg_samples_len = DEFAULT_SAMPLES_LEN; /* we record len+1 (1 start sample) */
double arg_hz = DEFAULT_HZ;
double arg_scale_x = DEFAULT_SCALE_X;
double arg_scale_y = DEFAULT_SCALE_Y;
static struct list_sample_data *sampledata;     /* sample record main() is currently filling */
struct list_sample_data *head;                  /* head of the list of recorded samples */

/* Executable that main() execs when running as PID 1. */
char arg_init_path[PATH_MAX] = DEFAULT_INIT;
/* Directory the bootchart-<date>.svg file is written to. */
char arg_output_path[PATH_MAX] = DEFAULT_OUTPUT;
98
99 static void signal_handler(int sig) {
100         if (sig++)
101                 sig--;
102         exiting = 1;
103 }
104
105 #define BOOTCHART_CONF "/etc/systemd/bootchart.conf"
106
107 #define BOOTCHART_MAX (16*1024*1024)
108
109 static void parse_conf(void) {
110         char *init = NULL, *output = NULL;
111         const ConfigTableItem items[] = {
112                 { "Bootchart", "Samples",          config_parse_int,    0, &arg_samples_len },
113                 { "Bootchart", "Frequency",        config_parse_double, 0, &arg_hz          },
114                 { "Bootchart", "Relative",         config_parse_bool,   0, &arg_relative    },
115                 { "Bootchart", "Filter",           config_parse_bool,   0, &arg_filter      },
116                 { "Bootchart", "Output",           config_parse_path,   0, &output          },
117                 { "Bootchart", "Init",             config_parse_path,   0, &init            },
118                 { "Bootchart", "PlotMemoryUsage",  config_parse_bool,   0, &arg_pss         },
119                 { "Bootchart", "PlotEntropyGraph", config_parse_bool,   0, &arg_entropy     },
120                 { "Bootchart", "ScaleX",           config_parse_double, 0, &arg_scale_x     },
121                 { "Bootchart", "ScaleY",           config_parse_double, 0, &arg_scale_y     },
122                 { "Bootchart", "ControlGroup",     config_parse_bool,   0, &arg_show_cgroup },
123                 { "Bootchart", "PerCPU",           config_parse_bool,   0, &arg_percpu      },
124                 { NULL, NULL, NULL, 0, NULL }
125         };
126
127         config_parse_many(BOOTCHART_CONF,
128                           CONF_DIRS_NULSTR("systemd/bootchart.conf"),
129                           NULL, config_item_table_lookup, items, true, NULL);
130
131         if (init != NULL)
132                 strscpy(arg_init_path, sizeof(arg_init_path), init);
133         if (output != NULL)
134                 strscpy(arg_output_path, sizeof(arg_output_path), output);
135 }
136
137 static void help(void) {
138         printf("Usage: %s [OPTIONS]\n\n"
139                "Options:\n"
140                "  -r --rel             Record time relative to recording\n"
141                "  -f --freq=FREQ       Sample frequency [%g]\n"
142                "  -n --samples=N       Stop sampling at [%d] samples\n"
143                "  -x --scale-x=N       Scale the graph horizontally [%g] \n"
144                "  -y --scale-y=N       Scale the graph vertically [%g] \n"
145                "  -p --pss             Enable PSS graph (CPU intensive)\n"
146                "  -e --entropy         Enable the entropy_avail graph\n"
147                "  -o --output=PATH     Path to output files [%s]\n"
148                "  -i --init=PATH       Path to init executable [%s]\n"
149                "  -F --no-filter       Disable filtering of unimportant or ephemeral processes\n"
150                "  -C --cmdline         Display full command lines with arguments\n"
151                "  -c --control-group   Display process control group\n"
152                "     --per-cpu         Draw each CPU utilization and wait bar also\n"
153                "  -h --help            Display this message\n\n"
154                "See bootchart.conf for more information.\n",
155                program_invocation_short_name,
156                DEFAULT_HZ,
157                DEFAULT_SAMPLES_LEN,
158                DEFAULT_SCALE_X,
159                DEFAULT_SCALE_Y,
160                DEFAULT_OUTPUT,
161                DEFAULT_INIT);
162 }
163
164 static int parse_argv(int argc, char *argv[]) {
165
166         enum {
167                 ARG_PERCPU = 0x100,
168         };
169
170         static const struct option options[] = {
171                 {"rel",           no_argument,        NULL,  'r'       },
172                 {"freq",          required_argument,  NULL,  'f'       },
173                 {"samples",       required_argument,  NULL,  'n'       },
174                 {"pss",           no_argument,        NULL,  'p'       },
175                 {"output",        required_argument,  NULL,  'o'       },
176                 {"init",          required_argument,  NULL,  'i'       },
177                 {"no-filter",     no_argument,        NULL,  'F'       },
178                 {"cmdline",       no_argument,        NULL,  'C'       },
179                 {"control-group", no_argument,        NULL,  'c'       },
180                 {"help",          no_argument,        NULL,  'h'       },
181                 {"scale-x",       required_argument,  NULL,  'x'       },
182                 {"scale-y",       required_argument,  NULL,  'y'       },
183                 {"entropy",       no_argument,        NULL,  'e'       },
184                 {"per-cpu",       no_argument,        NULL,  ARG_PERCPU},
185                 {}
186         };
187         int c, r;
188
189         if (getpid() == 1)
190                 opterr = 0;
191
192         while ((c = getopt_long(argc, argv, "erpf:n:o:i:FCchx:y:", options, NULL)) >= 0)
193                 switch (c) {
194
195                 case 'r':
196                         arg_relative = true;
197                         break;
198                 case 'f':
199                         r = safe_atod(optarg, &arg_hz);
200                         if (r < 0)
201                                 log_warning_errno(r, "failed to parse --freq/-f argument '%s': %m",
202                                                   optarg);
203                         break;
204                 case 'F':
205                         arg_filter = false;
206                         break;
207                 case 'C':
208                         arg_show_cmdline = true;
209                         break;
210                 case 'c':
211                         arg_show_cgroup = true;
212                         break;
213                 case 'n':
214                         r = safe_atoi(optarg, &arg_samples_len);
215                         if (r < 0)
216                                 log_warning_errno(r, "failed to parse --samples/-n argument '%s': %m",
217                                                   optarg);
218                         break;
219                 case 'o':
220                         path_kill_slashes(optarg);
221                         strscpy(arg_output_path, sizeof(arg_output_path), optarg);
222                         break;
223                 case 'i':
224                         path_kill_slashes(optarg);
225                         strscpy(arg_init_path, sizeof(arg_init_path), optarg);
226                         break;
227                 case 'p':
228                         arg_pss = true;
229                         break;
230                 case 'x':
231                         r = safe_atod(optarg, &arg_scale_x);
232                         if (r < 0)
233                                 log_warning_errno(r, "failed to parse --scale-x/-x argument '%s': %m",
234                                                   optarg);
235                         break;
236                 case 'y':
237                         r = safe_atod(optarg, &arg_scale_y);
238                         if (r < 0)
239                                 log_warning_errno(r, "failed to parse --scale-y/-y argument '%s': %m",
240                                                   optarg);
241                         break;
242                 case 'e':
243                         arg_entropy = true;
244                         break;
245                 case ARG_PERCPU:
246                         arg_percpu = true;
247                         break;
248                 case 'h':
249                         help();
250                         return 0;
251                 case '?':
252                         if (getpid() != 1)
253                                 return -EINVAL;
254                         else
255                                 return 0;
256                 default:
257                         assert_not_reached("Unhandled option code.");
258                 }
259
260         if (arg_hz <= 0) {
261                 log_error("Frequency needs to be > 0");
262                 return -EINVAL;
263         }
264
265         return 1;
266 }
267
268 static void do_journal_append(char *file) {
269         struct iovec iovec[5];
270         int r, j = 0;
271         ssize_t n;
272         _cleanup_free_ char *bootchart_file = NULL, *bootchart_message = NULL,
273                 *p = NULL;
274         _cleanup_close_ int fd = -1;
275
276         bootchart_file = strappend("BOOTCHART_FILE=", file);
277         if (bootchart_file)
278                 IOVEC_SET_STRING(iovec[j++], bootchart_file);
279
280         IOVEC_SET_STRING(iovec[j++], "MESSAGE_ID=9f26aa562cf440c2b16c773d0479b518");
281         IOVEC_SET_STRING(iovec[j++], "PRIORITY=7");
282         bootchart_message = strjoin("MESSAGE=Bootchart created: ", file, NULL);
283         if (bootchart_message)
284                 IOVEC_SET_STRING(iovec[j++], bootchart_message);
285
286         p = malloc(9 + BOOTCHART_MAX);
287         if (!p) {
288                 log_oom();
289                 return;
290         }
291
292         memcpy(p, "BOOTCHART=", 10);
293
294         fd = open(file, O_RDONLY|O_CLOEXEC);
295         if (fd < 0) {
296                 log_error_errno(errno, "Failed to open bootchart data \"%s\": %m", file);
297                 return;
298         }
299
300         n = loop_read(fd, p + 10, BOOTCHART_MAX, false);
301         if (n < 0) {
302                 log_error_errno(n, "Failed to read bootchart data: %m");
303                 return;
304         }
305
306         iovec[j].iov_base = p;
307         iovec[j].iov_len = 10 + n;
308         j++;
309
310         r = sd_journal_sendv(iovec, j);
311         if (r < 0)
312                 log_error_errno(r, "Failed to send bootchart: %m");
313 }
314
315 int main(int argc, char *argv[]) {
316         _cleanup_free_ char *build = NULL;
317         struct sigaction sig = {
318                 .sa_handler = signal_handler,
319         };
320         struct ps_struct *ps;
321         char output_file[PATH_MAX];
322         char datestr[200];
323         time_t t = 0;
324         int r;
325         struct rlimit rlim;
326         bool has_procfs = false;
327
328         parse_conf();
329
330         r = parse_argv(argc, argv);
331         if (r < 0)
332                 return EXIT_FAILURE;
333
334         if (r == 0)
335                 return EXIT_SUCCESS;
336
337         /*
338          * If the kernel executed us through init=/usr/lib/systemd/systemd-bootchart, then
339          * fork:
340          * - parent execs executable specified via init_path[] (/usr/lib/systemd/systemd by default) as pid=1
341          * - child logs data
342          */
343         if (getpid() == 1) {
344                 if (fork()) {
345                         /* parent */
346                         execl(arg_init_path, arg_init_path, NULL);
347                 }
348         }
349         argv[0][0] = '@';
350
351         rlim.rlim_cur = 4096;
352         rlim.rlim_max = 4096;
353         (void) setrlimit(RLIMIT_NOFILE, &rlim);
354
355         /* start with empty ps LL */
356         ps_first = new0(struct ps_struct, 1);
357         if (!ps_first) {
358                 log_oom();
359                 return EXIT_FAILURE;
360         }
361
362         /* handle TERM/INT nicely */
363         sigaction(SIGHUP, &sig, NULL);
364
365         interval = (1.0 / arg_hz) * 1000000000.0;
366
367         log_uptime();
368
369         if (graph_start < 0.0) {
370                 log_error("Failed to setup graph start time.\n\n"
371                           "The system uptime probably includes time that the system was suspended. "
372                           "Use --rel to bypass this issue.");
373                 return EXIT_FAILURE;
374         }
375
376         has_procfs = access("/proc/vmstat", F_OK) == 0;
377
378         LIST_HEAD_INIT(head);
379
380         /* main program loop */
381         for (samples = 0; !exiting && samples < arg_samples_len; samples++) {
382                 int res;
383                 double sample_stop;
384                 struct timespec req;
385                 time_t newint_s;
386                 long newint_ns;
387                 double elapsed;
388                 double timeleft;
389
390                 sampledata = new0(struct list_sample_data, 1);
391                 if (sampledata == NULL) {
392                         log_oom();
393                         return EXIT_FAILURE;
394                 }
395
396                 sampledata->sampletime = gettime_ns();
397                 sampledata->counter = samples;
398
399                 if (sysfd < 0)
400                         sysfd = open("/sys", O_RDONLY|O_CLOEXEC);
401
402                 if (!build) {
403                         if (parse_env_file("/etc/os-release", NEWLINE, "PRETTY_NAME", &build, NULL) == -ENOENT)
404                                 parse_env_file("/usr/lib/os-release", NEWLINE, "PRETTY_NAME", &build, NULL);
405                 }
406
407                 if (has_procfs) {
408                         r = log_sample(samples, &sampledata);
409                         if (r < 0)
410                                 return EXIT_FAILURE;
411                 } else {
412                         /* wait for /proc to become available, discarding samples */
413                         has_procfs = access("/proc/vmstat", F_OK) == 0;
414                 }
415
416                 sample_stop = gettime_ns();
417
418                 elapsed = (sample_stop - sampledata->sampletime) * 1000000000.0;
419                 timeleft = interval - elapsed;
420
421                 newint_s = (time_t)(timeleft / 1000000000.0);
422                 newint_ns = (long)(timeleft - (newint_s * 1000000000.0));
423
424                 /*
425                  * check if we have not consumed our entire timeslice. If we
426                  * do, don't sleep and take a new sample right away.
427                  * we'll lose all the missed samples and overrun our total
428                  * time
429                  */
430                 if (newint_ns > 0 || newint_s > 0) {
431                         req.tv_sec = newint_s;
432                         req.tv_nsec = newint_ns;
433
434                         res = nanosleep(&req, NULL);
435                         if (res) {
436                                 if (errno == EINTR) {
437                                         /* caught signal, probably HUP! */
438                                         break;
439                                 }
440                                 log_error_errno(errno, "nanosleep() failed: %m");
441                                 return EXIT_FAILURE;
442                         }
443                 } else {
444                         overrun++;
445                         /* calculate how many samples we lost and scrap them */
446                         arg_samples_len -= (int)(newint_ns / interval);
447                 }
448                 LIST_PREPEND(link, head, sampledata);
449         }
450
451         /* do some cleanup, close fd's */
452         ps = ps_first;
453         while (ps->next_ps) {
454                 ps = ps->next_ps;
455                 if (ps->schedstat >= 0)
456                         close(ps->schedstat);
457                 if (ps->sched >= 0)
458                         close(ps->sched);
459                 if (ps->smaps)
460                         fclose(ps->smaps);
461         }
462
463         if (!of) {
464                 t = time(NULL);
465                 r = strftime(datestr, sizeof(datestr), "%Y%m%d-%H%M", localtime(&t));
466                 assert_se(r > 0);
467
468                 snprintf(output_file, PATH_MAX, "%s/bootchart-%s.svg", arg_output_path, datestr);
469                 of = fopen(output_file, "we");
470         }
471
472         if (!of) {
473                 fprintf(stderr, "opening output file '%s': %m\n", output_file);
474                 exit (EXIT_FAILURE);
475         }
476
477         svg_do(strna(build));
478
479         fprintf(stderr, "systemd-bootchart wrote %s\n", output_file);
480
481         do_journal_append(output_file);
482
483         if (of)
484                 fclose(of);
485
486         closedir(proc);
487         if (sysfd >= 0)
488                 close(sysfd);
489
490         /* nitpic cleanups */
491         ps = ps_first->next_ps;
492         while (ps->next_ps) {
493                 struct ps_struct *old;
494
495                 old = ps;
496                 old->sample = ps->first;
497                 ps = ps->next_ps;
498                 while (old->sample->next) {
499                         struct ps_sched_struct *oldsample = old->sample;
500
501                         old->sample = old->sample->next;
502                         free(oldsample);
503                 }
504                 free(old->cgroup);
505                 free(old->sample);
506                 free(old);
507         }
508         free(ps->cgroup);
509         free(ps->sample);
510         free(ps);
511
512         sampledata = head;
513         while (sampledata->link_prev) {
514                 struct list_sample_data *old_sampledata = sampledata;
515                 sampledata = sampledata->link_prev;
516                 free(old_sampledata);
517         }
518         free(sampledata);
519         /* don't complain when overrun once, happens most commonly on 1st sample */
520         if (overrun > 1)
521                 log_warning("systemd-boochart: sample time overrun %i times\n", overrun);
522
523         return 0;
524 }