chiark / gitweb /
bootchart: clean up sysfd and proc handling
[elogind.git] / src / bootchart / bootchart.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright (C) 2009-2013 Intel Corporation
7
8   Authors:
9     Auke Kok <auke-jan.h.kok@intel.com>
10
11   systemd is free software; you can redistribute it and/or modify it
12   under the terms of the GNU Lesser General Public License as published by
13   the Free Software Foundation; either version 2.1 of the License, or
14   (at your option) any later version.
15
16   systemd is distributed in the hope that it will be useful, but
17   WITHOUT ANY WARRANTY; without even the implied warranty of
18   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19   Lesser General Public License for more details.
20
21   You should have received a copy of the GNU Lesser General Public License
22   along with systemd; If not, see <http://www.gnu.org/licenses/>.
23  ***/
24
25 /***
26
27   Many thanks to those who contributed ideas and code:
28   - Ziga Mahkovec - Original bootchart author
29   - Anders Norgaard - PyBootchartgui
30   - Michael Meeks - bootchart2
31   - Scott James Remnant - Ubuntu C-based logger
32   - Arjan van der Ven - for the idea to merge bootgraph.pl functionality
33
34  ***/
35
36 #include <sys/resource.h>
37 #include <stdio.h>
38 #include <signal.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <time.h>
43 #include <getopt.h>
44 #include <limits.h>
45 #include <errno.h>
46 #include <fcntl.h>
47 #include <stdbool.h>
48 #include "systemd/sd-journal.h"
49
50 #include "util.h"
51 #include "fileio.h"
52 #include "macro.h"
53 #include "conf-parser.h"
54 #include "strxcpyx.h"
55 #include "path-util.h"
56 #include "store.h"
57 #include "svg.h"
58 #include "bootchart.h"
59 #include "list.h"
60
/*
 * Program-wide state. Everything except `exiting` has external linkage,
 * presumably so the other bootchart sources (store/svg) can reach it — confirm
 * against bootchart.h.
 */
double graph_start;             /* checked in main(): must be >= 0.0 after log_uptime() */
double log_start;
struct ps_struct *ps_first;     /* dummy head of the process list, allocated in main() */
int pscount;
int cpus;
double interval;                /* sampling period in nanoseconds, derived from arg_hz in main() */
FILE *of = NULL;                /* SVG output stream, opened in main() once sampling is done */
int overrun = 0;                /* number of samples that used up their whole timeslice */

/*
 * Set asynchronously by signal_handler() and polled by the sampling loop, so
 * it has to be a volatile sig_atomic_t: that is the only object type the C
 * standard guarantees a signal handler may safely write.
 */
static volatile sig_atomic_t exiting = 0;
70
/* Built-in defaults; also printed by help() as the bracketed default values. */
#define DEFAULT_SAMPLES_LEN 500
#define DEFAULT_HZ 25.0
#define DEFAULT_SCALE_X 100.0 /* 100px = 1sec */
#define DEFAULT_SCALE_Y 20.0  /* 20px = 1 process bar */
#define DEFAULT_INIT ROOTLIBEXECDIR "/systemd"
#define DEFAULT_OUTPUT "/run/log"
77
78 /* graph defaults */
79 bool arg_entropy = false;
80 bool initcall = true;
81 bool arg_relative = false;
82 bool arg_filter = true;
83 bool arg_show_cmdline = false;
84 bool arg_show_cgroup = false;
85 bool arg_pss = false;
86 bool arg_percpu = false;
87 int samples;
88 int arg_samples_len = DEFAULT_SAMPLES_LEN; /* we record len+1 (1 start sample) */
89 double arg_hz = DEFAULT_HZ;
90 double arg_scale_x = DEFAULT_SCALE_X;
91 double arg_scale_y = DEFAULT_SCALE_Y;
92 static struct list_sample_data *sampledata;
93 struct list_sample_data *head;
94
95 char arg_init_path[PATH_MAX] = DEFAULT_INIT;
96 char arg_output_path[PATH_MAX] = DEFAULT_OUTPUT;
97
98 static void signal_handler(int sig) {
99         if (sig++)
100                 sig--;
101         exiting = 1;
102 }
103
/* Upper bound, in bytes, on the chart data read back in do_journal_append(): 16 MiB. */
#define BOOTCHART_MAX (16 * 1024 * 1024)

/* Main configuration file handed to config_parse_many() in parse_conf(). */
#define BOOTCHART_CONF "/etc/systemd/bootchart.conf"
107
108 static void parse_conf(void) {
109         char *init = NULL, *output = NULL;
110         const ConfigTableItem items[] = {
111                 { "Bootchart", "Samples",          config_parse_int,    0, &arg_samples_len },
112                 { "Bootchart", "Frequency",        config_parse_double, 0, &arg_hz          },
113                 { "Bootchart", "Relative",         config_parse_bool,   0, &arg_relative    },
114                 { "Bootchart", "Filter",           config_parse_bool,   0, &arg_filter      },
115                 { "Bootchart", "Output",           config_parse_path,   0, &output          },
116                 { "Bootchart", "Init",             config_parse_path,   0, &init            },
117                 { "Bootchart", "PlotMemoryUsage",  config_parse_bool,   0, &arg_pss         },
118                 { "Bootchart", "PlotEntropyGraph", config_parse_bool,   0, &arg_entropy     },
119                 { "Bootchart", "ScaleX",           config_parse_double, 0, &arg_scale_x     },
120                 { "Bootchart", "ScaleY",           config_parse_double, 0, &arg_scale_y     },
121                 { "Bootchart", "ControlGroup",     config_parse_bool,   0, &arg_show_cgroup },
122                 { "Bootchart", "PerCPU",           config_parse_bool,   0, &arg_percpu      },
123                 { NULL, NULL, NULL, 0, NULL }
124         };
125
126         config_parse_many(BOOTCHART_CONF,
127                           CONF_DIRS_NULSTR("systemd/bootchart.conf"),
128                           NULL, config_item_table_lookup, items, true, NULL);
129
130         if (init != NULL)
131                 strscpy(arg_init_path, sizeof(arg_init_path), init);
132         if (output != NULL)
133                 strscpy(arg_output_path, sizeof(arg_output_path), output);
134 }
135
136 static void help(void) {
137         printf("Usage: %s [OPTIONS]\n\n"
138                "Options:\n"
139                "  -r --rel             Record time relative to recording\n"
140                "  -f --freq=FREQ       Sample frequency [%g]\n"
141                "  -n --samples=N       Stop sampling at [%d] samples\n"
142                "  -x --scale-x=N       Scale the graph horizontally [%g] \n"
143                "  -y --scale-y=N       Scale the graph vertically [%g] \n"
144                "  -p --pss             Enable PSS graph (CPU intensive)\n"
145                "  -e --entropy         Enable the entropy_avail graph\n"
146                "  -o --output=PATH     Path to output files [%s]\n"
147                "  -i --init=PATH       Path to init executable [%s]\n"
148                "  -F --no-filter       Disable filtering of unimportant or ephemeral processes\n"
149                "  -C --cmdline         Display full command lines with arguments\n"
150                "  -c --control-group   Display process control group\n"
151                "     --per-cpu         Draw each CPU utilization and wait bar also\n"
152                "  -h --help            Display this message\n\n"
153                "See bootchart.conf for more information.\n",
154                program_invocation_short_name,
155                DEFAULT_HZ,
156                DEFAULT_SAMPLES_LEN,
157                DEFAULT_SCALE_X,
158                DEFAULT_SCALE_Y,
159                DEFAULT_OUTPUT,
160                DEFAULT_INIT);
161 }
162
163 static int parse_argv(int argc, char *argv[]) {
164
165         enum {
166                 ARG_PERCPU = 0x100,
167         };
168
169         static const struct option options[] = {
170                 {"rel",           no_argument,        NULL,  'r'       },
171                 {"freq",          required_argument,  NULL,  'f'       },
172                 {"samples",       required_argument,  NULL,  'n'       },
173                 {"pss",           no_argument,        NULL,  'p'       },
174                 {"output",        required_argument,  NULL,  'o'       },
175                 {"init",          required_argument,  NULL,  'i'       },
176                 {"no-filter",     no_argument,        NULL,  'F'       },
177                 {"cmdline",       no_argument,        NULL,  'C'       },
178                 {"control-group", no_argument,        NULL,  'c'       },
179                 {"help",          no_argument,        NULL,  'h'       },
180                 {"scale-x",       required_argument,  NULL,  'x'       },
181                 {"scale-y",       required_argument,  NULL,  'y'       },
182                 {"entropy",       no_argument,        NULL,  'e'       },
183                 {"per-cpu",       no_argument,        NULL,  ARG_PERCPU},
184                 {}
185         };
186         int c, r;
187
188         if (getpid() == 1)
189                 opterr = 0;
190
191         while ((c = getopt_long(argc, argv, "erpf:n:o:i:FCchx:y:", options, NULL)) >= 0)
192                 switch (c) {
193
194                 case 'r':
195                         arg_relative = true;
196                         break;
197                 case 'f':
198                         r = safe_atod(optarg, &arg_hz);
199                         if (r < 0)
200                                 log_warning_errno(r, "failed to parse --freq/-f argument '%s': %m",
201                                                   optarg);
202                         break;
203                 case 'F':
204                         arg_filter = false;
205                         break;
206                 case 'C':
207                         arg_show_cmdline = true;
208                         break;
209                 case 'c':
210                         arg_show_cgroup = true;
211                         break;
212                 case 'n':
213                         r = safe_atoi(optarg, &arg_samples_len);
214                         if (r < 0)
215                                 log_warning_errno(r, "failed to parse --samples/-n argument '%s': %m",
216                                                   optarg);
217                         break;
218                 case 'o':
219                         path_kill_slashes(optarg);
220                         strscpy(arg_output_path, sizeof(arg_output_path), optarg);
221                         break;
222                 case 'i':
223                         path_kill_slashes(optarg);
224                         strscpy(arg_init_path, sizeof(arg_init_path), optarg);
225                         break;
226                 case 'p':
227                         arg_pss = true;
228                         break;
229                 case 'x':
230                         r = safe_atod(optarg, &arg_scale_x);
231                         if (r < 0)
232                                 log_warning_errno(r, "failed to parse --scale-x/-x argument '%s': %m",
233                                                   optarg);
234                         break;
235                 case 'y':
236                         r = safe_atod(optarg, &arg_scale_y);
237                         if (r < 0)
238                                 log_warning_errno(r, "failed to parse --scale-y/-y argument '%s': %m",
239                                                   optarg);
240                         break;
241                 case 'e':
242                         arg_entropy = true;
243                         break;
244                 case ARG_PERCPU:
245                         arg_percpu = true;
246                         break;
247                 case 'h':
248                         help();
249                         return 0;
250                 case '?':
251                         if (getpid() != 1)
252                                 return -EINVAL;
253                         else
254                                 return 0;
255                 default:
256                         assert_not_reached("Unhandled option code.");
257                 }
258
259         if (arg_hz <= 0) {
260                 log_error("Frequency needs to be > 0");
261                 return -EINVAL;
262         }
263
264         return 1;
265 }
266
267 static void do_journal_append(char *file) {
268         struct iovec iovec[5];
269         int r, j = 0;
270         ssize_t n;
271         _cleanup_free_ char *bootchart_file = NULL, *bootchart_message = NULL,
272                 *p = NULL;
273         _cleanup_close_ int fd = -1;
274
275         bootchart_file = strappend("BOOTCHART_FILE=", file);
276         if (bootchart_file)
277                 IOVEC_SET_STRING(iovec[j++], bootchart_file);
278
279         IOVEC_SET_STRING(iovec[j++], "MESSAGE_ID=9f26aa562cf440c2b16c773d0479b518");
280         IOVEC_SET_STRING(iovec[j++], "PRIORITY=7");
281         bootchart_message = strjoin("MESSAGE=Bootchart created: ", file, NULL);
282         if (bootchart_message)
283                 IOVEC_SET_STRING(iovec[j++], bootchart_message);
284
285         p = malloc(9 + BOOTCHART_MAX);
286         if (!p) {
287                 log_oom();
288                 return;
289         }
290
291         memcpy(p, "BOOTCHART=", 10);
292
293         fd = open(file, O_RDONLY|O_CLOEXEC);
294         if (fd < 0) {
295                 log_error_errno(errno, "Failed to open bootchart data \"%s\": %m", file);
296                 return;
297         }
298
299         n = loop_read(fd, p + 10, BOOTCHART_MAX, false);
300         if (n < 0) {
301                 log_error_errno(n, "Failed to read bootchart data: %m");
302                 return;
303         }
304
305         iovec[j].iov_base = p;
306         iovec[j].iov_len = 10 + n;
307         j++;
308
309         r = sd_journal_sendv(iovec, j);
310         if (r < 0)
311                 log_error_errno(r, "Failed to send bootchart: %m");
312 }
313
314 int main(int argc, char *argv[]) {
315         _cleanup_free_ char *build = NULL;
316         _cleanup_close_ int sysfd = -1;
317         _cleanup_closedir_ DIR *proc = NULL;
318         struct sigaction sig = {
319                 .sa_handler = signal_handler,
320         };
321         struct ps_struct *ps;
322         char output_file[PATH_MAX];
323         char datestr[200];
324         time_t t = 0;
325         int r;
326         struct rlimit rlim;
327
328         parse_conf();
329
330         r = parse_argv(argc, argv);
331         if (r < 0)
332                 return EXIT_FAILURE;
333
334         if (r == 0)
335                 return EXIT_SUCCESS;
336
337         /*
338          * If the kernel executed us through init=/usr/lib/systemd/systemd-bootchart, then
339          * fork:
340          * - parent execs executable specified via init_path[] (/usr/lib/systemd/systemd by default) as pid=1
341          * - child logs data
342          */
343         if (getpid() == 1) {
344                 if (fork()) {
345                         /* parent */
346                         execl(arg_init_path, arg_init_path, NULL);
347                 }
348         }
349         argv[0][0] = '@';
350
351         rlim.rlim_cur = 4096;
352         rlim.rlim_max = 4096;
353         (void) setrlimit(RLIMIT_NOFILE, &rlim);
354
355         /* start with empty ps LL */
356         ps_first = new0(struct ps_struct, 1);
357         if (!ps_first) {
358                 log_oom();
359                 return EXIT_FAILURE;
360         }
361
362         /* handle TERM/INT nicely */
363         sigaction(SIGHUP, &sig, NULL);
364
365         interval = (1.0 / arg_hz) * 1000000000.0;
366
367         log_uptime();
368
369         if (graph_start < 0.0) {
370                 log_error("Failed to setup graph start time.\n\n"
371                           "The system uptime probably includes time that the system was suspended. "
372                           "Use --rel to bypass this issue.");
373                 return EXIT_FAILURE;
374         }
375
376         LIST_HEAD_INIT(head);
377
378         /* main program loop */
379         for (samples = 0; !exiting && samples < arg_samples_len; samples++) {
380                 int res;
381                 double sample_stop;
382                 struct timespec req;
383                 time_t newint_s;
384                 long newint_ns;
385                 double elapsed;
386                 double timeleft;
387
388                 sampledata = new0(struct list_sample_data, 1);
389                 if (sampledata == NULL) {
390                         log_oom();
391                         return EXIT_FAILURE;
392                 }
393
394                 sampledata->sampletime = gettime_ns();
395                 sampledata->counter = samples;
396
397                 if (sysfd < 0)
398                         sysfd = open("/sys", O_RDONLY|O_CLOEXEC);
399
400                 if (!build) {
401                         if (parse_env_file("/etc/os-release", NEWLINE, "PRETTY_NAME", &build, NULL) == -ENOENT)
402                                 parse_env_file("/usr/lib/os-release", NEWLINE, "PRETTY_NAME", &build, NULL);
403                 }
404
405                 if (proc)
406                         rewinddir(proc);
407                 else
408                         proc = opendir("/proc");
409
410                 /* wait for /proc to become available, discarding samples */
411                 if (proc) {
412                         r = log_sample(proc, samples, &sampledata);
413                         if (r < 0)
414                                 return EXIT_FAILURE;
415                 }
416
417                 sample_stop = gettime_ns();
418
419                 elapsed = (sample_stop - sampledata->sampletime) * 1000000000.0;
420                 timeleft = interval - elapsed;
421
422                 newint_s = (time_t)(timeleft / 1000000000.0);
423                 newint_ns = (long)(timeleft - (newint_s * 1000000000.0));
424
425                 /*
426                  * check if we have not consumed our entire timeslice. If we
427                  * do, don't sleep and take a new sample right away.
428                  * we'll lose all the missed samples and overrun our total
429                  * time
430                  */
431                 if (newint_ns > 0 || newint_s > 0) {
432                         req.tv_sec = newint_s;
433                         req.tv_nsec = newint_ns;
434
435                         res = nanosleep(&req, NULL);
436                         if (res) {
437                                 if (errno == EINTR) {
438                                         /* caught signal, probably HUP! */
439                                         break;
440                                 }
441                                 log_error_errno(errno, "nanosleep() failed: %m");
442                                 return EXIT_FAILURE;
443                         }
444                 } else {
445                         overrun++;
446                         /* calculate how many samples we lost and scrap them */
447                         arg_samples_len -= (int)(newint_ns / interval);
448                 }
449                 LIST_PREPEND(link, head, sampledata);
450         }
451
452         /* do some cleanup, close fd's */
453         ps = ps_first;
454         while (ps->next_ps) {
455                 ps = ps->next_ps;
456                 if (ps->schedstat >= 0)
457                         close(ps->schedstat);
458                 if (ps->sched >= 0)
459                         close(ps->sched);
460                 if (ps->smaps)
461                         fclose(ps->smaps);
462         }
463
464         if (!of) {
465                 t = time(NULL);
466                 r = strftime(datestr, sizeof(datestr), "%Y%m%d-%H%M", localtime(&t));
467                 assert_se(r > 0);
468
469                 snprintf(output_file, PATH_MAX, "%s/bootchart-%s.svg", arg_output_path, datestr);
470                 of = fopen(output_file, "we");
471         }
472
473         if (!of) {
474                 log_error("Error opening output file '%s': %m\n", output_file);
475                 return EXIT_FAILURE;
476         }
477
478         r = svg_do(strna(build));
479         if (r < 0) {
480                 log_error_errno(r, "Error generating svg file: %m\n");
481                 return EXIT_FAILURE;
482         }
483
484         log_info("systemd-bootchart wrote %s\n", output_file);
485
486         do_journal_append(output_file);
487
488         if (of)
489                 fclose(of);
490
491         /* nitpic cleanups */
492         ps = ps_first->next_ps;
493         while (ps->next_ps) {
494                 struct ps_struct *old;
495
496                 old = ps;
497                 old->sample = ps->first;
498                 ps = ps->next_ps;
499                 while (old->sample->next) {
500                         struct ps_sched_struct *oldsample = old->sample;
501
502                         old->sample = old->sample->next;
503                         free(oldsample);
504                 }
505                 free(old->cgroup);
506                 free(old->sample);
507                 free(old);
508         }
509         free(ps->cgroup);
510         free(ps->sample);
511         free(ps);
512
513         sampledata = head;
514         while (sampledata->link_prev) {
515                 struct list_sample_data *old_sampledata = sampledata;
516                 sampledata = sampledata->link_prev;
517                 free(old_sampledata);
518         }
519         free(sampledata);
520         /* don't complain when overrun once, happens most commonly on 1st sample */
521         if (overrun > 1)
522                 log_warning("systemd-boochart: sample time overrun %i times\n", overrun);
523
524         return 0;
525 }