chiark / gitweb /
8e9a62f91d9f9402cdcf14cab0190982f852833d
[elogind.git] / src / bootchart / store.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright (C) 2009-2013 Intel Corporation
7
8   Authors:
9     Auke Kok <auke-jan.h.kok@intel.com>
10
11   systemd is free software; you can redistribute it and/or modify it
12   under the terms of the GNU Lesser General Public License as published by
13   the Free Software Foundation; either version 2.1 of the License, or
14   (at your option) any later version.
15
16   systemd is distributed in the hope that it will be useful, but
17   WITHOUT ANY WARRANTY; without even the implied warranty of
18   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19   Lesser General Public License for more details.
20
21   You should have received a copy of the GNU Lesser General Public License
22   along with systemd; If not, see <http://www.gnu.org/licenses/>.
23  ***/
24
25 #include <unistd.h>
26 #include <stdlib.h>
27 #include <limits.h>
28 #include <stdio.h>
29 #include <string.h>
30 #include <dirent.h>
31 #include <fcntl.h>
32 #include <time.h>
33
34 #include "util.h"
35 #include "time-util.h"
36 #include "strxcpyx.h"
37 #include "store.h"
38 #include "bootchart.h"
39 #include "cgroup-util.h"
40
/*
 * One static 4 KiB stdio buffer. log_sample() installs it with setvbuf()
 * on each per-process smaps stream it opens, so PSS collection needs
 * fewer read() calls than it would with stdio's default buffering.
 */
static char smaps_buf[4096];

/*
 * Number of trailing lines to skip after each smaps record:
 * 0 = not probed yet, 1 or 2 = set by log_sample() on its first PSS pass.
 */
static int skip;

/* Directory stream for /proc and its descriptor; opened lazily by log_sample(). */
DIR *proc = NULL;
int procfd = -1;
50
51 double gettime_ns(void) {
52         struct timespec n;
53
54         clock_gettime(CLOCK_MONOTONIC, &n);
55
56         return (n.tv_sec + (n.tv_nsec / (double) NSEC_PER_SEC));
57 }
58
59 static double gettime_up(void) {
60         struct timespec n;
61
62         clock_gettime(CLOCK_BOOTTIME, &n);
63         return (n.tv_sec + (n.tv_nsec / (double) NSEC_PER_SEC));
64 }
65
66 void log_uptime(void) {
67         if (arg_relative)
68                 graph_start = log_start = gettime_ns();
69         else {
70                 double uptime = gettime_up();
71
72                 log_start = gettime_ns();
73                 graph_start = log_start - uptime;
74         }
75 }
76
/*
 * Return a pointer to the character following the first '\n' in buf,
 * or NULL when buf is NULL or contains no newline.
 */
static char *bufgetline(char *buf) {
        char *newline = buf ? strchr(buf, '\n') : NULL;

        return newline ? newline + 1 : NULL;
}
88
89 static int pid_cmdline_strscpy(char *buffer, size_t buf_len, int pid) {
90         char filename[PATH_MAX];
91         _cleanup_close_ int fd=-1;
92         ssize_t n;
93
94         sprintf(filename, "%d/cmdline", pid);
95         fd = openat(procfd, filename, O_RDONLY);
96         if (fd < 0)
97                 return -errno;
98
99         n = read(fd, buffer, buf_len-1);
100         if (n > 0) {
101                 int i;
102                 for (i = 0; i < n; i++)
103                         if (buffer[i] == '\0')
104                                 buffer[i] = ' ';
105                 buffer[n] = '\0';
106         }
107         return 0;
108 }
109
110 void log_sample(int sample, struct list_sample_data **ptr) {
111         static int vmstat = -1;
112         static int schedstat = -1;
113         char buf[4096];
114         char key[256];
115         char val[256];
116         char rt[256];
117         char wt[256];
118         char *m;
119         int c;
120         int p;
121         int mod;
122         static int e_fd;
123         ssize_t s;
124         ssize_t n;
125         struct dirent *ent;
126         int fd;
127         struct list_sample_data *sampledata;
128         struct ps_sched_struct *ps_prev = NULL;
129
130         sampledata = *ptr;
131
132         /* all the per-process stuff goes here */
133         if (!proc) {
134                 /* find all processes */
135                 proc = opendir("/proc");
136                 if (!proc)
137                         return;
138                 procfd = dirfd(proc);
139         } else {
140                 rewinddir(proc);
141         }
142
143         if (vmstat < 0) {
144                 /* block stuff */
145                 vmstat = openat(procfd, "vmstat", O_RDONLY);
146                 if (vmstat == -1) {
147                         log_error_errno(errno, "Failed to open /proc/vmstat: %m");
148                         exit(EXIT_FAILURE);
149                 }
150         }
151
152         n = pread(vmstat, buf, sizeof(buf) - 1, 0);
153         if (n <= 0) {
154                 close(vmstat);
155                 vmstat = -1;
156                 return;
157         }
158         buf[n] = '\0';
159
160         m = buf;
161         while (m) {
162                 if (sscanf(m, "%s %s", key, val) < 2)
163                         goto vmstat_next;
164                 if (streq(key, "pgpgin"))
165                         sampledata->blockstat.bi = atoi(val);
166                 if (streq(key, "pgpgout")) {
167                         sampledata->blockstat.bo = atoi(val);
168                         break;
169                 }
170 vmstat_next:
171                 m = bufgetline(m);
172                 if (!m)
173                         break;
174         }
175
176         if (schedstat < 0) {
177                 /* overall CPU utilization */
178                 schedstat = openat(procfd, "schedstat", O_RDONLY);
179                 if (schedstat == -1) {
180                         log_error_errno(errno, "Failed to open /proc/schedstat (requires CONFIG_SCHEDSTATS=y in kernel config): %m");
181                         exit(EXIT_FAILURE);
182                 }
183         }
184
185         n = pread(schedstat, buf, sizeof(buf) - 1, 0);
186         if (n <= 0) {
187                 close(schedstat);
188                 schedstat = -1;
189                 return;
190         }
191         buf[n] = '\0';
192
193         m = buf;
194         while (m) {
195                 int r;
196
197                 if (sscanf(m, "%s %*s %*s %*s %*s %*s %*s %s %s", key, rt, wt) < 3)
198                         goto schedstat_next;
199
200                 if (strstr(key, "cpu")) {
201                         r = safe_atoi((const char*)(key+3), &c);
202                         if (r < 0 || c > MAXCPUS -1)
203                                 /* Oops, we only have room for MAXCPUS data */
204                                 break;
205                         sampledata->runtime[c] = atoll(rt);
206                         sampledata->waittime[c] = atoll(wt);
207
208                         if (c == cpus)
209                                 cpus = c + 1;
210                 }
211 schedstat_next:
212                 m = bufgetline(m);
213                 if (!m)
214                         break;
215         }
216
217         if (arg_entropy) {
218                 if (!e_fd) {
219                         e_fd = openat(procfd, "sys/kernel/random/entropy_avail", O_RDONLY);
220                 }
221
222                 if (e_fd) {
223                         n = pread(e_fd, buf, sizeof(buf) - 1, 0);
224                         if (n > 0) {
225                                 buf[n] = '\0';
226                                 sampledata->entropy_avail = atoi(buf);
227                         }
228                 }
229         }
230
231         while ((ent = readdir(proc)) != NULL) {
232                 char filename[PATH_MAX];
233                 int pid;
234                 struct ps_struct *ps;
235
236                 if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
237                         continue;
238
239                 pid = atoi(ent->d_name);
240
241                 if (pid >= MAXPIDS)
242                         continue;
243
244                 ps = ps_first;
245                 while (ps->next_ps) {
246                         ps = ps->next_ps;
247                         if (ps->pid == pid)
248                                 break;
249                 }
250
251                 /* end of our LL? then append a new record */
252                 if (ps->pid != pid) {
253                         _cleanup_fclose_ FILE *st = NULL;
254                         char t[32];
255                         struct ps_struct *parent;
256                         int r;
257
258                         ps->next_ps = new0(struct ps_struct, 1);
259                         if (!ps->next_ps) {
260                                 log_oom();
261                                 exit (EXIT_FAILURE);
262                         }
263                         ps = ps->next_ps;
264                         ps->pid = pid;
265                         ps->sched = -1;
266                         ps->schedstat = -1;
267
268                         ps->sample = new0(struct ps_sched_struct, 1);
269                         if (!ps->sample) {
270                                 log_oom();
271                                 exit (EXIT_FAILURE);
272                         }
273                         ps->sample->sampledata = sampledata;
274
275                         pscount++;
276
277                         /* mark our first sample */
278                         ps->first = ps->last = ps->sample;
279                         ps->sample->runtime = atoll(rt);
280                         ps->sample->waittime = atoll(wt);
281
282                         /* get name, start time */
283                         if (ps->sched < 0) {
284                                 sprintf(filename, "%d/sched", pid);
285                                 ps->sched = openat(procfd, filename, O_RDONLY);
286                                 if (ps->sched == -1)
287                                         continue;
288                         }
289
290                         s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
291                         if (s <= 0) {
292                                 close(ps->sched);
293                                 ps->sched = -1;
294                                 continue;
295                         }
296                         buf[s] = '\0';
297
298                         if (!sscanf(buf, "%s %*s %*s", key))
299                                 continue;
300
301                         strscpy(ps->name, sizeof(ps->name), key);
302
303                         /* cmdline */
304                         if (arg_show_cmdline)
305                                 pid_cmdline_strscpy(ps->name, sizeof(ps->name), pid);
306
307                         /* discard line 2 */
308                         m = bufgetline(buf);
309                         if (!m)
310                                 continue;
311
312                         m = bufgetline(m);
313                         if (!m)
314                                 continue;
315
316                         if (!sscanf(m, "%*s %*s %s", t))
317                                 continue;
318
319                         r = safe_atod(t, &ps->starttime);
320                         if (r < 0)
321                                 continue;
322
323                         ps->starttime /= 1000.0;
324
325                         if (arg_show_cgroup)
326                                 /* if this fails, that's OK */
327                                 cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER,
328                                                 ps->pid, &ps->cgroup);
329
330                         /* ppid */
331                         sprintf(filename, "%d/stat", pid);
332                         fd = openat(procfd, filename, O_RDONLY);
333                         if (fd == -1)
334                                 continue;
335                         st = fdopen(fd, "r");
336                         if (!st) {
337                                 close(fd);
338                                 continue;
339                         }
340                         if (!fscanf(st, "%*s %*s %*s %i", &p)) {
341                                 continue;
342                         }
343                         ps->ppid = p;
344
345                         /*
346                          * setup child pointers
347                          *
348                          * these are used to paint the tree coherently later
349                          * each parent has a LL of children, and a LL of siblings
350                          */
351                         if (pid == 1)
352                                 continue; /* nothing to do for init atm */
353
354                         /* kthreadd has ppid=0, which breaks our tree ordering */
355                         if (ps->ppid == 0)
356                                 ps->ppid = 1;
357
358                         parent = ps_first;
359                         while ((parent->next_ps && parent->pid != ps->ppid))
360                                 parent = parent->next_ps;
361
362                         if (parent->pid != ps->ppid) {
363                                 /* orphan */
364                                 ps->ppid = 1;
365                                 parent = ps_first->next_ps;
366                         }
367
368                         ps->parent = parent;
369
370                         if (!parent->children) {
371                                 /* it's the first child */
372                                 parent->children = ps;
373                         } else {
374                                 /* walk all children and append */
375                                 struct ps_struct *children;
376                                 children = parent->children;
377                                 while (children->next)
378                                         children = children->next;
379                                 children->next = ps;
380                         }
381                 }
382
383                 /* else -> found pid, append data in ps */
384
385                 /* below here is all continuous logging parts - we get here on every
386                  * iteration */
387
388                 /* rt, wt */
389                 if (ps->schedstat < 0) {
390                         sprintf(filename, "%d/schedstat", pid);
391                         ps->schedstat = openat(procfd, filename, O_RDONLY);
392                         if (ps->schedstat == -1)
393                                 continue;
394                 }
395                 s = pread(ps->schedstat, buf, sizeof(buf) - 1, 0);
396                 if (s <= 0) {
397                         /* clean up our file descriptors - assume that the process exited */
398                         close(ps->schedstat);
399                         ps->schedstat = -1;
400                         if (ps->sched) {
401                                 close(ps->sched);
402                                 ps->sched = -1;
403                         }
404                         //if (ps->smaps)
405                         //        fclose(ps->smaps);
406                         continue;
407                 }
408                 buf[s] = '\0';
409
410                 if (!sscanf(buf, "%s %s %*s", rt, wt))
411                         continue;
412
413                 ps->sample->next = new0(struct ps_sched_struct, 1);
414                 if (!ps->sample->next) {
415                         log_oom();
416                         exit(EXIT_FAILURE);
417                 }
418                 ps->sample->next->prev = ps->sample;
419                 ps->sample = ps->sample->next;
420                 ps->last = ps->sample;
421                 ps->sample->runtime = atoll(rt);
422                 ps->sample->waittime = atoll(wt);
423                 ps->sample->sampledata = sampledata;
424                 ps->sample->ps_new = ps;
425                 if (ps_prev) {
426                         ps_prev->cross = ps->sample;
427                 }
428                 ps_prev = ps->sample;
429                 ps->total = (ps->last->runtime - ps->first->runtime)
430                             / 1000000000.0;
431
432                 if (!arg_pss)
433                         goto catch_rename;
434
435                 /* Pss */
436                 if (!ps->smaps) {
437                         sprintf(filename, "%d/smaps", pid);
438                         fd = openat(procfd, filename, O_RDONLY);
439                         if (fd == -1)
440                                 continue;
441                         ps->smaps = fdopen(fd, "r");
442                         if (!ps->smaps) {
443                                 close(fd);
444                                 continue;
445                         }
446                         setvbuf(ps->smaps, smaps_buf, _IOFBF, sizeof(smaps_buf));
447                 }
448                 else {
449                         rewind(ps->smaps);
450                 }
451                 /* test to see if we need to skip another field */
452                 if (skip == 0) {
453                         if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
454                                 continue;
455                         }
456                         if (fread(buf, 1, 28 * 15, ps->smaps) != (28 * 15)) {
457                                 continue;
458                         }
459                         if (buf[392] == 'V') {
460                                 skip = 2;
461                         }
462                         else {
463                                 skip = 1;
464                         }
465                         rewind(ps->smaps);
466                 }
467                 while (1) {
468                         int pss_kb;
469
470                         /* skip one line, this contains the object mapped. */
471                         if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
472                                 break;
473                         }
474                         /* then there's a 28 char 14 line block */
475                         if (fread(buf, 1, 28 * 14, ps->smaps) != 28 * 14) {
476                                 break;
477                         }
478                         pss_kb = atoi(&buf[61]);
479                         ps->sample->pss += pss_kb;
480
481                         /* skip one more line if this is a newer kernel */
482                         if (skip == 2) {
483                                if (fgets(buf, sizeof(buf), ps->smaps) == NULL)
484                                        break;
485                         }
486                 }
487                 if (ps->sample->pss > ps->pss_max)
488                         ps->pss_max = ps->sample->pss;
489
490 catch_rename:
491                 /* catch process rename, try to randomize time */
492                 mod = (arg_hz < 4.0) ? 4.0 : (arg_hz / 4.0);
493                 if (((samples - ps->pid) + pid) % (int)(mod) == 0) {
494
495                         /* re-fetch name */
496                         /* get name, start time */
497                         if (!ps->sched) {
498                                 sprintf(filename, "%d/sched", pid);
499                                 ps->sched = openat(procfd, filename, O_RDONLY);
500                                 if (ps->sched == -1)
501                                         continue;
502                         }
503                         s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
504                         if (s <= 0) {
505                                 /* clean up file descriptors */
506                                 close(ps->sched);
507                                 ps->sched = -1;
508                                 if (ps->schedstat) {
509                                         close(ps->schedstat);
510                                         ps->schedstat = -1;
511                                 }
512                                 //if (ps->smaps)
513                                 //        fclose(ps->smaps);
514                                 continue;
515                         }
516                         buf[s] = '\0';
517
518                         if (!sscanf(buf, "%s %*s %*s", key))
519                                 continue;
520
521                         strscpy(ps->name, sizeof(ps->name), key);
522
523                         /* cmdline */
524                         if (arg_show_cmdline)
525                                 pid_cmdline_strscpy(ps->name, sizeof(ps->name), pid);
526                 }
527         }
528 }