chiark / gitweb /
612510408454439b7974437e4a43c55767a52117
[elogind.git] / src / bootchart / store.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright (C) 2009-2013 Intel Corporation
7
8   Authors:
9     Auke Kok <auke-jan.h.kok@intel.com>
10
11   systemd is free software; you can redistribute it and/or modify it
12   under the terms of the GNU Lesser General Public License as published by
13   the Free Software Foundation; either version 2.1 of the License, or
14   (at your option) any later version.
15
16   systemd is distributed in the hope that it will be useful, but
17   WITHOUT ANY WARRANTY; without even the implied warranty of
18   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19   Lesser General Public License for more details.
20
21   You should have received a copy of the GNU Lesser General Public License
22   along with systemd; If not, see <http://www.gnu.org/licenses/>.
23  ***/
24
25 #include <unistd.h>
26 #include <stdlib.h>
27 #include <limits.h>
28 #include <stdio.h>
29 #include <string.h>
30 #include <dirent.h>
31 #include <fcntl.h>
32 #include <time.h>
33
34 #include "util.h"
35 #include "time-util.h"
36 #include "strxcpyx.h"
37 #include "store.h"
38 #include "bootchart.h"
39 #include "cgroup-util.h"
40
/*
 * Static 4k buffer handed to setvbuf() for the per-process smaps
 * streams. The intent is to give stdio a larger buffer than its default
 * (1k according to the original note) so that PSS collection needs
 * fewer read() calls.
 */
static char smaps_buf[4096];

/*
 * smaps record layout, detected lazily by log_sample():
 *   0 - not probed yet
 *   1 - no extra line after each 14-line block
 *   2 - one extra line follows each block and must be skipped
 */
static int skip;
48
49 double gettime_ns(void) {
50         struct timespec n;
51
52         clock_gettime(CLOCK_MONOTONIC, &n);
53
54         return (n.tv_sec + (n.tv_nsec / (double) NSEC_PER_SEC));
55 }
56
57 static double gettime_up(void) {
58         struct timespec n;
59
60         clock_gettime(CLOCK_BOOTTIME, &n);
61         return (n.tv_sec + (n.tv_nsec / (double) NSEC_PER_SEC));
62 }
63
64 void log_uptime(void) {
65         if (arg_relative)
66                 graph_start = log_start = gettime_ns();
67         else {
68                 double uptime = gettime_up();
69
70                 log_start = gettime_ns();
71                 graph_start = log_start - uptime;
72         }
73 }
74
/*
 * Return a pointer to the character following the first '\n' in buf,
 * i.e. the start of the next line. Returns NULL when buf is NULL or
 * contains no newline. The buffer itself is not modified.
 */
static char *bufgetline(char *buf) {
        char *newline;

        if (buf == NULL)
                return NULL;

        newline = strchr(buf, '\n');

        return newline ? newline + 1 : NULL;
}
86
87 static int pid_cmdline_strscpy(int procfd, char *buffer, size_t buf_len, int pid) {
88         char filename[PATH_MAX];
89         _cleanup_close_ int fd=-1;
90         ssize_t n;
91
92         sprintf(filename, "%d/cmdline", pid);
93         fd = openat(procfd, filename, O_RDONLY);
94         if (fd < 0)
95                 return -errno;
96
97         n = read(fd, buffer, buf_len-1);
98         if (n > 0) {
99                 int i;
100                 for (i = 0; i < n; i++)
101                         if (buffer[i] == '\0')
102                                 buffer[i] = ' ';
103                 buffer[n] = '\0';
104         }
105         return 0;
106 }
107
108 int log_sample(DIR *proc, int sample, struct list_sample_data **ptr) {
109         static int vmstat = -1;
110         static int schedstat = -1;
111         char buf[4096];
112         char key[256];
113         char val[256];
114         char rt[256];
115         char wt[256];
116         char *m;
117         int c;
118         int p;
119         int mod;
120         static int e_fd = -1;
121         ssize_t s;
122         ssize_t n;
123         struct dirent *ent;
124         int fd;
125         struct list_sample_data *sampledata;
126         struct ps_sched_struct *ps_prev = NULL;
127         int procfd;
128
129         sampledata = *ptr;
130
131         procfd = dirfd(proc);
132         if (procfd < 0)
133                 return -errno;
134
135         if (vmstat < 0) {
136                 /* block stuff */
137                 vmstat = openat(procfd, "vmstat", O_RDONLY);
138                 if (vmstat == -1)
139                         return log_error_errno(errno, "Failed to open /proc/vmstat: %m");
140         }
141
142         n = pread(vmstat, buf, sizeof(buf) - 1, 0);
143         if (n <= 0) {
144                 vmstat = safe_close(vmstat);
145                 if (n < 0)
146                         return -errno;
147                 return -ENODATA;
148         }
149         buf[n] = '\0';
150
151         m = buf;
152         while (m) {
153                 if (sscanf(m, "%s %s", key, val) < 2)
154                         goto vmstat_next;
155                 if (streq(key, "pgpgin"))
156                         sampledata->blockstat.bi = atoi(val);
157                 if (streq(key, "pgpgout")) {
158                         sampledata->blockstat.bo = atoi(val);
159                         break;
160                 }
161 vmstat_next:
162                 m = bufgetline(m);
163                 if (!m)
164                         break;
165         }
166
167         if (schedstat < 0) {
168                 /* overall CPU utilization */
169                 schedstat = openat(procfd, "schedstat", O_RDONLY);
170                 if (schedstat == -1)
171                         return log_error_errno(errno, "Failed to open /proc/schedstat (requires CONFIG_SCHEDSTATS=y in kernel config): %m");
172         }
173
174         n = pread(schedstat, buf, sizeof(buf) - 1, 0);
175         if (n <= 0) {
176                 schedstat = safe_close(schedstat);
177                 if (n < 0)
178                         return -errno;
179                 return -ENODATA;
180         }
181         buf[n] = '\0';
182
183         m = buf;
184         while (m) {
185                 int r;
186
187                 if (sscanf(m, "%s %*s %*s %*s %*s %*s %*s %s %s", key, rt, wt) < 3)
188                         goto schedstat_next;
189
190                 if (strstr(key, "cpu")) {
191                         r = safe_atoi((const char*)(key+3), &c);
192                         if (r < 0 || c > MAXCPUS -1)
193                                 /* Oops, we only have room for MAXCPUS data */
194                                 break;
195                         sampledata->runtime[c] = atoll(rt);
196                         sampledata->waittime[c] = atoll(wt);
197
198                         if (c == cpus)
199                                 cpus = c + 1;
200                 }
201 schedstat_next:
202                 m = bufgetline(m);
203                 if (!m)
204                         break;
205         }
206
207         if (arg_entropy) {
208                 if (e_fd < 0) {
209                         e_fd = openat(procfd, "sys/kernel/random/entropy_avail", O_RDONLY);
210                         if (e_fd == -1)
211                                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/random/entropy_avail: %m");
212                 }
213
214                 n = pread(e_fd, buf, sizeof(buf) - 1, 0);
215                 if (n <= 0) {
216                         close(e_fd);
217                         e_fd = -1;
218                 } else {
219                         buf[n] = '\0';
220                         sampledata->entropy_avail = atoi(buf);
221                 }
222         }
223
224         while ((ent = readdir(proc)) != NULL) {
225                 char filename[PATH_MAX];
226                 int pid;
227                 struct ps_struct *ps;
228
229                 if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
230                         continue;
231
232                 pid = atoi(ent->d_name);
233
234                 if (pid >= MAXPIDS)
235                         continue;
236
237                 ps = ps_first;
238                 while (ps->next_ps) {
239                         ps = ps->next_ps;
240                         if (ps->pid == pid)
241                                 break;
242                 }
243
244                 /* end of our LL? then append a new record */
245                 if (ps->pid != pid) {
246                         _cleanup_fclose_ FILE *st = NULL;
247                         char t[32];
248                         struct ps_struct *parent;
249                         int r;
250
251                         ps->next_ps = new0(struct ps_struct, 1);
252                         if (!ps->next_ps)
253                                 return log_oom();
254
255                         ps = ps->next_ps;
256                         ps->pid = pid;
257                         ps->sched = -1;
258                         ps->schedstat = -1;
259
260                         ps->sample = new0(struct ps_sched_struct, 1);
261                         if (!ps->sample)
262                                 return log_oom();
263
264                         ps->sample->sampledata = sampledata;
265
266                         pscount++;
267
268                         /* mark our first sample */
269                         ps->first = ps->last = ps->sample;
270                         ps->sample->runtime = atoll(rt);
271                         ps->sample->waittime = atoll(wt);
272
273                         /* get name, start time */
274                         if (ps->sched < 0) {
275                                 sprintf(filename, "%d/sched", pid);
276                                 ps->sched = openat(procfd, filename, O_RDONLY);
277                                 if (ps->sched == -1)
278                                         continue;
279                         }
280
281                         s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
282                         if (s <= 0) {
283                                 close(ps->sched);
284                                 ps->sched = -1;
285                                 continue;
286                         }
287                         buf[s] = '\0';
288
289                         if (!sscanf(buf, "%s %*s %*s", key))
290                                 continue;
291
292                         strscpy(ps->name, sizeof(ps->name), key);
293
294                         /* cmdline */
295                         if (arg_show_cmdline)
296                                 pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
297
298                         /* discard line 2 */
299                         m = bufgetline(buf);
300                         if (!m)
301                                 continue;
302
303                         m = bufgetline(m);
304                         if (!m)
305                                 continue;
306
307                         if (!sscanf(m, "%*s %*s %s", t))
308                                 continue;
309
310                         r = safe_atod(t, &ps->starttime);
311                         if (r < 0)
312                                 continue;
313
314                         ps->starttime /= 1000.0;
315
316                         if (arg_show_cgroup)
317                                 /* if this fails, that's OK */
318                                 cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER,
319                                                 ps->pid, &ps->cgroup);
320
321                         /* ppid */
322                         sprintf(filename, "%d/stat", pid);
323                         fd = openat(procfd, filename, O_RDONLY);
324                         if (fd == -1)
325                                 continue;
326                         st = fdopen(fd, "r");
327                         if (!st) {
328                                 close(fd);
329                                 continue;
330                         }
331                         if (!fscanf(st, "%*s %*s %*s %i", &p)) {
332                                 continue;
333                         }
334                         ps->ppid = p;
335
336                         /*
337                          * setup child pointers
338                          *
339                          * these are used to paint the tree coherently later
340                          * each parent has a LL of children, and a LL of siblings
341                          */
342                         if (pid == 1)
343                                 continue; /* nothing to do for init atm */
344
345                         /* kthreadd has ppid=0, which breaks our tree ordering */
346                         if (ps->ppid == 0)
347                                 ps->ppid = 1;
348
349                         parent = ps_first;
350                         while ((parent->next_ps && parent->pid != ps->ppid))
351                                 parent = parent->next_ps;
352
353                         if (parent->pid != ps->ppid) {
354                                 /* orphan */
355                                 ps->ppid = 1;
356                                 parent = ps_first->next_ps;
357                         }
358
359                         ps->parent = parent;
360
361                         if (!parent->children) {
362                                 /* it's the first child */
363                                 parent->children = ps;
364                         } else {
365                                 /* walk all children and append */
366                                 struct ps_struct *children;
367                                 children = parent->children;
368                                 while (children->next)
369                                         children = children->next;
370                                 children->next = ps;
371                         }
372                 }
373
374                 /* else -> found pid, append data in ps */
375
376                 /* below here is all continuous logging parts - we get here on every
377                  * iteration */
378
379                 /* rt, wt */
380                 if (ps->schedstat < 0) {
381                         sprintf(filename, "%d/schedstat", pid);
382                         ps->schedstat = openat(procfd, filename, O_RDONLY);
383                         if (ps->schedstat == -1)
384                                 continue;
385                 }
386                 s = pread(ps->schedstat, buf, sizeof(buf) - 1, 0);
387                 if (s <= 0) {
388                         /* clean up our file descriptors - assume that the process exited */
389                         close(ps->schedstat);
390                         ps->schedstat = -1;
391                         if (ps->sched) {
392                                 close(ps->sched);
393                                 ps->sched = -1;
394                         }
395                         //if (ps->smaps)
396                         //        fclose(ps->smaps);
397                         continue;
398                 }
399                 buf[s] = '\0';
400
401                 if (!sscanf(buf, "%s %s %*s", rt, wt))
402                         continue;
403
404                 ps->sample->next = new0(struct ps_sched_struct, 1);
405                 if (!ps->sample->next)
406                         return log_oom();
407
408                 ps->sample->next->prev = ps->sample;
409                 ps->sample = ps->sample->next;
410                 ps->last = ps->sample;
411                 ps->sample->runtime = atoll(rt);
412                 ps->sample->waittime = atoll(wt);
413                 ps->sample->sampledata = sampledata;
414                 ps->sample->ps_new = ps;
415                 if (ps_prev) {
416                         ps_prev->cross = ps->sample;
417                 }
418                 ps_prev = ps->sample;
419                 ps->total = (ps->last->runtime - ps->first->runtime)
420                             / 1000000000.0;
421
422                 if (!arg_pss)
423                         goto catch_rename;
424
425                 /* Pss */
426                 if (!ps->smaps) {
427                         sprintf(filename, "%d/smaps", pid);
428                         fd = openat(procfd, filename, O_RDONLY);
429                         if (fd == -1)
430                                 continue;
431                         ps->smaps = fdopen(fd, "r");
432                         if (!ps->smaps) {
433                                 close(fd);
434                                 continue;
435                         }
436                         setvbuf(ps->smaps, smaps_buf, _IOFBF, sizeof(smaps_buf));
437                 }
438                 else {
439                         rewind(ps->smaps);
440                 }
441                 /* test to see if we need to skip another field */
442                 if (skip == 0) {
443                         if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
444                                 continue;
445                         }
446                         if (fread(buf, 1, 28 * 15, ps->smaps) != (28 * 15)) {
447                                 continue;
448                         }
449                         if (buf[392] == 'V') {
450                                 skip = 2;
451                         }
452                         else {
453                                 skip = 1;
454                         }
455                         rewind(ps->smaps);
456                 }
457                 while (1) {
458                         int pss_kb;
459
460                         /* skip one line, this contains the object mapped. */
461                         if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
462                                 break;
463                         }
464                         /* then there's a 28 char 14 line block */
465                         if (fread(buf, 1, 28 * 14, ps->smaps) != 28 * 14) {
466                                 break;
467                         }
468                         pss_kb = atoi(&buf[61]);
469                         ps->sample->pss += pss_kb;
470
471                         /* skip one more line if this is a newer kernel */
472                         if (skip == 2) {
473                                if (fgets(buf, sizeof(buf), ps->smaps) == NULL)
474                                        break;
475                         }
476                 }
477                 if (ps->sample->pss > ps->pss_max)
478                         ps->pss_max = ps->sample->pss;
479
480 catch_rename:
481                 /* catch process rename, try to randomize time */
482                 mod = (arg_hz < 4.0) ? 4.0 : (arg_hz / 4.0);
483                 if (((samples - ps->pid) + pid) % (int)(mod) == 0) {
484
485                         /* re-fetch name */
486                         /* get name, start time */
487                         if (!ps->sched) {
488                                 sprintf(filename, "%d/sched", pid);
489                                 ps->sched = openat(procfd, filename, O_RDONLY);
490                                 if (ps->sched == -1)
491                                         continue;
492                         }
493                         s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
494                         if (s <= 0) {
495                                 /* clean up file descriptors */
496                                 close(ps->sched);
497                                 ps->sched = -1;
498                                 if (ps->schedstat) {
499                                         close(ps->schedstat);
500                                         ps->schedstat = -1;
501                                 }
502                                 //if (ps->smaps)
503                                 //        fclose(ps->smaps);
504                                 continue;
505                         }
506                         buf[s] = '\0';
507
508                         if (!sscanf(buf, "%s %*s %*s", key))
509                                 continue;
510
511                         strscpy(ps->name, sizeof(ps->name), key);
512
513                         /* cmdline */
514                         if (arg_show_cmdline)
515                                 pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
516                 }
517         }
518
519         return 0;
520 }