src/basic/util.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2010 Lennart Poettering
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 #include <alloca.h>
  21 //#include <dirent.h>
  22 //#include <errno.h>
  23 //#include <fcntl.h>
  24 #include <sched.h>
  25 //#include <signal.h>
  26 //#include <stdarg.h>
  27 //#include <stdio.h>
  28 #include <stdlib.h>
  29 //#include <string.h>
  30 //#include <sys/mman.h>
  31 #include <sys/prctl.h>
  32 #include <sys/statfs.h>
  33 #include <sys/sysmacros.h>
  34 //#include <sys/types.h>
  35 //#include <unistd.h>
  36
  37 #include "alloc-util.h"
  38 #include "build.h"
  39 #include "cgroup-util.h"
  40 //#include "def.h"
  41 #include "dirent-util.h"
  42 #include "fd-util.h"
  43 #include "fileio.h"
  44 //#include "formats-util.h"
  45 #include "hashmap.h"
  46 #include "hostname-util.h"
  47 //#include "log.h"
  48 #include "macro.h"
  49 //#include "missing.h"
  50 #include "parse-util.h"
  51 //#include "path-util.h"
  52 #include "process-util.h"
  53 #include "set.h"
  54 #include "signal-util.h"
  55 #include "stat-util.h"
  56 #include "string-util.h"
  57 #include "strv.h"
  58 #include "time-util.h"
  59 #include "umask-util.h"
  60 #include "user-util.h"
  61 #include "util.h"
  62
/* Process argument count and vector. Both start out empty here; presumably they are filled in
 * by the program's entry point — TODO confirm against callers, no assignment is visible here. */
int saved_argc = 0;
char **saved_argv = NULL;
/* Cached answer of in_initrd(): negative while not yet determined, otherwise 0 or 1.
 * in_initrd_force() overwrites it directly. */
static int saved_in_initrd = -1;
  66
  67 size_t page_size(void) {
  68         static thread_local size_t pgsz = 0;
  69         long r;
  70
  71         if (_likely_(pgsz > 0))
  72                 return pgsz;
  73
  74         r = sysconf(_SC_PAGESIZE);
  75         assert(r > 0);
  76
  77         pgsz = (size_t) r;
  78         return pgsz;
  79 }
  80
  81 #if 0 /// UNNEEDED by elogind
  82 bool plymouth_running(void) {
  83         return access("/run/plymouth/pid", F_OK) >= 0;
  84 }
  85 #endif // 0
  86
  87 bool display_is_local(const char *display) {
  88         assert(display);
  89
  90         return
  91                 display[0] == ':' &&
  92                 display[1] >= '0' &&
  93                 display[1] <= '9';
  94 }
  95
  96 int socket_from_display(const char *display, char **path) {
  97         size_t k;
  98         char *f, *c;
  99
 100         assert(display);
 101         assert(path);
 102
 103         if (!display_is_local(display))
 104                 return -EINVAL;
 105
 106         k = strspn(display+1, "0123456789");
 107
 108         f = new(char, strlen("/tmp/.X11-unix/X") + k + 1);
 109         if (!f)
 110                 return -ENOMEM;
 111
 112         c = stpcpy(f, "/tmp/.X11-unix/X");
 113         memcpy(c, display+1, k);
 114         c[k] = 0;
 115
 116         *path = f;
 117
 118         return 0;
 119 }
 120
 121 #if 0 /// UNNEEDED by elogind
 122 int block_get_whole_disk(dev_t d, dev_t *ret) {
 123         char *p, *s;
 124         int r;
 125         unsigned n, m;
 126
 127         assert(ret);
 128
 129         /* If it has a queue this is good enough for us */
 130         if (asprintf(&p, "/sys/dev/block/%u:%u/queue", major(d), minor(d)) < 0)
 131                 return -ENOMEM;
 132
 133         r = access(p, F_OK);
 134         free(p);
 135
 136         if (r >= 0) {
 137                 *ret = d;
 138                 return 0;
 139         }
 140
 141         /* If it is a partition find the originating device */
 142         if (asprintf(&p, "/sys/dev/block/%u:%u/partition", major(d), minor(d)) < 0)
 143                 return -ENOMEM;
 144
 145         r = access(p, F_OK);
 146         free(p);
 147
 148         if (r < 0)
 149                 return -ENOENT;
 150
 151         /* Get parent dev_t */
 152         if (asprintf(&p, "/sys/dev/block/%u:%u/../dev", major(d), minor(d)) < 0)
 153                 return -ENOMEM;
 154
 155         r = read_one_line_file(p, &s);
 156         free(p);
 157
 158         if (r < 0)
 159                 return r;
 160
 161         r = sscanf(s, "%u:%u", &m, &n);
 162         free(s);
 163
 164         if (r != 2)
 165                 return -EINVAL;
 166
 167         /* Only return this if it is really good enough for us. */
 168         if (asprintf(&p, "/sys/dev/block/%u:%u/queue", m, n) < 0)
 169                 return -ENOMEM;
 170
 171         r = access(p, F_OK);
 172         free(p);
 173
 174         if (r >= 0) {
 175                 *ret = makedev(m, n);
 176                 return 0;
 177         }
 178
 179         return -ENOENT;
 180 }
 181
 182 bool kexec_loaded(void) {
 183        bool loaded = false;
 184        char *s;
 185
 186        if (read_one_line_file("/sys/kernel/kexec_loaded", &s) >= 0) {
 187                if (s[0] == '1')
 188                        loaded = true;
 189                free(s);
 190        }
 191        return loaded;
 192 }
 193
 194 int prot_from_flags(int flags) {
 195
 196         switch (flags & O_ACCMODE) {
 197
 198         case O_RDONLY:
 199                 return PROT_READ;
 200
 201         case O_WRONLY:
 202                 return PROT_WRITE;
 203
 204         case O_RDWR:
 205                 return PROT_READ|PROT_WRITE;
 206
 207         default:
 208                 return -EINVAL;
 209         }
 210 }
 211 #endif // 0
 212
 213 int fork_agent(pid_t *pid, const int except[], unsigned n_except, const char *path, ...) {
 214         bool stdout_is_tty, stderr_is_tty;
 215         pid_t parent_pid, agent_pid;
 216         sigset_t ss, saved_ss;
 217         unsigned n, i;
 218         va_list ap;
 219         char **l;
 220
 221         assert(pid);
 222         assert(path);
 223
 224         /* Spawns a temporary TTY agent, making sure it goes away when
 225          * we go away */
 226
 227         parent_pid = getpid();
 228
 229         /* First we temporarily block all signals, so that the new
 230          * child has them blocked initially. This way, we can be sure
 231          * that SIGTERMs are not lost we might send to the agent. */
 232         assert_se(sigfillset(&ss) >= 0);
 233         assert_se(sigprocmask(SIG_SETMASK, &ss, &saved_ss) >= 0);
 234
 235         agent_pid = fork();
 236         if (agent_pid < 0) {
 237                 assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0);
 238                 return -errno;
 239         }
 240
 241         if (agent_pid != 0) {
 242                 assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0);
 243                 *pid = agent_pid;
 244                 return 0;
 245         }
 246
 247         /* In the child:
 248          *
 249          * Make sure the agent goes away when the parent dies */
 250         if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
 251                 _exit(EXIT_FAILURE);
 252
 253         /* Make sure we actually can kill the agent, if we need to, in
 254          * case somebody invoked us from a shell script that trapped
 255          * SIGTERM or so... */
 256         (void) reset_all_signal_handlers();
 257         (void) reset_signal_mask();
 258
 259         /* Check whether our parent died before we were able
 260          * to set the death signal and unblock the signals */
 261         if (getppid() != parent_pid)
 262                 _exit(EXIT_SUCCESS);
 263
 264         /* Don't leak fds to the agent */
 265         close_all_fds(except, n_except);
 266
 267         stdout_is_tty = isatty(STDOUT_FILENO);
 268         stderr_is_tty = isatty(STDERR_FILENO);
 269
 270         if (!stdout_is_tty || !stderr_is_tty) {
 271                 int fd;
 272
 273                 /* Detach from stdout/stderr. and reopen
 274                  * /dev/tty for them. This is important to
 275                  * ensure that when systemctl is started via
 276                  * popen() or a similar call that expects to
 277                  * read EOF we actually do generate EOF and
 278                  * not delay this indefinitely by because we
 279                  * keep an unused copy of stdin around. */
 280                 fd = open("/dev/tty", O_WRONLY);
 281                 if (fd < 0) {
 282                         log_error_errno(errno, "Failed to open /dev/tty: %m");
 283                         _exit(EXIT_FAILURE);
 284                 }
 285
 286                 if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
 287                         log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
 288                         _exit(EXIT_FAILURE);
 289                 }
 290
 291                 if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
 292                         log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
 293                         _exit(EXIT_FAILURE);
 294                 }
 295
 296                 if (fd > STDERR_FILENO)
 297                         close(fd);
 298         }
 299
 300         /* Count arguments */
 301         va_start(ap, path);
 302         for (n = 0; va_arg(ap, char*); n++)
 303                 ;
 304         va_end(ap);
 305
 306         /* Allocate strv */
 307         l = alloca(sizeof(char *) * (n + 1));
 308
 309         /* Fill in arguments */
 310         va_start(ap, path);
 311         for (i = 0; i <= n; i++)
 312                 l[i] = va_arg(ap, char*);
 313         va_end(ap);
 314
 315         execv(path, l);
 316         _exit(EXIT_FAILURE);
 317 }
 318
 319 bool in_initrd(void) {
 320         struct statfs s;
 321
 322         if (saved_in_initrd >= 0)
 323                 return saved_in_initrd;
 324
 325         /* We make two checks here:
 326          *
 327          * 1. the flag file /etc/initrd-release must exist
 328          * 2. the root file system must be a memory file system
 329          *
 330          * The second check is extra paranoia, since misdetecting an
 331          * initrd can have bad consequences due the initrd
 332          * emptying when transititioning to the main systemd.
 333          */
 334
 335         saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
 336                           statfs("/", &s) >= 0 &&
 337                           is_temporary_fs(&s);
 338
 339         return saved_in_initrd;
 340 }
 341
 342 void in_initrd_force(bool value) {
 343         saved_in_initrd = value;
 344 }
 345
 346 #if 0 /// UNNEEDED by elogind
 347 /* hey glibc, APIs with callbacks without a user pointer are so useless */
 348 void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
 349                  int (*compar) (const void *, const void *, void *), void *arg) {
 350         size_t l, u, idx;
 351         const void *p;
 352         int comparison;
 353
 354         l = 0;
 355         u = nmemb;
 356         while (l < u) {
 357                 idx = (l + u) / 2;
 358                 p = (const char *) base + idx * size;
 359                 comparison = compar(key, p, arg);
 360                 if (comparison < 0)
 361                         u = idx;
 362                 else if (comparison > 0)
 363                         l = idx + 1;
 364                 else
 365                         return (void *)p;
 366         }
 367         return NULL;
 368 }
 369
 370 int on_ac_power(void) {
 371         bool found_offline = false, found_online = false;
 372         _cleanup_closedir_ DIR *d = NULL;
 373         struct dirent *de;
 374
 375         d = opendir("/sys/class/power_supply");
 376         if (!d)
 377                 return errno == ENOENT ? true : -errno;
 378
 379         FOREACH_DIRENT(de, d, return -errno) {
 380                 _cleanup_close_ int fd = -1, device = -1;
 381                 char contents[6];
 382                 ssize_t n;
 383
 384                 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
 385                 if (device < 0) {
 386                         if (errno == ENOENT || errno == ENOTDIR)
 387                                 continue;
 388
 389                         return -errno;
 390                 }
 391
 392                 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 393                 if (fd < 0) {
 394                         if (errno == ENOENT)
 395                                 continue;
 396
 397                         return -errno;
 398                 }
 399
 400                 n = read(fd, contents, sizeof(contents));
 401                 if (n < 0)
 402                         return -errno;
 403
 404                 if (n != 6 || memcmp(contents, "Mains\n", 6))
 405                         continue;
 406
 407                 safe_close(fd);
 408                 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 409                 if (fd < 0) {
 410                         if (errno == ENOENT)
 411                                 continue;
 412
 413                         return -errno;
 414                 }
 415
 416                 n = read(fd, contents, sizeof(contents));
 417                 if (n < 0)
 418                         return -errno;
 419
 420                 if (n != 2 || contents[1] != '\n')
 421                         return -EIO;
 422
 423                 if (contents[0] == '1') {
 424                         found_online = true;
 425                         break;
 426                 } else if (contents[0] == '0')
 427                         found_offline = true;
 428                 else
 429                         return -EIO;
 430         }
 431
 432         return found_online || !found_offline;
 433 }
 434
 435 #endif // 0
 436 int container_get_leader(const char *machine, pid_t *pid) {
 437         _cleanup_free_ char *s = NULL, *class = NULL;
 438         const char *p;
 439         pid_t leader;
 440         int r;
 441
 442         assert(machine);
 443         assert(pid);
 444
 445         if (!machine_name_is_valid(machine))
 446                 return -EINVAL;
 447
 448         p = strjoina("/run/systemd/machines/", machine);
 449         r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
 450         if (r == -ENOENT)
 451                 return -EHOSTDOWN;
 452         if (r < 0)
 453                 return r;
 454         if (!s)
 455                 return -EIO;
 456
 457         if (!streq_ptr(class, "container"))
 458                 return -EIO;
 459
 460         r = parse_pid(s, &leader);
 461         if (r < 0)
 462                 return r;
 463         if (leader <= 1)
 464                 return -EIO;
 465
 466         *pid = leader;
 467         return 0;
 468 }
 469
 470 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
 471         _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
 472         int rfd = -1;
 473
 474         assert(pid >= 0);
 475
 476         if (mntns_fd) {
 477                 const char *mntns;
 478
 479                 mntns = procfs_file_alloca(pid, "ns/mnt");
 480                 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 481                 if (mntnsfd < 0)
 482                         return -errno;
 483         }
 484
 485         if (pidns_fd) {
 486                 const char *pidns;
 487
 488                 pidns = procfs_file_alloca(pid, "ns/pid");
 489                 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 490                 if (pidnsfd < 0)
 491                         return -errno;
 492         }
 493
 494         if (netns_fd) {
 495                 const char *netns;
 496
 497                 netns = procfs_file_alloca(pid, "ns/net");
 498                 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 499                 if (netnsfd < 0)
 500                         return -errno;
 501         }
 502
 503         if (userns_fd) {
 504                 const char *userns;
 505
 506                 userns = procfs_file_alloca(pid, "ns/user");
 507                 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 508                 if (usernsfd < 0 && errno != ENOENT)
 509                         return -errno;
 510         }
 511
 512         if (root_fd) {
 513                 const char *root;
 514
 515                 root = procfs_file_alloca(pid, "root");
 516                 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
 517                 if (rfd < 0)
 518                         return -errno;
 519         }
 520
 521         if (pidns_fd)
 522                 *pidns_fd = pidnsfd;
 523
 524         if (mntns_fd)
 525                 *mntns_fd = mntnsfd;
 526
 527         if (netns_fd)
 528                 *netns_fd = netnsfd;
 529
 530         if (userns_fd)
 531                 *userns_fd = usernsfd;
 532
 533         if (root_fd)
 534                 *root_fd = rfd;
 535
 536         pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
 537
 538         return 0;
 539 }
 540
 541 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
 542         if (userns_fd >= 0) {
 543                 /* Can't setns to your own userns, since then you could
 544                  * escalate from non-root to root in your own namespace, so
 545                  * check if namespaces equal before attempting to enter. */
 546                 _cleanup_free_ char *userns_fd_path = NULL;
 547                 int r;
 548                 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
 549                         return -ENOMEM;
 550
 551                 r = files_same(userns_fd_path, "/proc/self/ns/user");
 552                 if (r < 0)
 553                         return r;
 554                 if (r)
 555                         userns_fd = -1;
 556         }
 557
 558         if (pidns_fd >= 0)
 559                 if (setns(pidns_fd, CLONE_NEWPID) < 0)
 560                         return -errno;
 561
 562         if (mntns_fd >= 0)
 563                 if (setns(mntns_fd, CLONE_NEWNS) < 0)
 564                         return -errno;
 565
 566         if (netns_fd >= 0)
 567                 if (setns(netns_fd, CLONE_NEWNET) < 0)
 568                         return -errno;
 569
 570         if (userns_fd >= 0)
 571                 if (setns(userns_fd, CLONE_NEWUSER) < 0)
 572                         return -errno;
 573
 574         if (root_fd >= 0) {
 575                 if (fchdir(root_fd) < 0)
 576                         return -errno;
 577
 578                 if (chroot(".") < 0)
 579                         return -errno;
 580         }
 581
 582         return reset_uid_gid();
 583 }
 584
 585 uint64_t physical_memory(void) {
 586         _cleanup_free_ char *root = NULL, *value = NULL;
 587         uint64_t mem, lim;
 588         size_t ps;
 589         long sc;
 590
 591         /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
 592          * memory.
 593          *
 594          * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
 595          * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
 596
 597         sc = sysconf(_SC_PHYS_PAGES);
 598         assert(sc > 0);
 599
 600         ps = page_size();
 601         mem = (uint64_t) sc * (uint64_t) ps;
 602
 603         if (cg_get_root_path(&root) < 0)
 604                 return mem;
 605
 606         if (cg_get_attribute("memory", root, "memory.limit_in_bytes", &value))
 607                 return mem;
 608
 609         if (safe_atou64(value, &lim) < 0)
 610                 return mem;
 611
 612         /* Make sure the limit is a multiple of our own page size */
 613         lim /= ps;
 614         lim *= ps;
 615
 616         return MIN(mem, lim);
 617 }
 618
 619 uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
 620         uint64_t p, m, ps, r;
 621
 622         assert(max > 0);
 623
 624         /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
 625          * the result is a multiple of the page size (rounds down). */
 626
 627         ps = page_size();
 628         assert(ps > 0);
 629
 630         p = physical_memory() / ps;
 631         assert(p > 0);
 632
 633         m = p * v;
 634         if (m / p != v)
 635                 return UINT64_MAX;
 636
 637         m /= max;
 638
 639         r = m * ps;
 640         if (r / ps != m)
 641                 return UINT64_MAX;
 642
 643         return r;
 644 }
 645
 646 uint64_t system_tasks_max(void) {
 647
 648 #if SIZEOF_PID_T == 4
 649 #define TASKS_MAX ((uint64_t) (INT32_MAX-1))
 650 #elif SIZEOF_PID_T == 2
 651 #define TASKS_MAX ((uint64_t) (INT16_MAX-1))
 652 #else
 653 #error "Unknown pid_t size"
 654 #endif
 655
 656         _cleanup_free_ char *value = NULL, *root = NULL;
 657         uint64_t a = TASKS_MAX, b = TASKS_MAX;
 658
 659         /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
 660          * limit:
 661          *
 662          * a) the maximum value for the pid_t type
 663          * b) the cgroups pids_max attribute for the system
 664          * c) the kernel's configure maximum PID value
 665          *
 666          * And then pick the smallest of the three */
 667
 668         if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0)
 669                 (void) safe_atou64(value, &a);
 670
 671         if (cg_get_root_path(&root) >= 0) {
 672                 value = mfree(value);
 673
 674                 if (cg_get_attribute("pids", root, "pids.max", &value) >= 0)
 675                         (void) safe_atou64(value, &b);
 676         }
 677
 678         return MIN3(TASKS_MAX,
 679                     a <= 0 ? TASKS_MAX : a,
 680                     b <= 0 ? TASKS_MAX : b);
 681 }
 682
 683 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
 684         uint64_t t, m;
 685
 686         assert(max > 0);
 687
 688         /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
 689          * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
 690
 691         t = system_tasks_max();
 692         assert(t > 0);
 693
 694         m = t * v;
 695         if (m / t != v) /* overflow? */
 696                 return UINT64_MAX;
 697
 698         return m / max;
 699 }
 700
 701 #if 0 /// UNNEEDED by elogind
 702 int update_reboot_parameter_and_warn(const char *param) {
 703         int r;
 704
 705         if (isempty(param)) {
 706                 if (unlink("/run/systemd/reboot-param") < 0) {
 707                         if (errno == ENOENT)
 708                                 return 0;
 709
 710                         return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m");
 711                 }
 712
 713                 return 0;
 714         }
 715
 716         RUN_WITH_UMASK(0022) {
 717                 r = write_string_file("/run/systemd/reboot-param", param, WRITE_STRING_FILE_CREATE);
 718                 if (r < 0)
 719                         return log_warning_errno(r, "Failed to write reboot parameter file: %m");
 720         }
 721
 722         return 0;
 723 }
 724 #endif // 0
 725
 726 int version(void) {
 727         puts(PACKAGE_STRING "\n"
 728              SYSTEMD_FEATURES);
 729         return 0;
 730 }