src/basic/util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6 ***/
   7
   8 #include <alloca.h>
   9 //#include <errno.h>
  10 //#include <fcntl.h>
  11 #include <sched.h>
  12 //#include <signal.h>
  13 //#include <stdarg.h>
  14 //#include <stdio.h>
  15 #include <stdlib.h>
  16 //#include <string.h>
  17 //#include <sys/mman.h>
  18 #include <sys/prctl.h>
  19 #include <sys/statfs.h>
  20 #include <sys/sysmacros.h>
  21 //#include <sys/types.h>
  22 //#include <unistd.h>
  23
  24 #include "alloc-util.h"
  25 //#include "btrfs-util.h"
  26 #include "build.h"
  27 #include "cgroup-util.h"
  28 //#include "def.h"
  29 //#include "device-nodes.h"
  30 #include "dirent-util.h"
  31 #include "fd-util.h"
  32 #include "fileio.h"
  33 //#include "format-util.h"
  34 #include "hashmap.h"
  35 #include "hostname-util.h"
  36 //#include "log.h"
  37 #include "macro.h"
  38 //#include "missing.h"
  39 #include "parse-util.h"
  40 //#include "path-util.h"
  41 #include "process-util.h"
  42 #include "procfs-util.h"
  43 #include "set.h"
  44 #include "signal-util.h"
  45 #include "stat-util.h"
  46 #include "string-util.h"
  47 #include "strv.h"
  48 #include "time-util.h"
  49 #include "umask-util.h"
  50 #include "user-util.h"
  51 #include "util.h"
  52 //#include "virt.h"
  53
  54 int saved_argc = 0;                 /* NOTE(review): presumably the process' argc, stored by the program entry point; never written in the code visible here, confirm against callers */
  55 char **saved_argv = NULL;           /* NOTE(review): presumably the process' argv, stored alongside saved_argc; confirm against callers */
  56 static int saved_in_initrd = -1;    /* Cache for in_initrd(): negative means "not determined yet", otherwise the stored answer */
  57
  58 size_t page_size(void) {
  59         static thread_local size_t pgsz = 0;
  60         long r;
  61
  62         if (_likely_(pgsz > 0))
  63                 return pgsz;
  64
  65         r = sysconf(_SC_PAGESIZE);
  66         assert(r > 0);
  67
  68         pgsz = (size_t) r;
  69         return pgsz;
  70 }
  71
  72 #if 0 /// UNNEEDED by elogind
  73 bool plymouth_running(void) {
  74         return access("/run/plymouth/pid", F_OK) >= 0;
  75 }
  76 #endif // 0
  77
  78 bool display_is_local(const char *display) {
  79         assert(display);
  80
  81         return
  82                 display[0] == ':' &&
  83                 display[1] >= '0' &&
  84                 display[1] <= '9';
  85 }
  86
  87 int socket_from_display(const char *display, char **path) {
  88         size_t k;
  89         char *f, *c;
  90
  91         assert(display);
  92         assert(path);
  93
  94         if (!display_is_local(display))
  95                 return -EINVAL;
  96
  97         k = strspn(display+1, "0123456789");
  98
  99         f = new(char, STRLEN("/tmp/.X11-unix/X") + k + 1);
 100         if (!f)
 101                 return -ENOMEM;
 102
 103         c = stpcpy(f, "/tmp/.X11-unix/X");
 104         memcpy(c, display+1, k);
 105         c[k] = 0;
 106
 107         *path = f;
 108
 109         return 0;
 110 }
 111
 112 #if 0 /// UNNEEDED by elogind
 113 bool kexec_loaded(void) {
 114        _cleanup_free_ char *s = NULL;
 115
 116        if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
 117                return false;
 118
 119        return s[0] == '1';
 120 }
 121
 122 int prot_from_flags(int flags) {
 123
 124         switch (flags & O_ACCMODE) {
 125
 126         case O_RDONLY:
 127                 return PROT_READ;
 128
 129         case O_WRONLY:
 130                 return PROT_WRITE;
 131
 132         case O_RDWR:
 133                 return PROT_READ|PROT_WRITE;
 134
 135         default:
 136                 return -EINVAL;
 137         }
 138 }
 139 #endif // 0
 140
 141 bool in_initrd(void) {
 142         struct statfs s;
 143
 144         if (saved_in_initrd >= 0)
 145                 return saved_in_initrd;
 146
 147         /* We make two checks here:
 148          *
 149          * 1. the flag file /etc/initrd-release must exist
 150          * 2. the root file system must be a memory file system
 151          *
 152          * The second check is extra paranoia, since misdetecting an
 153          * initrd can have bad consequences due the initrd
 154          * emptying when transititioning to the main systemd.
 155          */
 156
 157         saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
 158                           statfs("/", &s) >= 0 &&
 159                           is_temporary_fs(&s);
 160
 161         return saved_in_initrd;
 162 }
 163
 164 #if 0 /// UNNEEDED by elogind
 165 void in_initrd_force(bool value) {
 166         saved_in_initrd = value;
 167 }
 168
 169 /* hey glibc, APIs with callbacks without a user pointer are so useless */
 170 void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
 171                  int (*compar) (const void *, const void *, void *), void *arg) {
 172         size_t l, u, idx;
 173         const void *p;
 174         int comparison;
 175
 176         assert(!size_multiply_overflow(nmemb, size));
 177
 178         l = 0;
 179         u = nmemb;
 180         while (l < u) {
 181                 idx = (l + u) / 2;
 182                 p = (const uint8_t*) base + idx * size;
 183                 comparison = compar(key, p, arg);
 184                 if (comparison < 0)
 185                         u = idx;
 186                 else if (comparison > 0)
 187                         l = idx + 1;
 188                 else
 189                         return (void *)p;
 190         }
 191         return NULL;
 192 }
 193 #endif // 0
 194
 195 int on_ac_power(void) {
 196         bool found_offline = false, found_online = false;
 197         _cleanup_closedir_ DIR *d = NULL;
 198         struct dirent *de;
 199
 200         d = opendir("/sys/class/power_supply");
 201         if (!d)
 202                 return errno == ENOENT ? true : -errno;
 203
 204         FOREACH_DIRENT(de, d, return -errno) {
 205                 _cleanup_close_ int fd = -1, device = -1;
 206                 char contents[6];
 207                 ssize_t n;
 208
 209                 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
 210                 if (device < 0) {
 211                         if (IN_SET(errno, ENOENT, ENOTDIR))
 212                                 continue;
 213
 214                         return -errno;
 215                 }
 216
 217                 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 218                 if (fd < 0) {
 219                         if (errno == ENOENT)
 220                                 continue;
 221
 222                         return -errno;
 223                 }
 224
 225                 n = read(fd, contents, sizeof(contents));
 226                 if (n < 0)
 227                         return -errno;
 228
 229                 if (n != 6 || memcmp(contents, "Mains\n", 6))
 230                         continue;
 231
 232                 safe_close(fd);
 233                 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 234                 if (fd < 0) {
 235                         if (errno == ENOENT)
 236                                 continue;
 237
 238                         return -errno;
 239                 }
 240
 241                 n = read(fd, contents, sizeof(contents));
 242                 if (n < 0)
 243                         return -errno;
 244
 245                 if (n != 2 || contents[1] != '\n')
 246                         return -EIO;
 247
 248                 if (contents[0] == '1') {
 249                         found_online = true;
 250                         break;
 251                 } else if (contents[0] == '0')
 252                         found_offline = true;
 253                 else
 254                         return -EIO;
 255         }
 256
 257         return found_online || !found_offline;
 258 }
 259
 260 int container_get_leader(const char *machine, pid_t *pid) {
 261         _cleanup_free_ char *s = NULL, *class = NULL;
 262         const char *p;
 263         pid_t leader;
 264         int r;
 265
 266         assert(machine);
 267         assert(pid);
 268
 269         if (!machine_name_is_valid(machine))
 270                 return -EINVAL;
 271
 272         p = strjoina("/run/systemd/machines/", machine);
 273         r = parse_env_file(NULL, p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
 274         if (r == -ENOENT)
 275                 return -EHOSTDOWN;
 276         if (r < 0)
 277                 return r;
 278         if (!s)
 279                 return -EIO;
 280
 281         if (!streq_ptr(class, "container"))
 282                 return -EIO;
 283
 284         r = parse_pid(s, &leader);
 285         if (r < 0)
 286                 return r;
 287         if (leader <= 1)
 288                 return -EIO;
 289
 290         *pid = leader;
 291         return 0;
 292 }
 293
 294 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
 295         _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
 296         int rfd = -1;
 297
 298         assert(pid >= 0);
 299
 300         if (mntns_fd) {
 301                 const char *mntns;
 302
 303                 mntns = procfs_file_alloca(pid, "ns/mnt");
 304                 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 305                 if (mntnsfd < 0)
 306                         return -errno;
 307         }
 308
 309         if (pidns_fd) {
 310                 const char *pidns;
 311
 312                 pidns = procfs_file_alloca(pid, "ns/pid");
 313                 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 314                 if (pidnsfd < 0)
 315                         return -errno;
 316         }
 317
 318         if (netns_fd) {
 319                 const char *netns;
 320
 321                 netns = procfs_file_alloca(pid, "ns/net");
 322                 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 323                 if (netnsfd < 0)
 324                         return -errno;
 325         }
 326
 327         if (userns_fd) {
 328                 const char *userns;
 329
 330                 userns = procfs_file_alloca(pid, "ns/user");
 331                 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 332                 if (usernsfd < 0 && errno != ENOENT)
 333                         return -errno;
 334         }
 335
 336         if (root_fd) {
 337                 const char *root;
 338
 339                 root = procfs_file_alloca(pid, "root");
 340                 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
 341                 if (rfd < 0)
 342                         return -errno;
 343         }
 344
 345         if (pidns_fd)
 346                 *pidns_fd = pidnsfd;
 347
 348         if (mntns_fd)
 349                 *mntns_fd = mntnsfd;
 350
 351         if (netns_fd)
 352                 *netns_fd = netnsfd;
 353
 354         if (userns_fd)
 355                 *userns_fd = usernsfd;
 356
 357         if (root_fd)
 358                 *root_fd = rfd;
 359
 360         pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
 361
 362         return 0;
 363 }
 364
 365 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
 366         if (userns_fd >= 0) {
 367                 /* Can't setns to your own userns, since then you could
 368                  * escalate from non-root to root in your own namespace, so
 369                  * check if namespaces equal before attempting to enter. */
 370                 _cleanup_free_ char *userns_fd_path = NULL;
 371                 int r;
 372                 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
 373                         return -ENOMEM;
 374
 375                 r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
 376                 if (r < 0)
 377                         return r;
 378                 if (r)
 379                         userns_fd = -1;
 380         }
 381
 382         if (pidns_fd >= 0)
 383                 if (setns(pidns_fd, CLONE_NEWPID) < 0)
 384                         return -errno;
 385
 386         if (mntns_fd >= 0)
 387                 if (setns(mntns_fd, CLONE_NEWNS) < 0)
 388                         return -errno;
 389
 390         if (netns_fd >= 0)
 391                 if (setns(netns_fd, CLONE_NEWNET) < 0)
 392                         return -errno;
 393
 394         if (userns_fd >= 0)
 395                 if (setns(userns_fd, CLONE_NEWUSER) < 0)
 396                         return -errno;
 397
 398         if (root_fd >= 0) {
 399                 if (fchdir(root_fd) < 0)
 400                         return -errno;
 401
 402                 if (chroot(".") < 0)
 403                         return -errno;
 404         }
 405
 406         return reset_uid_gid();
 407 }
 408
 409 uint64_t physical_memory(void) {
 410         _cleanup_free_ char *root = NULL, *value = NULL;
 411         uint64_t mem, lim;
 412         size_t ps;
 413         long sc;
 414         int r;
 415
 416         /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
 417          * memory.
 418          *
 419          * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
 420          * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
 421
 422         sc = sysconf(_SC_PHYS_PAGES);
 423         assert(sc > 0);
 424
 425         ps = page_size();
 426         mem = (uint64_t) sc * (uint64_t) ps;
 427
 428         r = cg_get_root_path(&root);
 429         if (r < 0) {
 430                 log_debug_errno(r, "Failed to determine root cgroup, ignoring cgroup memory limit: %m");
 431                 return mem;
 432         }
 433
 434         r = cg_all_unified();
 435         if (r < 0) {
 436                 log_debug_errno(r, "Failed to determine root unified mode, ignoring cgroup memory limit: %m");
 437                 return mem;
 438         }
 439         if (r > 0) {
 440                 r = cg_get_attribute("memory", root, "memory.max", &value);
 441                 if (r < 0) {
 442                         log_debug_errno(r, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m");
 443                         return mem;
 444                 }
 445
 446                 if (streq(value, "max"))
 447                         return mem;
 448         } else {
 449                 r = cg_get_attribute("memory", root, "memory.limit_in_bytes", &value);
 450                 if (r < 0) {
 451                         log_debug_errno(r, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m");
 452                         return mem;
 453                 }
 454         }
 455
 456         r = safe_atou64(value, &lim);
 457         if (r < 0) {
 458                 log_debug_errno(r, "Failed to parse cgroup memory limit '%s', ignoring: %m", value);
 459                 return mem;
 460         }
 461         if (lim == UINT64_MAX)
 462                 return mem;
 463
 464         /* Make sure the limit is a multiple of our own page size */
 465         lim /= ps;
 466         lim *= ps;
 467
 468         return MIN(mem, lim);
 469 }
 470
 471 uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
 472         uint64_t p, m, ps, r;
 473
 474         assert(max > 0);
 475
 476         /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
 477          * the result is a multiple of the page size (rounds down). */
 478
 479         ps = page_size();
 480         assert(ps > 0);
 481
 482         p = physical_memory() / ps;
 483         assert(p > 0);
 484
 485         m = p * v;
 486         if (m / p != v)
 487                 return UINT64_MAX;
 488
 489         m /= max;
 490
 491         r = m * ps;
 492         if (r / ps != m)
 493                 return UINT64_MAX;
 494
 495         return r;
 496 }
 497
 498 uint64_t system_tasks_max(void) {
 499
 500         uint64_t a = TASKS_MAX, b = TASKS_MAX;
 501         _cleanup_free_ char *root = NULL;
 502         int r;
 503
 504         /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
 505          * limit:
 506          *
 507          * a) the maximum tasks value the kernel allows on this architecture
 508          * b) the cgroups pids_max attribute for the system
 509          * c) the kernel's configured maximum PID value
 510          *
 511          * And then pick the smallest of the three */
 512
 513         r = procfs_tasks_get_limit(&a);
 514         if (r < 0)
 515                 log_debug_errno(r, "Failed to read maximum number of tasks from /proc, ignoring: %m");
 516
 517         r = cg_get_root_path(&root);
 518         if (r < 0)
 519                 log_debug_errno(r, "Failed to determine cgroup root path, ignoring: %m");
 520         else {
 521                 _cleanup_free_ char *value = NULL;
 522
 523                 r = cg_get_attribute("pids", root, "pids.max", &value);
 524                 if (r < 0)
 525                         log_debug_errno(r, "Failed to read pids.max attribute of cgroup root, ignoring: %m");
 526                 else if (!streq(value, "max")) {
 527                         r = safe_atou64(value, &b);
 528                         if (r < 0)
 529                                 log_debug_errno(r, "Failed to parse pids.max attribute of cgroup root, ignoring: %m");
 530                 }
 531         }
 532
 533         return MIN3(TASKS_MAX,
 534                     a <= 0 ? TASKS_MAX : a,
 535                     b <= 0 ? TASKS_MAX : b);
 536 }
 537
 538 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
 539         uint64_t t, m;
 540
 541         assert(max > 0);
 542
 543         /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
 544          * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
 545
 546         t = system_tasks_max();
 547         assert(t > 0);
 548
 549         m = t * v;
 550         if (m / t != v) /* overflow? */
 551                 return UINT64_MAX;
 552
 553         return m / max;
 554 }
 555
 556 int version(void) {
 557         puts(PACKAGE_STRING "\n"
 558              SYSTEMD_FEATURES);
 559         return 0;
 560 }
 561
 562 #if 0 /// UNNEEDED by elogind
 563 /* This is a direct translation of str_verscmp from boot.c */
 564 static bool is_digit(int c) {
 565         return c >= '0' && c <= '9';
 566 }
 567
 568 static int c_order(int c) {
 569         if (c == 0 || is_digit(c))
 570                 return 0;
 571
 572         if ((c >= 'a') && (c <= 'z'))
 573                 return c;
 574
 575         return c + 0x10000;
 576 }
 577
 578 int str_verscmp(const char *s1, const char *s2) {
 579         const char *os1, *os2;
 580
 581         assert(s1);
 582         assert(s2);
 583
 584         os1 = s1;
 585         os2 = s2;
 586
 587         while (*s1 || *s2) {
 588                 int first;
 589
 590                 while ((*s1 && !is_digit(*s1)) || (*s2 && !is_digit(*s2))) {
 591                         int order;
 592
 593                         order = c_order(*s1) - c_order(*s2);
 594                         if (order != 0)
 595                                 return order;
 596                         s1++;
 597                         s2++;
 598                 }
 599
 600                 while (*s1 == '0')
 601                         s1++;
 602                 while (*s2 == '0')
 603                         s2++;
 604
 605                 first = 0;
 606                 while (is_digit(*s1) && is_digit(*s2)) {
 607                         if (first == 0)
 608                                 first = *s1 - *s2;
 609                         s1++;
 610                         s2++;
 611                 }
 612
 613                 if (is_digit(*s1))
 614                         return 1;
 615                 if (is_digit(*s2))
 616                         return -1;
 617
 618                 if (first != 0)
 619                         return first;
 620         }
 621
 622         return strcmp(os1, os2);
 623 }
 624
 625 /* Turn off core dumps but only if we're running outside of a container. */
 626 void disable_coredumps(void) {
 627         int r;
 628
 629         if (detect_container() > 0)
 630                 return;
 631
 632         r = write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0);
 633         if (r < 0)
 634                 log_debug_errno(r, "Failed to turn off coredumps, ignoring: %m");
 635 }
 636 #endif // 0