src/basic/util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   Copyright 2010 Lennart Poettering
   4 ***/
   5
   6 #include <alloca.h>
   7 //#include <errno.h>
   8 //#include <fcntl.h>
   9 #include <sched.h>
  10 //#include <signal.h>
  11 //#include <stdarg.h>
  12 //#include <stdio.h>
  13 #include <stdlib.h>
  14 //#include <string.h>
  15 //#include <sys/mman.h>
  16 #include <sys/prctl.h>
  17 #include <sys/statfs.h>
  18 #include <sys/sysmacros.h>
  19 //#include <sys/types.h>
  20 //#include <unistd.h>
  21
  22 #include "alloc-util.h"
  23 //#include "btrfs-util.h"
  24 #include "build.h"
  25 #include "cgroup-util.h"
  26 //#include "def.h"
  27 //#include "device-nodes.h"
  28 #include "dirent-util.h"
  29 #include "fd-util.h"
  30 #include "fileio.h"
  31 //#include "format-util.h"
  32 #include "hashmap.h"
  33 #include "hostname-util.h"
  34 //#include "log.h"
  35 #include "macro.h"
  36 //#include "missing.h"
  37 #include "parse-util.h"
  38 //#include "path-util.h"
  39 #include "process-util.h"
  40 #include "procfs-util.h"
  41 #include "set.h"
  42 #include "signal-util.h"
  43 #include "stat-util.h"
  44 #include "string-util.h"
  45 #include "strv.h"
  46 #include "time-util.h"
  47 #include "umask-util.h"
  48 #include "user-util.h"
  49 #include "util.h"
  50 //#include "virt.h"
  51
  52 int saved_argc = 0; /* NOTE(review): presumably a copy of main()'s argc; never assigned in the visible code — confirm against callers */
  53 char **saved_argv = NULL; /* NOTE(review): presumably a copy of main()'s argv; never assigned in the visible code — confirm against callers */
  54 static int saved_in_initrd = -1; /* Result cache for in_initrd(); a negative value means "not determined yet" */
  55
  56 size_t page_size(void) {
  57         static thread_local size_t pgsz = 0;
  58         long r;
  59
  60         if (_likely_(pgsz > 0))
  61                 return pgsz;
  62
  63         r = sysconf(_SC_PAGESIZE);
  64         assert(r > 0);
  65
  66         pgsz = (size_t) r;
  67         return pgsz;
  68 }
  69
  70 #if 0 /// UNNEEDED by elogind
  71 bool plymouth_running(void) {
  72         return access("/run/plymouth/pid", F_OK) >= 0;
  73 }
  74 #endif // 0
  75
  76 bool display_is_local(const char *display) {
  77         assert(display);
  78
  79         return
  80                 display[0] == ':' &&
  81                 display[1] >= '0' &&
  82                 display[1] <= '9';
  83 }
  84
  85 int socket_from_display(const char *display, char **path) {
  86         size_t k;
  87         char *f, *c;
  88
  89         assert(display);
  90         assert(path);
  91
  92         if (!display_is_local(display))
  93                 return -EINVAL;
  94
  95         k = strspn(display+1, "0123456789");
  96
  97         f = new(char, STRLEN("/tmp/.X11-unix/X") + k + 1);
  98         if (!f)
  99                 return -ENOMEM;
 100
 101         c = stpcpy(f, "/tmp/.X11-unix/X");
 102         memcpy(c, display+1, k);
 103         c[k] = 0;
 104
 105         *path = f;
 106
 107         return 0;
 108 }
 109
 110 #if 0 /// UNNEEDED by elogind
 111 bool kexec_loaded(void) {
 112        _cleanup_free_ char *s = NULL;
 113
 114        if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
 115                return false;
 116
 117        return s[0] == '1';
 118 }
 119
 120 int prot_from_flags(int flags) {
 121
 122         switch (flags & O_ACCMODE) {
 123
 124         case O_RDONLY:
 125                 return PROT_READ;
 126
 127         case O_WRONLY:
 128                 return PROT_WRITE;
 129
 130         case O_RDWR:
 131                 return PROT_READ|PROT_WRITE;
 132
 133         default:
 134                 return -EINVAL;
 135         }
 136 }
 137 #endif // 0
 138
 139 bool in_initrd(void) {
 140         struct statfs s;
 141
 142         if (saved_in_initrd >= 0)
 143                 return saved_in_initrd;
 144
 145         /* We make two checks here:
 146          *
 147          * 1. the flag file /etc/initrd-release must exist
 148          * 2. the root file system must be a memory file system
 149          *
 150          * The second check is extra paranoia, since misdetecting an
 151          * initrd can have bad consequences due the initrd
 152          * emptying when transititioning to the main systemd.
 153          */
 154
 155         saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
 156                           statfs("/", &s) >= 0 &&
 157                           is_temporary_fs(&s);
 158
 159         return saved_in_initrd;
 160 }
 161
 162 #if 0 /// UNNEEDED by elogind
 163 void in_initrd_force(bool value) {
 164         saved_in_initrd = value;
 165 }
 166
 167 /* hey glibc, APIs with callbacks without a user pointer are so useless */
 168 void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
 169                  int (*compar) (const void *, const void *, void *), void *arg) {
 170         size_t l, u, idx;
 171         const void *p;
 172         int comparison;
 173
 174         assert(!size_multiply_overflow(nmemb, size));
 175
 176         l = 0;
 177         u = nmemb;
 178         while (l < u) {
 179                 idx = (l + u) / 2;
 180                 p = (const uint8_t*) base + idx * size;
 181                 comparison = compar(key, p, arg);
 182                 if (comparison < 0)
 183                         u = idx;
 184                 else if (comparison > 0)
 185                         l = idx + 1;
 186                 else
 187                         return (void *)p;
 188         }
 189         return NULL;
 190 }
 191 #endif // 0
 192
 193 int on_ac_power(void) {
 194         bool found_offline = false, found_online = false;
 195         _cleanup_closedir_ DIR *d = NULL;
 196         struct dirent *de;
 197
 198         d = opendir("/sys/class/power_supply");
 199         if (!d)
 200                 return errno == ENOENT ? true : -errno;
 201
 202         FOREACH_DIRENT(de, d, return -errno) {
 203                 _cleanup_close_ int fd = -1, device = -1;
 204                 char contents[6];
 205                 ssize_t n;
 206
 207                 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
 208                 if (device < 0) {
 209                         if (IN_SET(errno, ENOENT, ENOTDIR))
 210                                 continue;
 211
 212                         return -errno;
 213                 }
 214
 215                 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 216                 if (fd < 0) {
 217                         if (errno == ENOENT)
 218                                 continue;
 219
 220                         return -errno;
 221                 }
 222
 223                 n = read(fd, contents, sizeof(contents));
 224                 if (n < 0)
 225                         return -errno;
 226
 227                 if (n != 6 || memcmp(contents, "Mains\n", 6))
 228                         continue;
 229
 230                 safe_close(fd);
 231                 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 232                 if (fd < 0) {
 233                         if (errno == ENOENT)
 234                                 continue;
 235
 236                         return -errno;
 237                 }
 238
 239                 n = read(fd, contents, sizeof(contents));
 240                 if (n < 0)
 241                         return -errno;
 242
 243                 if (n != 2 || contents[1] != '\n')
 244                         return -EIO;
 245
 246                 if (contents[0] == '1') {
 247                         found_online = true;
 248                         break;
 249                 } else if (contents[0] == '0')
 250                         found_offline = true;
 251                 else
 252                         return -EIO;
 253         }
 254
 255         return found_online || !found_offline;
 256 }
 257
 258 int container_get_leader(const char *machine, pid_t *pid) {
 259         _cleanup_free_ char *s = NULL, *class = NULL;
 260         const char *p;
 261         pid_t leader;
 262         int r;
 263
 264         assert(machine);
 265         assert(pid);
 266
 267         if (!machine_name_is_valid(machine))
 268                 return -EINVAL;
 269
 270         p = strjoina("/run/systemd/machines/", machine);
 271         r = parse_env_file(NULL, p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
 272         if (r == -ENOENT)
 273                 return -EHOSTDOWN;
 274         if (r < 0)
 275                 return r;
 276         if (!s)
 277                 return -EIO;
 278
 279         if (!streq_ptr(class, "container"))
 280                 return -EIO;
 281
 282         r = parse_pid(s, &leader);
 283         if (r < 0)
 284                 return r;
 285         if (leader <= 1)
 286                 return -EIO;
 287
 288         *pid = leader;
 289         return 0;
 290 }
 291
 292 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
 293         _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
 294         int rfd = -1;
 295
 296         assert(pid >= 0);
 297
 298         if (mntns_fd) {
 299                 const char *mntns;
 300
 301                 mntns = procfs_file_alloca(pid, "ns/mnt");
 302                 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 303                 if (mntnsfd < 0)
 304                         return -errno;
 305         }
 306
 307         if (pidns_fd) {
 308                 const char *pidns;
 309
 310                 pidns = procfs_file_alloca(pid, "ns/pid");
 311                 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 312                 if (pidnsfd < 0)
 313                         return -errno;
 314         }
 315
 316         if (netns_fd) {
 317                 const char *netns;
 318
 319                 netns = procfs_file_alloca(pid, "ns/net");
 320                 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 321                 if (netnsfd < 0)
 322                         return -errno;
 323         }
 324
 325         if (userns_fd) {
 326                 const char *userns;
 327
 328                 userns = procfs_file_alloca(pid, "ns/user");
 329                 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 330                 if (usernsfd < 0 && errno != ENOENT)
 331                         return -errno;
 332         }
 333
 334         if (root_fd) {
 335                 const char *root;
 336
 337                 root = procfs_file_alloca(pid, "root");
 338                 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
 339                 if (rfd < 0)
 340                         return -errno;
 341         }
 342
 343         if (pidns_fd)
 344                 *pidns_fd = pidnsfd;
 345
 346         if (mntns_fd)
 347                 *mntns_fd = mntnsfd;
 348
 349         if (netns_fd)
 350                 *netns_fd = netnsfd;
 351
 352         if (userns_fd)
 353                 *userns_fd = usernsfd;
 354
 355         if (root_fd)
 356                 *root_fd = rfd;
 357
 358         pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
 359
 360         return 0;
 361 }
 362
 363 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
 364         if (userns_fd >= 0) {
 365                 /* Can't setns to your own userns, since then you could
 366                  * escalate from non-root to root in your own namespace, so
 367                  * check if namespaces equal before attempting to enter. */
 368                 _cleanup_free_ char *userns_fd_path = NULL;
 369                 int r;
 370                 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
 371                         return -ENOMEM;
 372
 373                 r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
 374                 if (r < 0)
 375                         return r;
 376                 if (r)
 377                         userns_fd = -1;
 378         }
 379
 380         if (pidns_fd >= 0)
 381                 if (setns(pidns_fd, CLONE_NEWPID) < 0)
 382                         return -errno;
 383
 384         if (mntns_fd >= 0)
 385                 if (setns(mntns_fd, CLONE_NEWNS) < 0)
 386                         return -errno;
 387
 388         if (netns_fd >= 0)
 389                 if (setns(netns_fd, CLONE_NEWNET) < 0)
 390                         return -errno;
 391
 392         if (userns_fd >= 0)
 393                 if (setns(userns_fd, CLONE_NEWUSER) < 0)
 394                         return -errno;
 395
 396         if (root_fd >= 0) {
 397                 if (fchdir(root_fd) < 0)
 398                         return -errno;
 399
 400                 if (chroot(".") < 0)
 401                         return -errno;
 402         }
 403
 404         return reset_uid_gid();
 405 }
 406
 407 uint64_t physical_memory(void) {
 408         _cleanup_free_ char *root = NULL, *value = NULL;
 409         uint64_t mem, lim;
 410         size_t ps;
 411         long sc;
 412         int r;
 413
 414         /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
 415          * memory.
 416          *
 417          * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
 418          * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
 419
 420         sc = sysconf(_SC_PHYS_PAGES);
 421         assert(sc > 0);
 422
 423         ps = page_size();
 424         mem = (uint64_t) sc * (uint64_t) ps;
 425
 426         r = cg_get_root_path(&root);
 427         if (r < 0) {
 428                 log_debug_errno(r, "Failed to determine root cgroup, ignoring cgroup memory limit: %m");
 429                 return mem;
 430         }
 431
 432         r = cg_all_unified();
 433         if (r < 0) {
 434                 log_debug_errno(r, "Failed to determine root unified mode, ignoring cgroup memory limit: %m");
 435                 return mem;
 436         }
 437         if (r > 0) {
 438                 r = cg_get_attribute("memory", root, "memory.max", &value);
 439                 if (r < 0) {
 440                         log_debug_errno(r, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m");
 441                         return mem;
 442                 }
 443
 444                 if (streq(value, "max"))
 445                         return mem;
 446         } else {
 447                 r = cg_get_attribute("memory", root, "memory.limit_in_bytes", &value);
 448                 if (r < 0) {
 449                         log_debug_errno(r, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m");
 450                         return mem;
 451                 }
 452         }
 453
 454         r = safe_atou64(value, &lim);
 455         if (r < 0) {
 456                 log_debug_errno(r, "Failed to parse cgroup memory limit '%s', ignoring: %m", value);
 457                 return mem;
 458         }
 459         if (lim == UINT64_MAX)
 460                 return mem;
 461
 462         /* Make sure the limit is a multiple of our own page size */
 463         lim /= ps;
 464         lim *= ps;
 465
 466         return MIN(mem, lim);
 467 }
 468
 469 uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
 470         uint64_t p, m, ps, r;
 471
 472         assert(max > 0);
 473
 474         /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
 475          * the result is a multiple of the page size (rounds down). */
 476
 477         ps = page_size();
 478         assert(ps > 0);
 479
 480         p = physical_memory() / ps;
 481         assert(p > 0);
 482
 483         m = p * v;
 484         if (m / p != v)
 485                 return UINT64_MAX;
 486
 487         m /= max;
 488
 489         r = m * ps;
 490         if (r / ps != m)
 491                 return UINT64_MAX;
 492
 493         return r;
 494 }
 495
 496 uint64_t system_tasks_max(void) {
 497
 498         uint64_t a = TASKS_MAX, b = TASKS_MAX;
 499         _cleanup_free_ char *root = NULL;
 500         int r;
 501
 502         /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
 503          * limit:
 504          *
 505          * a) the maximum tasks value the kernel allows on this architecture
 506          * b) the cgroups pids_max attribute for the system
 507          * c) the kernel's configured maximum PID value
 508          *
 509          * And then pick the smallest of the three */
 510
 511         r = procfs_tasks_get_limit(&a);
 512         if (r < 0)
 513                 log_debug_errno(r, "Failed to read maximum number of tasks from /proc, ignoring: %m");
 514
 515         r = cg_get_root_path(&root);
 516         if (r < 0)
 517                 log_debug_errno(r, "Failed to determine cgroup root path, ignoring: %m");
 518         else {
 519                 _cleanup_free_ char *value = NULL;
 520
 521                 r = cg_get_attribute("pids", root, "pids.max", &value);
 522                 if (r < 0)
 523                         log_debug_errno(r, "Failed to read pids.max attribute of cgroup root, ignoring: %m");
 524                 else if (!streq(value, "max")) {
 525                         r = safe_atou64(value, &b);
 526                         if (r < 0)
 527                                 log_debug_errno(r, "Failed to parse pids.max attribute of cgroup root, ignoring: %m");
 528                 }
 529         }
 530
 531         return MIN3(TASKS_MAX,
 532                     a <= 0 ? TASKS_MAX : a,
 533                     b <= 0 ? TASKS_MAX : b);
 534 }
 535
 536 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
 537         uint64_t t, m;
 538
 539         assert(max > 0);
 540
 541         /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
 542          * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
 543
 544         t = system_tasks_max();
 545         assert(t > 0);
 546
 547         m = t * v;
 548         if (m / t != v) /* overflow? */
 549                 return UINT64_MAX;
 550
 551         return m / max;
 552 }
 553
 554 int version(void) {
 555         puts(PACKAGE_STRING "\n"
 556              SYSTEMD_FEATURES);
 557         return 0;
 558 }
 559
 560 #if 0 /// UNNEEDED by elogind
 561 /* This is a direct translation of str_verscmp from boot.c */
 562 static bool is_digit(int c) {
 563         return c >= '0' && c <= '9';
 564 }
 565
 566 static int c_order(int c) {
 567         if (c == 0 || is_digit(c))
 568                 return 0;
 569
 570         if ((c >= 'a') && (c <= 'z'))
 571                 return c;
 572
 573         return c + 0x10000;
 574 }
 575
 576 int str_verscmp(const char *s1, const char *s2) {
 577         const char *os1, *os2;
 578
 579         assert(s1);
 580         assert(s2);
 581
 582         os1 = s1;
 583         os2 = s2;
 584
 585         while (*s1 || *s2) {
 586                 int first;
 587
 588                 while ((*s1 && !is_digit(*s1)) || (*s2 && !is_digit(*s2))) {
 589                         int order;
 590
 591                         order = c_order(*s1) - c_order(*s2);
 592                         if (order != 0)
 593                                 return order;
 594                         s1++;
 595                         s2++;
 596                 }
 597
 598                 while (*s1 == '0')
 599                         s1++;
 600                 while (*s2 == '0')
 601                         s2++;
 602
 603                 first = 0;
 604                 while (is_digit(*s1) && is_digit(*s2)) {
 605                         if (first == 0)
 606                                 first = *s1 - *s2;
 607                         s1++;
 608                         s2++;
 609                 }
 610
 611                 if (is_digit(*s1))
 612                         return 1;
 613                 if (is_digit(*s2))
 614                         return -1;
 615
 616                 if (first != 0)
 617                         return first;
 618         }
 619
 620         return strcmp(os1, os2);
 621 }
 622
 623 /* Turn off core dumps but only if we're running outside of a container. */
 624 void disable_coredumps(void) {
 625         int r;
 626
 627         if (detect_container() > 0)
 628                 return;
 629
 630         r = write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0);
 631         if (r < 0)
 632                 log_debug_errno(r, "Failed to turn off coredumps, ignoring: %m");
 633 }
 634 #endif // 0