src/basic/util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #include <alloca.h>
  22 //#include <errno.h>
  23 //#include <fcntl.h>
  24 #include <sched.h>
  25 //#include <signal.h>
  26 //#include <stdarg.h>
  27 //#include <stdio.h>
  28 #include <stdlib.h>
  29 //#include <string.h>
  30 //#include <sys/mman.h>
  31 #include <sys/prctl.h>
  32 #include <sys/statfs.h>
  33 #include <sys/sysmacros.h>
  34 //#include <sys/types.h>
  35 //#include <unistd.h>
  36
  37 #include "alloc-util.h"
  38 //#include "btrfs-util.h"
  39 #include "build.h"
  40 #include "cgroup-util.h"
  41 //#include "def.h"
  42 //#include "device-nodes.h"
  43 #include "dirent-util.h"
  44 #include "fd-util.h"
  45 #include "fileio.h"
  46 //#include "format-util.h"
  47 #include "hashmap.h"
  48 #include "hostname-util.h"
  49 //#include "log.h"
  50 #include "macro.h"
  51 //#include "missing.h"
  52 #include "parse-util.h"
  53 //#include "path-util.h"
  54 #include "process-util.h"
  55 #include "set.h"
  56 #include "signal-util.h"
  57 #include "stat-util.h"
  58 #include "string-util.h"
  59 #include "strv.h"
  60 #include "time-util.h"
  61 #include "umask-util.h"
  62 #include "user-util.h"
  63 #include "util.h"
  64
  65 int saved_argc = 0;
  66 char **saved_argv = NULL;
  67 static int saved_in_initrd = -1;
  68
  69 size_t page_size(void) {
  70         static thread_local size_t pgsz = 0;
  71         long r;
  72
  73         if (_likely_(pgsz > 0))
  74                 return pgsz;
  75
  76         r = sysconf(_SC_PAGESIZE);
  77         assert(r > 0);
  78
  79         pgsz = (size_t) r;
  80         return pgsz;
  81 }
  82
  83 #if 0 /// UNNEEDED by elogind
  84 bool plymouth_running(void) {
  85         return access("/run/plymouth/pid", F_OK) >= 0;
  86 }
  87 #endif // 0
  88
  89 bool display_is_local(const char *display) {
  90         assert(display);
  91
  92         return
  93                 display[0] == ':' &&
  94                 display[1] >= '0' &&
  95                 display[1] <= '9';
  96 }
  97
  98 int socket_from_display(const char *display, char **path) {
  99         size_t k;
 100         char *f, *c;
 101
 102         assert(display);
 103         assert(path);
 104
 105         if (!display_is_local(display))
 106                 return -EINVAL;
 107
 108         k = strspn(display+1, "0123456789");
 109
 110         f = new(char, STRLEN("/tmp/.X11-unix/X") + k + 1);
 111         if (!f)
 112                 return -ENOMEM;
 113
 114         c = stpcpy(f, "/tmp/.X11-unix/X");
 115         memcpy(c, display+1, k);
 116         c[k] = 0;
 117
 118         *path = f;
 119
 120         return 0;
 121 }
 122
 123 #if 0 /// UNNEEDED by elogind
 124 int block_get_whole_disk(dev_t d, dev_t *ret) {
 125         char p[SYS_BLOCK_PATH_MAX("/partition")];
 126         _cleanup_free_ char *s = NULL;
 127         int r;
 128         unsigned n, m;
 129
 130         assert(ret);
 131
 132         /* If it has a queue this is good enough for us */
 133         xsprintf_sys_block_path(p, "/queue", d);
 134         if (access(p, F_OK) >= 0) {
 135                 *ret = d;
 136                 return 0;
 137         }
 138
 139         /* If it is a partition find the originating device */
 140         xsprintf_sys_block_path(p, "/partition", d);
 141         if (access(p, F_OK) < 0)
 142                 return -ENOENT;
 143
 144         /* Get parent dev_t */
 145         xsprintf_sys_block_path(p, "/../dev", d);
 146         r = read_one_line_file(p, &s);
 147         if (r < 0)
 148                 return r;
 149
 150         r = sscanf(s, "%u:%u", &m, &n);
 151         if (r != 2)
 152                 return -EINVAL;
 153
 154         /* Only return this if it is really good enough for us. */
 155         xsprintf_sys_block_path(p, "/queue", makedev(m, n));
 156         if (access(p, F_OK) < 0)
 157                 return -ENOENT;
 158
 159         *ret = makedev(m, n);
 160         return 0;
 161 }
 162
 163 bool kexec_loaded(void) {
 164        _cleanup_free_ char *s = NULL;
 165
 166        if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
 167                return false;
 168
 169        return s[0] == '1';
 170 }
 171
 172 int prot_from_flags(int flags) {
 173
 174         switch (flags & O_ACCMODE) {
 175
 176         case O_RDONLY:
 177                 return PROT_READ;
 178
 179         case O_WRONLY:
 180                 return PROT_WRITE;
 181
 182         case O_RDWR:
 183                 return PROT_READ|PROT_WRITE;
 184
 185         default:
 186                 return -EINVAL;
 187         }
 188 }
 189 #endif // 0
 190
 191 bool in_initrd(void) {
 192         struct statfs s;
 193
 194         if (saved_in_initrd >= 0)
 195                 return saved_in_initrd;
 196
 197         /* We make two checks here:
 198          *
 199          * 1. the flag file /etc/initrd-release must exist
 200          * 2. the root file system must be a memory file system
 201          *
 202          * The second check is extra paranoia, since misdetecting an
 203          * initrd can have bad consequences due the initrd
 204          * emptying when transititioning to the main systemd.
 205          */
 206
 207         saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
 208                           statfs("/", &s) >= 0 &&
 209                           is_temporary_fs(&s);
 210
 211         return saved_in_initrd;
 212 }
 213
 214 void in_initrd_force(bool value) {
 215         saved_in_initrd = value;
 216 }
 217
 218 #if 0 /// UNNEEDED by elogind
 219 /* hey glibc, APIs with callbacks without a user pointer are so useless */
 220 void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
 221                  int (*compar) (const void *, const void *, void *), void *arg) {
 222         size_t l, u, idx;
 223         const void *p;
 224         int comparison;
 225
 226         l = 0;
 227         u = nmemb;
 228         while (l < u) {
 229                 idx = (l + u) / 2;
 230                 p = (const char *) base + idx * size;
 231                 comparison = compar(key, p, arg);
 232                 if (comparison < 0)
 233                         u = idx;
 234                 else if (comparison > 0)
 235                         l = idx + 1;
 236                 else
 237                         return (void *)p;
 238         }
 239         return NULL;
 240 }
 241
 242 int on_ac_power(void) {
 243         bool found_offline = false, found_online = false;
 244         _cleanup_closedir_ DIR *d = NULL;
 245         struct dirent *de;
 246
 247         d = opendir("/sys/class/power_supply");
 248         if (!d)
 249                 return errno == ENOENT ? true : -errno;
 250
 251         FOREACH_DIRENT(de, d, return -errno) {
 252                 _cleanup_close_ int fd = -1, device = -1;
 253                 char contents[6];
 254                 ssize_t n;
 255
 256                 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
 257                 if (device < 0) {
 258                         if (IN_SET(errno, ENOENT, ENOTDIR))
 259                                 continue;
 260
 261                         return -errno;
 262                 }
 263
 264                 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 265                 if (fd < 0) {
 266                         if (errno == ENOENT)
 267                                 continue;
 268
 269                         return -errno;
 270                 }
 271
 272                 n = read(fd, contents, sizeof(contents));
 273                 if (n < 0)
 274                         return -errno;
 275
 276                 if (n != 6 || memcmp(contents, "Mains\n", 6))
 277                         continue;
 278
 279                 safe_close(fd);
 280                 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
 281                 if (fd < 0) {
 282                         if (errno == ENOENT)
 283                                 continue;
 284
 285                         return -errno;
 286                 }
 287
 288                 n = read(fd, contents, sizeof(contents));
 289                 if (n < 0)
 290                         return -errno;
 291
 292                 if (n != 2 || contents[1] != '\n')
 293                         return -EIO;
 294
 295                 if (contents[0] == '1') {
 296                         found_online = true;
 297                         break;
 298                 } else if (contents[0] == '0')
 299                         found_offline = true;
 300                 else
 301                         return -EIO;
 302         }
 303
 304         return found_online || !found_offline;
 305 }
 306
 307 #endif // 0
 308 int container_get_leader(const char *machine, pid_t *pid) {
 309         _cleanup_free_ char *s = NULL, *class = NULL;
 310         const char *p;
 311         pid_t leader;
 312         int r;
 313
 314         assert(machine);
 315         assert(pid);
 316
 317         if (!machine_name_is_valid(machine))
 318                 return -EINVAL;
 319
 320         p = strjoina("/run/systemd/machines/", machine);
 321         r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
 322         if (r == -ENOENT)
 323                 return -EHOSTDOWN;
 324         if (r < 0)
 325                 return r;
 326         if (!s)
 327                 return -EIO;
 328
 329         if (!streq_ptr(class, "container"))
 330                 return -EIO;
 331
 332         r = parse_pid(s, &leader);
 333         if (r < 0)
 334                 return r;
 335         if (leader <= 1)
 336                 return -EIO;
 337
 338         *pid = leader;
 339         return 0;
 340 }
 341
 342 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
 343         _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
 344         int rfd = -1;
 345
 346         assert(pid >= 0);
 347
 348         if (mntns_fd) {
 349                 const char *mntns;
 350
 351                 mntns = procfs_file_alloca(pid, "ns/mnt");
 352                 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 353                 if (mntnsfd < 0)
 354                         return -errno;
 355         }
 356
 357         if (pidns_fd) {
 358                 const char *pidns;
 359
 360                 pidns = procfs_file_alloca(pid, "ns/pid");
 361                 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 362                 if (pidnsfd < 0)
 363                         return -errno;
 364         }
 365
 366         if (netns_fd) {
 367                 const char *netns;
 368
 369                 netns = procfs_file_alloca(pid, "ns/net");
 370                 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 371                 if (netnsfd < 0)
 372                         return -errno;
 373         }
 374
 375         if (userns_fd) {
 376                 const char *userns;
 377
 378                 userns = procfs_file_alloca(pid, "ns/user");
 379                 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
 380                 if (usernsfd < 0 && errno != ENOENT)
 381                         return -errno;
 382         }
 383
 384         if (root_fd) {
 385                 const char *root;
 386
 387                 root = procfs_file_alloca(pid, "root");
 388                 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
 389                 if (rfd < 0)
 390                         return -errno;
 391         }
 392
 393         if (pidns_fd)
 394                 *pidns_fd = pidnsfd;
 395
 396         if (mntns_fd)
 397                 *mntns_fd = mntnsfd;
 398
 399         if (netns_fd)
 400                 *netns_fd = netnsfd;
 401
 402         if (userns_fd)
 403                 *userns_fd = usernsfd;
 404
 405         if (root_fd)
 406                 *root_fd = rfd;
 407
 408         pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
 409
 410         return 0;
 411 }
 412
 413 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
 414         if (userns_fd >= 0) {
 415                 /* Can't setns to your own userns, since then you could
 416                  * escalate from non-root to root in your own namespace, so
 417                  * check if namespaces equal before attempting to enter. */
 418                 _cleanup_free_ char *userns_fd_path = NULL;
 419                 int r;
 420                 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
 421                         return -ENOMEM;
 422
 423                 r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
 424                 if (r < 0)
 425                         return r;
 426                 if (r)
 427                         userns_fd = -1;
 428         }
 429
 430         if (pidns_fd >= 0)
 431                 if (setns(pidns_fd, CLONE_NEWPID) < 0)
 432                         return -errno;
 433
 434         if (mntns_fd >= 0)
 435                 if (setns(mntns_fd, CLONE_NEWNS) < 0)
 436                         return -errno;
 437
 438         if (netns_fd >= 0)
 439                 if (setns(netns_fd, CLONE_NEWNET) < 0)
 440                         return -errno;
 441
 442         if (userns_fd >= 0)
 443                 if (setns(userns_fd, CLONE_NEWUSER) < 0)
 444                         return -errno;
 445
 446         if (root_fd >= 0) {
 447                 if (fchdir(root_fd) < 0)
 448                         return -errno;
 449
 450                 if (chroot(".") < 0)
 451                         return -errno;
 452         }
 453
 454         return reset_uid_gid();
 455 }
 456
 457 uint64_t physical_memory(void) {
 458         _cleanup_free_ char *root = NULL, *value = NULL;
 459         uint64_t mem, lim;
 460         size_t ps;
 461         long sc;
 462
 463         /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
 464          * memory.
 465          *
 466          * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
 467          * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
 468
 469         sc = sysconf(_SC_PHYS_PAGES);
 470         assert(sc > 0);
 471
 472         ps = page_size();
 473         mem = (uint64_t) sc * (uint64_t) ps;
 474
 475         if (cg_get_root_path(&root) < 0)
 476                 return mem;
 477
 478         if (cg_get_attribute("memory", root, "memory.limit_in_bytes", &value))
 479                 return mem;
 480
 481         if (safe_atou64(value, &lim) < 0)
 482                 return mem;
 483
 484         /* Make sure the limit is a multiple of our own page size */
 485         lim /= ps;
 486         lim *= ps;
 487
 488         return MIN(mem, lim);
 489 }
 490
 491 uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
 492         uint64_t p, m, ps, r;
 493
 494         assert(max > 0);
 495
 496         /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
 497          * the result is a multiple of the page size (rounds down). */
 498
 499         ps = page_size();
 500         assert(ps > 0);
 501
 502         p = physical_memory() / ps;
 503         assert(p > 0);
 504
 505         m = p * v;
 506         if (m / p != v)
 507                 return UINT64_MAX;
 508
 509         m /= max;
 510
 511         r = m * ps;
 512         if (r / ps != m)
 513                 return UINT64_MAX;
 514
 515         return r;
 516 }
 517
 518 uint64_t system_tasks_max(void) {
 519
 520 #if SIZEOF_PID_T == 4
 521 #define TASKS_MAX ((uint64_t) (INT32_MAX-1))
 522 #elif SIZEOF_PID_T == 2
 523 #define TASKS_MAX ((uint64_t) (INT16_MAX-1))
 524 #else
 525 #error "Unknown pid_t size"
 526 #endif
 527
 528         _cleanup_free_ char *value = NULL, *root = NULL;
 529         uint64_t a = TASKS_MAX, b = TASKS_MAX;
 530
 531         /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
 532          * limit:
 533          *
 534          * a) the maximum value for the pid_t type
 535          * b) the cgroups pids_max attribute for the system
 536          * c) the kernel's configure maximum PID value
 537          *
 538          * And then pick the smallest of the three */
 539
 540         if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0)
 541                 (void) safe_atou64(value, &a);
 542
 543         if (cg_get_root_path(&root) >= 0) {
 544                 value = mfree(value);
 545
 546                 if (cg_get_attribute("pids", root, "pids.max", &value) >= 0)
 547                         (void) safe_atou64(value, &b);
 548         }
 549
 550         return MIN3(TASKS_MAX,
 551                     a <= 0 ? TASKS_MAX : a,
 552                     b <= 0 ? TASKS_MAX : b);
 553 }
 554
 555 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
 556         uint64_t t, m;
 557
 558         assert(max > 0);
 559
 560         /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
 561          * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
 562
 563         t = system_tasks_max();
 564         assert(t > 0);
 565
 566         m = t * v;
 567         if (m / t != v) /* overflow? */
 568                 return UINT64_MAX;
 569
 570         return m / max;
 571 }
 572
 573 #if 0 /// UNNEEDED by elogind
 574 int update_reboot_parameter_and_warn(const char *param) {
 575         int r;
 576
 577         if (isempty(param)) {
 578                 if (unlink("/run/systemd/reboot-param") < 0) {
 579                         if (errno == ENOENT)
 580                                 return 0;
 581
 582                         return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m");
 583                 }
 584
 585                 return 0;
 586         }
 587
 588         RUN_WITH_UMASK(0022) {
 589                 r = write_string_file("/run/systemd/reboot-param", param, WRITE_STRING_FILE_CREATE);
 590                 if (r < 0)
 591                         return log_warning_errno(r, "Failed to write reboot parameter file: %m");
 592         }
 593
 594         return 0;
 595 }
 596 #endif // 0
 597
 598 int version(void) {
 599         puts(PACKAGE_STRING "\n"
 600              SYSTEMD_FEATURES);
 601         return 0;
 602 }
 603
 604 #if 0 /// UNNEEDED by elogind
 605 int get_block_device(const char *path, dev_t *dev) {
 606         struct stat st;
 607         struct statfs sfs;
 608
 609         assert(path);
 610         assert(dev);
 611
 612         /* Get's the block device directly backing a file system. If
 613          * the block device is encrypted, returns the device mapper
 614          * block device. */
 615
 616         if (lstat(path, &st))
 617                 return -errno;
 618
 619         if (major(st.st_dev) != 0) {
 620                 *dev = st.st_dev;
 621                 return 1;
 622         }
 623
 624         if (statfs(path, &sfs) < 0)
 625                 return -errno;
 626
 627         if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC))
 628                 return btrfs_get_block_device(path, dev);
 629
 630         return 0;
 631 }
 632
 633 int get_block_device_harder(const char *path, dev_t *dev) {
 634         _cleanup_closedir_ DIR *d = NULL;
 635         _cleanup_free_ char *t = NULL;
 636         char p[SYS_BLOCK_PATH_MAX("/slaves")];
 637         struct dirent *de, *found = NULL;
 638         const char *q;
 639         unsigned maj, min;
 640         dev_t dt;
 641         int r;
 642
 643         assert(path);
 644         assert(dev);
 645
 646         /* Gets the backing block device for a file system, and
 647          * handles LUKS encrypted file systems, looking for its
 648          * immediate parent, if there is one. */
 649
 650         r = get_block_device(path, &dt);
 651         if (r <= 0)
 652                 return r;
 653
 654         xsprintf_sys_block_path(p, "/slaves", dt);
 655         d = opendir(p);
 656         if (!d) {
 657                 if (errno == ENOENT)
 658                         goto fallback;
 659
 660                 return -errno;
 661         }
 662
 663         FOREACH_DIRENT_ALL(de, d, return -errno) {
 664
 665                 if (dot_or_dot_dot(de->d_name))
 666                         continue;
 667
 668                 if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN))
 669                         continue;
 670
 671                 if (found) {
 672                         _cleanup_free_ char *u = NULL, *v = NULL, *a = NULL, *b = NULL;
 673
 674                         /* We found a device backed by multiple other devices. We don't really support automatic
 675                          * discovery on such setups, with the exception of dm-verity partitions. In this case there are
 676                          * two backing devices: the data partition and the hash partition. We are fine with such
 677                          * setups, however, only if both partitions are on the same physical device. Hence, let's
 678                          * verify this. */
 679
 680                         u = strjoin(p, "/", de->d_name, "/../dev");
 681                         if (!u)
 682                                 return -ENOMEM;
 683
 684                         v = strjoin(p, "/", found->d_name, "/../dev");
 685                         if (!v)
 686                                 return -ENOMEM;
 687
 688                         r = read_one_line_file(u, &a);
 689                         if (r < 0) {
 690                                 log_debug_errno(r, "Failed to read %s: %m", u);
 691                                 goto fallback;
 692                         }
 693
 694                         r = read_one_line_file(v, &b);
 695                         if (r < 0) {
 696                                 log_debug_errno(r, "Failed to read %s: %m", v);
 697                                 goto fallback;
 698                         }
 699
 700                         /* Check if the parent device is the same. If not, then the two backing devices are on
 701                          * different physical devices, and we don't support that. */
 702                         if (!streq(a, b))
 703                                 goto fallback;
 704                 }
 705
 706                 found = de;
 707         }
 708
 709         if (!found)
 710                 goto fallback;
 711
 712         q = strjoina(p, "/", found->d_name, "/dev");
 713
 714         r = read_one_line_file(q, &t);
 715         if (r == -ENOENT)
 716                 goto fallback;
 717         if (r < 0)
 718                 return r;
 719
 720         if (sscanf(t, "%u:%u", &maj, &min) != 2)
 721                 return -EINVAL;
 722
 723         if (maj == 0)
 724                 goto fallback;
 725
 726         *dev = makedev(maj, min);
 727         return 1;
 728
 729 fallback:
 730         *dev = dt;
 731         return 1;
 732 }
 733 #endif // 0