src/basic/cgroup-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   Copyright 2010 Lennart Poettering
   4 ***/
   5
   6 #include <dirent.h>
   7 #include <errno.h>
   8 #include <ftw.h>
   9 //#include <limits.h>
  10 #include <signal.h>
  11 //#include <stddef.h>
  12 #include <stdio_ext.h>
  13 #include <stdlib.h>
  14 #include <string.h>
  15 #include <sys/stat.h>
  16 //#include <sys/statfs.h>
  17 #include <sys/types.h>
  18 #include <sys/xattr.h>
  19 #include <unistd.h>
  20
  21 #include "alloc-util.h"
  22 #include "cgroup-util.h"
  23 //#include "def.h"
  24 #include "dirent-util.h"
  25 #include "extract-word.h"
  26 #include "fd-util.h"
  27 #include "fileio.h"
  28 #include "format-util.h"
  29 #include "fs-util.h"
  30 //#include "log.h"
  31 #include "login-util.h"
  32 #include "macro.h"
  33 //#include "missing.h"
  34 #include "mkdir.h"
  35 #include "parse-util.h"
  36 #include "path-util.h"
  37 #include "proc-cmdline.h"
  38 #include "process-util.h"
  39 #include "set.h"
  40 //#include "special.h"
  41 #include "stat-util.h"
  42 #include "stdio-util.h"
  43 #include "string-table.h"
  44 #include "string-util.h"
  45 #include "strv.h"
  46 #include "unit-name.h"
  47 #include "user-util.h"
  48
  49 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  50         _cleanup_free_ char *fs = NULL;
  51         FILE *f;
  52         int r;
  53
  54         assert(_f);
  55
  56         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  57         if (r < 0)
  58                 return r;
  59
  60         f = fopen(fs, "re");
  61         if (!f)
  62                 return -errno;
  63
  64         *_f = f;
  65         return 0;
  66 }
  67
  68 int cg_read_pid(FILE *f, pid_t *_pid) {
  69         unsigned long ul;
  70
  71         /* Note that the cgroup.procs might contain duplicates! See
  72          * cgroups.txt for details. */
  73
  74         assert(f);
  75         assert(_pid);
  76
  77         errno = 0;
  78         if (fscanf(f, "%lu", &ul) != 1) {
  79
  80                 if (feof(f))
  81                         return 0;
  82
  83                 return errno > 0 ? -errno : -EIO;
  84         }
  85
  86         if (ul <= 0)
  87                 return -EIO;
  88
  89         *_pid = (pid_t) ul;
  90         return 1;
  91 }
  92
  93 int cg_read_event(
  94                 const char *controller,
  95                 const char *path,
  96                 const char *event,
  97                 char **val) {
  98
  99         _cleanup_free_ char *events = NULL, *content = NULL;
 100         char *p, *line;
 101         int r;
 102
 103         r = cg_get_path(controller, path, "cgroup.events", &events);
 104         if (r < 0)
 105                 return r;
 106
 107         r = read_full_file(events, &content, NULL);
 108         if (r < 0)
 109                 return r;
 110
 111         p = content;
 112         while ((line = strsep(&p, "\n"))) {
 113                 char *key;
 114
 115                 key = strsep(&line, " ");
 116                 if (!key || !line)
 117                         return -EINVAL;
 118
 119                 if (strcmp(key, event))
 120                         continue;
 121
 122                 *val = strdup(line);
 123                 return 0;
 124         }
 125
 126         return -ENOENT;
 127 }
 128
 129 #if 0 /// UNNEEDED by elogind
 130 bool cg_ns_supported(void) {
 131         static thread_local int enabled = -1;
 132
 133         if (enabled >= 0)
 134                 return enabled;
 135
 136         if (access("/proc/self/ns/cgroup", F_OK) == 0)
 137                 enabled = 1;
 138         else
 139                 enabled = 0;
 140
 141         return enabled;
 142 }
 143 #endif // 0
 144
 145 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
 146         _cleanup_free_ char *fs = NULL;
 147         int r;
 148         DIR *d;
 149
 150         assert(_d);
 151
 152         /* This is not recursive! */
 153
 154         r = cg_get_path(controller, path, NULL, &fs);
 155         if (r < 0)
 156                 return r;
 157
 158         d = opendir(fs);
 159         if (!d)
 160                 return -errno;
 161
 162         *_d = d;
 163         return 0;
 164 }
 165
 166 int cg_read_subgroup(DIR *d, char **fn) {
 167         struct dirent *de;
 168
 169         assert(d);
 170         assert(fn);
 171
 172         FOREACH_DIRENT_ALL(de, d, return -errno) {
 173                 char *b;
 174
 175                 if (de->d_type != DT_DIR)
 176                         continue;
 177
 178                 if (dot_or_dot_dot(de->d_name))
 179                         continue;
 180
 181                 b = strdup(de->d_name);
 182                 if (!b)
 183                         return -ENOMEM;
 184
 185                 *fn = b;
 186                 return 1;
 187         }
 188
 189         return 0;
 190 }
 191
 192 int cg_rmdir(const char *controller, const char *path) {
 193         _cleanup_free_ char *p = NULL;
 194         int r;
 195
 196         r = cg_get_path(controller, path, NULL, &p);
 197         if (r < 0)
 198                 return r;
 199
 200         r = rmdir(p);
 201         if (r < 0 && errno != ENOENT)
 202                 return -errno;
 203
 204         r = cg_hybrid_unified();
 205         if (r < 0)
 206                 return r;
 207         if (r == 0)
 208                 return 0;
 209
 210         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 211                 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
 212                 if (r < 0)
 213                         log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
 214         }
 215
 216         return 0;
 217 }
 218
 219 int cg_kill(
 220                 const char *controller,
 221                 const char *path,
 222                 int sig,
 223                 CGroupFlags flags,
 224                 Set *s,
 225                 cg_kill_log_func_t log_kill,
 226                 void *userdata) {
 227
 228         _cleanup_set_free_ Set *allocated_set = NULL;
 229         bool done = false;
 230         int r, ret = 0;
 231         pid_t my_pid;
 232
 233         assert(sig >= 0);
 234
 235          /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
 236           * SIGCONT on SIGKILL. */
 237         if (IN_SET(sig, SIGCONT, SIGKILL))
 238                 flags &= ~CGROUP_SIGCONT;
 239
 240         /* This goes through the tasks list and kills them all. This
 241          * is repeated until no further processes are added to the
 242          * tasks list, to properly handle forking processes */
 243
 244         if (!s) {
 245                 s = allocated_set = set_new(NULL);
 246                 if (!s)
 247                         return -ENOMEM;
 248         }
 249
 250         my_pid = getpid_cached();
 251
 252         do {
 253                 _cleanup_fclose_ FILE *f = NULL;
 254                 pid_t pid = 0;
 255                 done = true;
 256
 257                 r = cg_enumerate_processes(controller, path, &f);
 258                 if (r < 0) {
 259                         if (ret >= 0 && r != -ENOENT)
 260                                 return r;
 261
 262                         return ret;
 263                 }
 264
 265                 while ((r = cg_read_pid(f, &pid)) > 0) {
 266
 267                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
 268                                 continue;
 269
 270                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 271                                 continue;
 272
 273                         if (log_kill)
 274                                 log_kill(pid, sig, userdata);
 275
 276                         /* If we haven't killed this process yet, kill
 277                          * it */
 278                         if (kill(pid, sig) < 0) {
 279                                 if (ret >= 0 && errno != ESRCH)
 280                                         ret = -errno;
 281                         } else {
 282                                 if (flags & CGROUP_SIGCONT)
 283                                         (void) kill(pid, SIGCONT);
 284
 285                                 if (ret == 0)
 286                                         ret = 1;
 287                         }
 288
 289                         done = false;
 290
 291                         r = set_put(s, PID_TO_PTR(pid));
 292                         if (r < 0) {
 293                                 if (ret >= 0)
 294                                         return r;
 295
 296                                 return ret;
 297                         }
 298                 }
 299
 300                 if (r < 0) {
 301                         if (ret >= 0)
 302                                 return r;
 303
 304                         return ret;
 305                 }
 306
 307                 /* To avoid racing against processes which fork
 308                  * quicker than we can kill them we repeat this until
 309                  * no new pids need to be killed. */
 310
 311         } while (!done);
 312
 313         return ret;
 314 }
 315
 316 int cg_kill_recursive(
 317                 const char *controller,
 318                 const char *path,
 319                 int sig,
 320                 CGroupFlags flags,
 321                 Set *s,
 322                 cg_kill_log_func_t log_kill,
 323                 void *userdata) {
 324
 325         _cleanup_set_free_ Set *allocated_set = NULL;
 326         _cleanup_closedir_ DIR *d = NULL;
 327         int r, ret;
 328         char *fn;
 329
 330         assert(path);
 331         assert(sig >= 0);
 332
 333         if (!s) {
 334                 s = allocated_set = set_new(NULL);
 335                 if (!s)
 336                         return -ENOMEM;
 337         }
 338
 339         ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
 340
 341         r = cg_enumerate_subgroups(controller, path, &d);
 342         if (r < 0) {
 343                 if (ret >= 0 && r != -ENOENT)
 344                         return r;
 345
 346                 return ret;
 347         }
 348
 349         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 350                 _cleanup_free_ char *p = NULL;
 351
 352                 p = strjoin(path, "/", fn);
 353                 free(fn);
 354                 if (!p)
 355                         return -ENOMEM;
 356
 357                 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
 358                 if (r != 0 && ret >= 0)
 359                         ret = r;
 360         }
 361         if (ret >= 0 && r < 0)
 362                 ret = r;
 363
 364         if (flags & CGROUP_REMOVE) {
 365                 r = cg_rmdir(controller, path);
 366                 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
 367                         return r;
 368         }
 369
 370         return ret;
 371 }
 372
 373 int cg_migrate(
 374                 const char *cfrom,
 375                 const char *pfrom,
 376                 const char *cto,
 377                 const char *pto,
 378                 CGroupFlags flags) {
 379
 380         bool done = false;
 381         _cleanup_set_free_ Set *s = NULL;
 382         int r, ret = 0;
 383         pid_t my_pid;
 384
 385         assert(cfrom);
 386         assert(pfrom);
 387         assert(cto);
 388         assert(pto);
 389
 390         s = set_new(NULL);
 391         if (!s)
 392                 return -ENOMEM;
 393
 394         my_pid = getpid_cached();
 395
 396         log_debug_elogind("Migrating \"%s\"/\"%s\" to \"%s\"/\"%s\" (%s)",
 397                           cfrom, pfrom, cto, pto,
 398                           (flags & CGROUP_IGNORE_SELF)
 399                           ? "ignoring self" : "watching self");
 400         do {
 401                 _cleanup_fclose_ FILE *f = NULL;
 402                 pid_t pid = 0;
 403                 done = true;
 404
 405                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 406                 if (r < 0) {
 407                         if (ret >= 0 && r != -ENOENT)
 408                                 return r;
 409
 410                         return ret;
 411                 }
 412
 413                 while ((r = cg_read_pid(f, &pid)) > 0) {
 414
 415                         /* This might do weird stuff if we aren't a
 416                          * single-threaded program. However, we
 417                          * luckily know we are not */
 418                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
 419                                 continue;
 420
 421                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 422                                 continue;
 423
 424                         /* Ignore kernel threads. Since they can only
 425                          * exist in the root cgroup, we only check for
 426                          * them there. */
 427                         if (cfrom &&
 428                             empty_or_root(pfrom) &&
 429                             is_kernel_thread(pid) > 0)
 430                                 continue;
 431
 432                         r = cg_attach(cto, pto, pid);
 433                         if (r < 0) {
 434                                 if (ret >= 0 && r != -ESRCH)
 435                                         ret = r;
 436                         } else if (ret == 0)
 437                                 ret = 1;
 438
 439                         done = false;
 440
 441                         r = set_put(s, PID_TO_PTR(pid));
 442                         if (r < 0) {
 443                                 if (ret >= 0)
 444                                         return r;
 445
 446                                 return ret;
 447                         }
 448                 }
 449
 450                 if (r < 0) {
 451                         if (ret >= 0)
 452                                 return r;
 453
 454                         return ret;
 455                 }
 456         } while (!done);
 457
 458         return ret;
 459 }
 460
 461 int cg_migrate_recursive(
 462                 const char *cfrom,
 463                 const char *pfrom,
 464                 const char *cto,
 465                 const char *pto,
 466                 CGroupFlags flags) {
 467
 468         _cleanup_closedir_ DIR *d = NULL;
 469         int r, ret = 0;
 470         char *fn;
 471
 472         assert(cfrom);
 473         assert(pfrom);
 474         assert(cto);
 475         assert(pto);
 476
 477         ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
 478
 479         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 480         if (r < 0) {
 481                 if (ret >= 0 && r != -ENOENT)
 482                         return r;
 483
 484                 return ret;
 485         }
 486
 487         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 488                 _cleanup_free_ char *p = NULL;
 489
 490                 p = strjoin(pfrom, "/", fn);
 491                 free(fn);
 492                 if (!p)
 493                         return -ENOMEM;
 494
 495                 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
 496                 if (r != 0 && ret >= 0)
 497                         ret = r;
 498         }
 499
 500         if (r < 0 && ret >= 0)
 501                 ret = r;
 502
 503         if (flags & CGROUP_REMOVE) {
 504                 r = cg_rmdir(cfrom, pfrom);
 505                 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
 506                         return r;
 507         }
 508
 509         return ret;
 510 }
 511
 512 int cg_migrate_recursive_fallback(
 513                 const char *cfrom,
 514                 const char *pfrom,
 515                 const char *cto,
 516                 const char *pto,
 517                 CGroupFlags flags) {
 518
 519         int r;
 520
 521         assert(cfrom);
 522         assert(pfrom);
 523         assert(cto);
 524         assert(pto);
 525
 526         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
 527         if (r < 0) {
 528                 char prefix[strlen(pto) + 1];
 529
 530                 /* This didn't work? Then let's try all prefixes of the destination */
 531
 532                 PATH_FOREACH_PREFIX(prefix, pto) {
 533                         int q;
 534
 535                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
 536                         if (q >= 0)
 537                                 return q;
 538                 }
 539         }
 540
 541         return r;
 542 }
 543
 544 static const char *controller_to_dirname(const char *controller) {
 545         const char *e;
 546
 547         assert(controller);
 548
 549         /* Converts a controller name to the directory name below
 550          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 551          * just cuts off the name= prefixed used for named
 552          * hierarchies, if it is specified. */
 553
 554         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 555                 if (cg_hybrid_unified() > 0)
 556                         controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
 557                 else
 558                         controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
 559         }
 560
 561         e = startswith(controller, "name=");
 562         if (e)
 563                 return e;
 564
 565         return controller;
 566 }
 567
 568 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 569         const char *dn;
 570         char *t = NULL;
 571
 572         assert(fs);
 573         assert(controller);
 574
 575         dn = controller_to_dirname(controller);
 576
 577         if (isempty(path) && isempty(suffix))
 578                 t = strappend("/sys/fs/cgroup/", dn);
 579         else if (isempty(path))
 580                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
 581         else if (isempty(suffix))
 582                 t = strjoin("/sys/fs/cgroup/", dn, "/", path);
 583         else
 584                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
 585         if (!t)
 586                 return -ENOMEM;
 587
 588         *fs = t;
 589         return 0;
 590 }
 591
 592 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 593         char *t;
 594
 595         assert(fs);
 596
 597         if (isempty(path) && isempty(suffix))
 598                 t = strdup("/sys/fs/cgroup");
 599         else if (isempty(path))
 600                 t = strappend("/sys/fs/cgroup/", suffix);
 601         else if (isempty(suffix))
 602                 t = strappend("/sys/fs/cgroup/", path);
 603         else
 604                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix);
 605         if (!t)
 606                 return -ENOMEM;
 607
 608         *fs = t;
 609         return 0;
 610 }
 611
 612 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 613         int r;
 614
 615         assert(fs);
 616
 617         if (!controller) {
 618                 char *t;
 619
 620                 /* If no controller is specified, we return the path
 621                  * *below* the controllers, without any prefix. */
 622
 623                 if (!path && !suffix)
 624                         return -EINVAL;
 625
 626                 if (!suffix)
 627                         t = strdup(path);
 628                 else if (!path)
 629                         t = strdup(suffix);
 630                 else
 631                         t = strjoin(path, "/", suffix);
 632                 if (!t)
 633                         return -ENOMEM;
 634
 635                 *fs = path_simplify(t, false);
 636                 return 0;
 637         }
 638
 639         if (!cg_controller_is_valid(controller))
 640                 return -EINVAL;
 641
 642         r = cg_all_unified();
 643         if (r < 0)
 644                 return r;
 645         if (r > 0)
 646                 r = join_path_unified(path, suffix, fs);
 647         else
 648                 r = join_path_legacy(controller, path, suffix, fs);
 649         if (r < 0)
 650                 return r;
 651
 652         path_simplify(*fs, false);
 653         return 0;
 654 }
 655
 656 static int controller_is_accessible(const char *controller) {
 657         int r;
 658
 659         assert(controller);
 660
 661         /* Checks whether a specific controller is accessible,
 662          * i.e. its hierarchy mounted. In the unified hierarchy all
 663          * controllers are considered accessible, except for the named
 664          * hierarchies */
 665
 666         if (!cg_controller_is_valid(controller))
 667                 return -EINVAL;
 668
 669         r = cg_all_unified();
 670         if (r < 0)
 671                 return r;
 672         if (r > 0) {
 673                 /* We don't support named hierarchies if we are using
 674                  * the unified hierarchy. */
 675
 676                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 677                         return 0;
 678
 679                 if (startswith(controller, "name="))
 680                         return -EOPNOTSUPP;
 681
 682         } else {
 683                 const char *cc, *dn;
 684
 685                 dn = controller_to_dirname(controller);
 686                 cc = strjoina("/sys/fs/cgroup/", dn);
 687
 688                 if (laccess(cc, F_OK) < 0)
 689                         return -errno;
 690         }
 691
 692         return 0;
 693 }
 694
 695 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 696         int r;
 697
 698         assert(controller);
 699         assert(fs);
 700
 701         /* Check if the specified controller is actually accessible */
 702         r = controller_is_accessible(controller);
 703         if (r < 0)
 704                 return r;
 705
 706         return cg_get_path(controller, path, suffix, fs);
 707 }
 708
 709 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 710         assert(path);
 711         assert(sb);
 712         assert(ftwbuf);
 713
 714         if (typeflag != FTW_DP)
 715                 return 0;
 716
 717         if (ftwbuf->level < 1)
 718                 return 0;
 719
 720         (void) rmdir(path);
 721         return 0;
 722 }
 723
 724 int cg_trim(const char *controller, const char *path, bool delete_root) {
 725         _cleanup_free_ char *fs = NULL;
 726         int r = 0, q;
 727
 728         assert(path);
 729
 730         r = cg_get_path(controller, path, NULL, &fs);
 731         if (r < 0)
 732                 return r;
 733
 734         errno = 0;
 735         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 736                 if (errno == ENOENT)
 737                         r = 0;
 738                 else if (errno > 0)
 739                         r = -errno;
 740                 else
 741                         r = -EIO;
 742         }
 743
 744         if (delete_root) {
 745                 if (rmdir(fs) < 0 && errno != ENOENT)
 746                         return -errno;
 747         }
 748
 749         q = cg_hybrid_unified();
 750         if (q < 0)
 751                 return q;
 752         if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 753                 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
 754                 if (q < 0)
 755                         log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
 756         }
 757
 758         return r;
 759 }
 760
 761 /* Create a cgroup in the hierarchy of controller.
 762  * Returns 0 if the group already existed, 1 on success, negative otherwise.
 763  */
 764 int cg_create(const char *controller, const char *path) {
 765         _cleanup_free_ char *fs = NULL;
 766         int r;
 767
 768         r = cg_get_path_and_check(controller, path, NULL, &fs);
 769         if (r < 0)
 770                 return r;
 771
 772         r = mkdir_parents(fs, 0755);
 773         if (r < 0)
 774                 return r;
 775
 776         r = mkdir_errno_wrapper(fs, 0755);
 777         if (r == -EEXIST)
 778                 return 0;
 779         if (r < 0)
 780                 return r;
 781
 782         r = cg_hybrid_unified();
 783         if (r < 0)
 784                 return r;
 785
 786         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 787                 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
 788                 if (r < 0)
 789                         log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
 790         }
 791
 792         return 1;
 793 }
 794
 795 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 796         int r, q;
 797
 798         assert(pid >= 0);
 799
 800         r = cg_create(controller, path);
 801         if (r < 0)
 802                 return r;
 803
 804         q = cg_attach(controller, path, pid);
 805         if (q < 0)
 806                 return q;
 807
 808         /* This does not remove the cgroup on failure */
 809         return r;
 810 }
 811
 812 int cg_attach(const char *controller, const char *path, pid_t pid) {
 813         _cleanup_free_ char *fs = NULL;
 814         char c[DECIMAL_STR_MAX(pid_t) + 2];
 815         int r;
 816
 817         assert(path);
 818         assert(pid >= 0);
 819
 820         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 821         if (r < 0)
 822                 return r;
 823
 824         if (pid == 0)
 825                 pid = getpid_cached();
 826
 827         xsprintf(c, PID_FMT "\n", pid);
 828
 829         r = write_string_file(fs, c, 0);
 830         if (r < 0)
 831                 return r;
 832
 833         r = cg_hybrid_unified();
 834         if (r < 0)
 835                 return r;
 836
 837         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 838                 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
 839                 if (r < 0)
 840                         log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
 841         }
 842
 843         return 0;
 844 }
 845
 846 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 847         int r;
 848
 849         assert(controller);
 850         assert(path);
 851         assert(pid >= 0);
 852
 853         r = cg_attach(controller, path, pid);
 854         if (r < 0) {
 855                 char prefix[strlen(path) + 1];
 856
 857                 /* This didn't work? Then let's try all prefixes of
 858                  * the destination */
 859
 860                 PATH_FOREACH_PREFIX(prefix, path) {
 861                         int q;
 862
 863                         q = cg_attach(controller, prefix, pid);
 864                         if (q >= 0)
 865                                 return q;
 866                 }
 867         }
 868
 869         return r;
 870 }
 871
 872 #if 0 /// UNNEEDED by elogind
 873 int cg_set_access(
 874                 const char *controller,
 875                 const char *path,
 876                 uid_t uid,
 877                 gid_t gid) {
 878
 879         struct Attribute {
 880                 const char *name;
 881                 bool fatal;
 882         };
 883
 884         /* cgroupsv1, aka legacy/non-unified */
 885         static const struct Attribute legacy_attributes[] = {
 886                 { "cgroup.procs",           true  },
 887                 { "tasks",                  false },
 888                 { "cgroup.clone_children",  false },
 889                 {},
 890         };
 891
 892         /* cgroupsv2, aka unified */
 893         static const struct Attribute unified_attributes[] = {
 894                 { "cgroup.procs",           true  },
 895                 { "cgroup.subtree_control", true  },
 896                 { "cgroup.threads",         false },
 897                 {},
 898         };
 899
 900         static const struct Attribute* const attributes[] = {
 901                 [false] = legacy_attributes,
 902                 [true]  = unified_attributes,
 903         };
 904
 905         _cleanup_free_ char *fs = NULL;
 906         const struct Attribute *i;
 907         int r, unified;
 908
 909         assert(path);
 910
 911         if (uid == UID_INVALID && gid == GID_INVALID)
 912                 return 0;
 913
 914         unified = cg_unified_controller(controller);
 915         if (unified < 0)
 916                 return unified;
 917
 918         /* Configure access to the cgroup itself */
 919         r = cg_get_path(controller, path, NULL, &fs);
 920         if (r < 0)
 921                 return r;
 922
 923         r = chmod_and_chown(fs, 0755, uid, gid);
 924         if (r < 0)
 925                 return r;
 926
 927         /* Configure access to the cgroup's attributes */
 928         for (i = attributes[unified]; i->name; i++) {
 929                 fs = mfree(fs);
 930
 931                 r = cg_get_path(controller, path, i->name, &fs);
 932                 if (r < 0)
 933                         return r;
 934
 935                 r = chmod_and_chown(fs, 0644, uid, gid);
 936                 if (r < 0) {
 937                         if (i->fatal)
 938                                 return r;
 939
 940                         log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
 941                 }
 942         }
 943
 944         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 945                 r = cg_hybrid_unified();
 946                 if (r < 0)
 947                         return r;
 948                 if (r > 0) {
 949                         /* Always propagate access mode from unified to legacy controller */
 950                         r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
 951                         if (r < 0)
 952                                 log_debug_errno(r, "Failed to set access on compatibility elogind cgroup %s, ignoring: %m", path);
 953                 }
 954         }
 955
 956         return 0;
 957 }
 958
 959 int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
 960         _cleanup_free_ char *fs = NULL;
 961         int r;
 962
 963         assert(path);
 964         assert(name);
 965         assert(value || size <= 0);
 966
 967         r = cg_get_path(controller, path, NULL, &fs);
 968         if (r < 0)
 969                 return r;
 970
 971         if (setxattr(fs, name, value, size, flags) < 0)
 972                 return -errno;
 973
 974         return 0;
 975 }
 976
 977 int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
 978         _cleanup_free_ char *fs = NULL;
 979         ssize_t n;
 980         int r;
 981
 982         assert(path);
 983         assert(name);
 984
 985         r = cg_get_path(controller, path, NULL, &fs);
 986         if (r < 0)
 987                 return r;
 988
 989         n = getxattr(fs, name, value, size);
 990         if (n < 0)
 991                 return -errno;
 992
 993         return (int) n;
 994 }
 995 #endif // 0
 996
 997 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
 998         _cleanup_fclose_ FILE *f = NULL;
 999         char line[LINE_MAX];
1000 #if 0 /// At elogind we do not want that (false alarm) "maybe uninitialized" warning
1001         const char *fs, *controller_str;
1002 #else
1003         const char *fs, *controller_str = NULL;
1004 #endif // 0
1005         size_t cs = 0;
1006         int unified;
1007
1008         assert(path);
1009         assert(pid >= 0);
1010
1011         if (controller) {
1012                 if (!cg_controller_is_valid(controller))
1013                         return -EINVAL;
1014         } else
1015                 controller = SYSTEMD_CGROUP_CONTROLLER;
1016
1017         unified = cg_unified_controller(controller);
1018         if (unified < 0)
1019                 return unified;
1020         if (unified == 0) {
1021                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
1022                         controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
1023                 else
1024                         controller_str = controller;
1025
1026                 cs = strlen(controller_str);
1027         }
1028
1029         fs = procfs_file_alloca(pid, "cgroup");
1030         log_debug_elogind("Searching for PID %u in \"%s\" (controller \"%s\")",
1031                           pid, fs, controller);
1032         f = fopen(fs, "re");
1033         if (!f)
1034                 return errno == ENOENT ? -ESRCH : -errno;
1035
1036         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
1037
1038         FOREACH_LINE(line, f, return -errno) {
1039                 char *e, *p;
1040
1041                 truncate_nl(line);
1042
1043                 if (unified) {
1044                         e = startswith(line, "0:");
1045                         if (!e)
1046                                 continue;
1047
1048                         e = strchr(e, ':');
1049                         if (!e)
1050                                 continue;
1051                 } else {
1052                         char *l;
1053                         size_t k;
1054                         const char *word, *state;
1055                         bool found = false;
1056
1057                         l = strchr(line, ':');
1058                         if (!l)
1059                                 continue;
1060
1061                         l++;
1062                         e = strchr(l, ':');
1063                         if (!e)
1064                                 continue;
1065
1066                         *e = 0;
1067                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state)
1068                                 if (k == cs && memcmp(word, controller_str, cs) == 0) {
1069                                         found = true;
1070                                         break;
1071                                 }
1072                         if (!found)
1073                                 continue;
1074                 }
1075
1076                 log_debug_elogind("Found %s:%s", line, e+1);
1077                 p = strdup(e + 1);
1078                 if (!p)
1079                         return -ENOMEM;
1080
1081                 /* Truncate suffix indicating the process is a zombie */
1082                 e = endswith(p, " (deleted)");
1083                 if (e)
1084                         *e = 0;
1085
1086                 *path = p;
1087                 return 0;
1088         }
1089
1090         return -ENODATA;
1091 }
1092
1093 #if 0 /// UNNEEDED by elogind
1094 int cg_install_release_agent(const char *controller, const char *agent) {
1095         _cleanup_free_ char *fs = NULL, *contents = NULL;
1096         const char *sc;
1097         int r;
1098
1099         assert(agent);
1100
1101         r = cg_unified_controller(controller);
1102         if (r < 0)
1103                 return r;
1104         if (r > 0) /* doesn't apply to unified hierarchy */
1105                 return -EOPNOTSUPP;
1106
1107         r = cg_get_path(controller, NULL, "release_agent", &fs);
1108         if (r < 0)
1109                 return r;
1110
1111         r = read_one_line_file(fs, &contents);
1112         if (r < 0)
1113                 return r;
1114
1115         sc = strstrip(contents);
1116         if (isempty(sc)) {
1117                 r = write_string_file(fs, agent, 0);
1118                 if (r < 0)
1119                         return r;
1120         } else if (!path_equal(sc, agent))
1121                 return -EEXIST;
1122
1123         fs = mfree(fs);
1124         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1125         if (r < 0)
1126                 return r;
1127
1128         contents = mfree(contents);
1129         r = read_one_line_file(fs, &contents);
1130         if (r < 0)
1131                 return r;
1132
1133         sc = strstrip(contents);
1134         if (streq(sc, "0")) {
1135                 r = write_string_file(fs, "1", 0);
1136                 if (r < 0)
1137                         return r;
1138
1139                 return 1;
1140         }
1141
1142         if (!streq(sc, "1"))
1143                 return -EIO;
1144
1145         return 0;
1146 }
1147
1148 int cg_uninstall_release_agent(const char *controller) {
1149         _cleanup_free_ char *fs = NULL;
1150         int r;
1151
1152         r = cg_unified_controller(controller);
1153         if (r < 0)
1154                 return r;
1155         if (r > 0) /* Doesn't apply to unified hierarchy */
1156                 return -EOPNOTSUPP;
1157
1158         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1159         if (r < 0)
1160                 return r;
1161
1162         r = write_string_file(fs, "0", 0);
1163         if (r < 0)
1164                 return r;
1165
1166         fs = mfree(fs);
1167
1168         r = cg_get_path(controller, NULL, "release_agent", &fs);
1169         if (r < 0)
1170                 return r;
1171
1172         r = write_string_file(fs, "", 0);
1173         if (r < 0)
1174                 return r;
1175
1176         return 0;
1177 }
1178 #endif // 0
1179
1180 int cg_is_empty(const char *controller, const char *path) {
1181         _cleanup_fclose_ FILE *f = NULL;
1182         pid_t pid;
1183         int r;
1184
1185         assert(path);
1186
1187         r = cg_enumerate_processes(controller, path, &f);
1188         if (r == -ENOENT)
1189                 return 1;
1190         if (r < 0)
1191                 return r;
1192
1193         r = cg_read_pid(f, &pid);
1194         if (r < 0)
1195                 return r;
1196
1197         return r == 0;
1198 }
1199
1200 int cg_is_empty_recursive(const char *controller, const char *path) {
1201         int r;
1202
1203         assert(path);
1204
1205         /* The root cgroup is always populated */
1206         if (controller && empty_or_root(path))
1207                 return false;
1208
1209         r = cg_unified_controller(controller);
1210         if (r < 0)
1211                 return r;
1212         if (r > 0) {
1213                 _cleanup_free_ char *t = NULL;
1214
1215                 /* On the unified hierarchy we can check empty state
1216                  * via the "populated" attribute of "cgroup.events". */
1217
1218                 r = cg_read_event(controller, path, "populated", &t);
1219                 if (r < 0)
1220                         return r;
1221
1222                 return streq(t, "0");
1223         } else {
1224                 _cleanup_closedir_ DIR *d = NULL;
1225                 char *fn;
1226
1227                 r = cg_is_empty(controller, path);
1228                 if (r <= 0)
1229                         return r;
1230
1231                 r = cg_enumerate_subgroups(controller, path, &d);
1232                 if (r == -ENOENT)
1233                         return 1;
1234                 if (r < 0)
1235                         return r;
1236
1237                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1238                         _cleanup_free_ char *p = NULL;
1239
1240                         p = strjoin(path, "/", fn);
1241                         free(fn);
1242                         if (!p)
1243                                 return -ENOMEM;
1244
1245                         r = cg_is_empty_recursive(controller, p);
1246                         if (r <= 0)
1247                                 return r;
1248                 }
1249                 if (r < 0)
1250                         return r;
1251
1252                 return true;
1253         }
1254 }
1255
/* Splits a cgroup specification into controller and path. Three forms
 * are handled:
 *
 *   "/some/path"       -> no controller, path only
 *   "controller"       -> controller only, no path
 *   "controller:/path" -> both; the part after the colon may be empty
 *
 * Both "controller" and "path" are optional output parameters; when
 * non-NULL they receive newly allocated strings, or NULL for a part that
 * is not present in the spec.
 *
 * Returns 0 on success, -EINVAL if the controller name or the path is
 * not acceptable, -ENOMEM on allocation failure. */
int cg_split_spec(const char *spec, char **controller, char **path) {
        char *ctl = NULL, *cgpath = NULL;
        const char *colon;
        int r;

        assert(spec);

        if (spec[0] == '/') {
                /* Path only */
                if (!path_is_normalized(spec))
                        return -EINVAL;

                if (path) {
                        cgpath = strdup(spec);
                        if (!cgpath)
                                return -ENOMEM;

                        *path = path_simplify(cgpath, false);
                }

                if (controller)
                        *controller = NULL;

                return 0;
        }

        colon = strchr(spec, ':');
        if (!colon) {
                /* Controller only */
                if (!cg_controller_is_valid(spec))
                        return -EINVAL;

                if (controller) {
                        ctl = strdup(spec);
                        if (!ctl)
                                return -ENOMEM;

                        *controller = ctl;
                }

                if (path)
                        *path = NULL;

                return 0;
        }

        /* Controller and (possibly empty) path */
        ctl = strndup(spec, colon - spec);
        if (!ctl)
                return -ENOMEM;

        if (!cg_controller_is_valid(ctl)) {
                r = -EINVAL;
                goto fail;
        }

        if (!isempty(colon + 1)) {
                cgpath = strdup(colon + 1);
                if (!cgpath) {
                        r = -ENOMEM;
                        goto fail;
                }

                if (!path_is_normalized(cgpath) ||
                    !path_is_absolute(cgpath)) {
                        r = -EINVAL;
                        goto fail;
                }

                path_simplify(cgpath, false);
        }

        /* Hand over whatever the caller asked for, drop the rest */
        if (controller)
                *controller = ctl;
        else
                free(ctl);

        if (path)
                *path = cgpath;
        else
                free(cgpath);

        return 0;

fail:
        free(cgpath);
        free(ctl);
        return r;
}
1338
1339 int cg_mangle_path(const char *path, char **result) {
1340         _cleanup_free_ char *c = NULL, *p = NULL;
1341         char *t;
1342         int r;
1343
1344         assert(path);
1345         assert(result);
1346
1347         /* First, check if it already is a filesystem path */
1348         if (path_startswith(path, "/sys/fs/cgroup")) {
1349
1350                 t = strdup(path);
1351                 if (!t)
1352                         return -ENOMEM;
1353
1354                 *result = path_simplify(t, false);
1355                 return 0;
1356         }
1357
1358         /* Otherwise, treat it as cg spec */
1359         r = cg_split_spec(path, &c, &p);
1360         if (r < 0)
1361                 return r;
1362
1363         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1364 }
1365
1366 int cg_get_root_path(char **path) {
1367         char *p, *e;
1368         int r;
1369
1370         assert(path);
1371
1372         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1373         if (r < 0)
1374                 return r;
1375
1376 #if 0 /// elogind does not support systemd scopes and slices
1377         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1378         if (!e)
1379                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1380         if (!e)
1381                 e = endswith(p, "/system"); /* even more legacy */
1382 #else
1383         e = endswith(p, "/elogind");
1384 #endif // 0
1385         if (e)
1386                 *e = 0;
1387
1388         *path = p;
1389         return 0;
1390 }
1391
1392 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1393         _cleanup_free_ char *rt = NULL;
1394         char *p;
1395         int r;
1396
1397         assert(cgroup);
1398         assert(shifted);
1399
1400         if (!root) {
1401                 /* If the root was specified let's use that, otherwise
1402                  * let's determine it from PID 1 */
1403
1404                 r = cg_get_root_path(&rt);
1405                 if (r < 0)
1406                         return r;
1407
1408                 root = rt;
1409                 log_debug_elogind("Determined root path: \"%s\"", root);
1410         }
1411
1412         p = path_startswith(cgroup, root);
1413 #if 0 /// With other controllers, elogind might end up in /elogind, and *p is 0
1414         if (p && p > cgroup)
1415 #else
1416         if (p && p[0] && (p > cgroup))
1417 #endif // 0
1418                 *shifted = p - 1;
1419         else
1420                 *shifted = cgroup;
1421
1422         return 0;
1423 }
1424
1425 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1426         _cleanup_free_ char *raw = NULL;
1427         const char *c;
1428         int r;
1429
1430         assert(pid >= 0);
1431         assert(cgroup);
1432
1433         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1434         if (r < 0)
1435                 return r;
1436
1437         log_debug_elogind("Shifting path: \"%s\" (PID %u, root: \"%s\")",
1438                           raw, pid, root ? root : "NULL");
1439         r = cg_shift_path(raw, root, &c);
1440         if (r < 0)
1441                 return r;
1442
1443         if (c == raw)
1444                 *cgroup = TAKE_PTR(raw);
1445         else {
1446                 char *n;
1447
1448                 n = strdup(c);
1449                 if (!n)
1450                         return -ENOMEM;
1451
1452                 *cgroup = n;
1453         }
1454         log_debug_elogind("Resulting cgroup:\"%s\"", *cgroup);
1455
1456         return 0;
1457 }
1458
/* Extracts the leading component of "cgroup", unescapes it with
 * cg_unescape() and returns a newly allocated copy in *unit.
 *
 * In the elogind build the copied prefix is as long as the run of
 * leading '/' characters plus one more character.
 * NOTE(review): this means only a single character after the slashes is
 * kept - confirm this is the intended behaviour for session ids.
 *
 * Returns 0 on success, -ENOMEM if the copy cannot be allocated. */
int cg_path_decode_unit(const char *cgroup, char **unit) {
        char *c, *copy;
        size_t n;

        assert(cgroup);
        assert(unit);

#if 0 /// elogind has a different naming: <controller>:/<session id>. So prefix is always len < 3
        n = strcspn(cgroup, "/");
        if (n < 3)
                return -ENXIO;
#else
        n = strspn(cgroup, "/") + 1;
#endif // 0

        /* Work on a stack copy of the prefix, then skip a possible escape character */
        c = strndupa(cgroup, n);
        c = cg_unescape(c);

#if 0 /// elogind session ids are never valid unit names.
        if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
                return -ENXIO;
#endif // 0

        copy = strdup(c);
        if (!copy)
                return -ENOMEM;

        *unit = copy;

        return 0;
}
1489
1490 static bool valid_slice_name(const char *p, size_t n) {
1491
1492         if (!p)
1493                 return false;
1494
1495         if (n < STRLEN("x.slice"))
1496                 return false;
1497
1498         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1499                 char buf[n+1], *c;
1500
1501                 memcpy(buf, p, n);
1502                 buf[n] = 0;
1503
1504                 c = cg_unescape(buf);
1505
1506                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1507         }
1508
1509         return false;
1510 }
1511
/* Skips over all leading slice components of "p" and returns a pointer
 * to the first path component (leading slashes removed) that is not a
 * valid slice name. The result points into "p". */
static const char *skip_slices(const char *p) {
        size_t n;

        assert(p);

        p += strspn(p, "/");
        n = strcspn(p, "/");

        while (valid_slice_name(p, n)) {
                /* Step over this slice and the slashes following it */
                p += n;
                p += strspn(p, "/");

                n = strcspn(p, "/");
        }

        return p;
}
1529
/* Determines the unit a cgroup path belongs to: leading slices are
 * skipped and the following component is decoded with
 * cg_path_decode_unit().
 *
 * Returns 0 with *ret set to a newly allocated string, -ENXIO if the
 * decoded name ends in ".slice", or the error of cg_path_decode_unit(). */
int cg_path_get_unit(const char *path, char **ret) {
        char *name;
        int r;

        assert(path);
        assert(ret);

        r = cg_path_decode_unit(skip_slices(path), &name);
        if (r < 0)
                return r;

        if (!endswith(name, ".slice")) {
                *ret = name;
                return 0;
        }

        /* We skipped over the slices, don't accept any now */
        free(name);
        return -ENXIO;
}
1553
1554 int cg_pid_get_unit(pid_t pid, char **unit) {
1555         _cleanup_free_ char *cgroup = NULL;
1556         int r;
1557
1558         assert(unit);
1559
1560         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1561         if (r < 0)
1562                 return r;
1563
1564         return cg_path_get_unit(cgroup, unit);
1565 }
1566
1567 #if 0 /// UNNEEDED by elogind
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer into "p" behind the session scope component (and the
 * slashes following it), or NULL if "p" is empty, the first component is
 * not of the form "session-<id>.scope", or <id> is not accepted by
 * session_id_valid().
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the id between prefix (8) and suffix (6), plus NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, so report failure as NULL
                 * like every other error path here (was "false"). */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1604
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer into "p" behind the user manager component (and the
 * slashes following it), or NULL if "p" is empty, the first component is
 * not of the form "user@<uid>.service", or <uid> is rejected by
 * parse_uid().
 */
static const char *skip_user_manager(const char *p) {
        size_t len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < STRLEN("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + len - 8, ".service", 8) != 0)
                return NULL;

        {
                /* Room for the uid between prefix (5) and suffix (8), plus NUL */
                char uid_str[len - 5 - 8 + 1];

                memcpy(uid_str, p + 5, len - 5 - 8);
                uid_str[len - 5 - 8] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;
        }

        p += len;
        p += strspn(p, "/");

        return p;
}
1642
/* Skips the prefix of a user cgroup path: first all slices, then either
 * the user manager service or, failing that, the session scope. Returns
 * a pointer into "path" behind the prefix, or NULL if neither a user
 * manager nor a session component follows the slices. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Skip the user manager, if it's in the path now, or
         * alternatively the user session if that is in the path... */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1659
/* Determines the user unit a cgroup path belongs to: the user prefix is
 * skipped with skip_user_prefix() and the remainder is parsed like a
 * system unit path.
 *
 * Returns -ENXIO if the path has no user prefix, otherwise the result of
 * cg_path_get_unit(). */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* And from here on it looks pretty much the same as for a
         * system unit, hence let's use the same parser from here
         * on. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1675
1676 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1677         _cleanup_free_ char *cgroup = NULL;
1678         int r;
1679
1680         assert(unit);
1681
1682         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1683         if (r < 0)
1684                 return r;
1685
1686         return cg_path_get_user_unit(cgroup, unit);
1687 }
1688
1689 int cg_path_get_machine_name(const char *path, char **machine) {
1690         _cleanup_free_ char *u = NULL;
1691         const char *sl;
1692         int r;
1693
1694         r = cg_path_get_unit(path, &u);
1695         if (r < 0)
1696                 return r;
1697
1698         sl = strjoina("/run/systemd/machines/unit:", u);
1699         return readlink_malloc(sl, machine);
1700 }
1701
1702 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1703         _cleanup_free_ char *cgroup = NULL;
1704         int r;
1705
1706         assert(machine);
1707
1708         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1709         if (r < 0)
1710                 return r;
1711
1712         return cg_path_get_machine_name(cgroup, machine);
1713 }
1714 #endif // 0
1715
/* Extracts the session id from a cgroup path. In the elogind build the
 * path must start with '/', and the session is the (unescaped) first
 * component after that slash.
 *
 * "session" is optional; if non-NULL it receives a newly allocated copy
 * of the id. Returns 0 on success, -ENOENT if the first component is
 * empty (before or after unescaping), -ENOMEM on allocation failure. */
int cg_path_get_session(const char *path, char **session) {
#if 0 /// UNNEEDED by elogind
        _cleanup_free_ char *unit = NULL;
        char *start, *end;
        int r;

        assert(path);

        r = cg_path_get_unit(path, &unit);
        if (r < 0)
                return r;

        start = startswith(unit, "session-");
        if (!start)
                return -ENXIO;
        end = endswith(start, ".scope");
        if (!end)
                return -ENXIO;

        *end = 0;
        if (!session_id_valid(start))
                return -ENXIO;
#else
        /* Elogind uses a flat hierarchy, just "/SESSION".  The only
           wrinkle is that SESSION might be escaped.  */
        const char *first, *stop, *start;

        assert(path);
        log_debug_elogind("path is \"%s\"", path);
        assert(path[0] == '/');

        /* The component spans from behind the leading slash to the
         * next slash or the end of the string. */
        first = path + 1;
        stop = strchrnul(first, '/');
        if (stop == first)
                return -ENOENT;

        start = strndupa(first, stop - first);
        start = cg_unescape(start);

        if (start[0] == '\0')
                return -ENOENT;
#endif // 0

        if (session) {
                char *copy;

                log_debug_elogind("found session: \"%s\"", start);
                copy = strdup(start);
                if (!copy)
                        return -ENOMEM;

                *session = copy;
        }

        return 0;
}
1772
1773 int cg_pid_get_session(pid_t pid, char **session) {
1774         _cleanup_free_ char *cgroup = NULL;
1775         int r;
1776
1777         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1778         if (r < 0)
1779                 return r;
1780
1781         return cg_path_get_session(cgroup, session);
1782 }
1783
1784 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1785 #if 0 /// elogind needs one more value
1786         _cleanup_free_ char *slice = NULL;
1787         char *start, *end;
1788 #else
1789         _cleanup_free_ char *slice = NULL, *p = NULL, *s = NULL;
1790 #endif // 0
1791         int r;
1792
1793         assert(path);
1794
1795         r = cg_path_get_slice(path, &slice);
1796         if (r < 0)
1797                 return r;
1798
1799 #if 0 /// elogind does not support systemd slices
1800         start = startswith(slice, "user-");
1801         if (!start)
1802                 return -ENXIO;
1803         end = endswith(start, ".slice");
1804         if (!end)
1805                 return -ENXIO;
1806
1807         *end = 0;
1808         if (parse_uid(start, uid) < 0)
1809                 return -ENXIO;
1810 #else
1811         p = strappend("/run/systemd/sessions/", slice);
1812
1813         r = parse_env_file(p, NEWLINE, "UID", &s, NULL);
1814         if (r == -ENOENT)
1815                 return -ENXIO;
1816         if (r < 0)
1817                 return r;
1818         if (isempty(s))
1819                 return -EIO;
1820
1821         if (parse_uid(s, uid) < 0)
1822                 return -ENXIO;
1823 #endif // 0
1824
1825         return 0;
1826 }
1827
1828 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1829         _cleanup_free_ char *cgroup = NULL;
1830         int r;
1831
1832         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1833         if (r < 0)
1834                 return r;
1835
1836         return cg_path_get_owner_uid(cgroup, uid);
1837 }
1838
/* Determines the "slice" of the cgroup path "p". In the elogind build
 * this is simply a copy of "p" with one leading '/' removed, if there is
 * one.
 *
 * Returns 0 with *slice set to a newly allocated string, or -ENOMEM if
 * the copy cannot be allocated. */
int cg_path_get_slice(const char *p, char **slice) {
        const char *e = NULL;

        assert(p);
        assert(slice);

#if 0 /// elogind does not support systemd slices
        /* Finds the right-most slice unit from the beginning, but
         * stops before we come to the first non-slice unit. */

        for (;;) {
                size_t n;

                p += strspn(p, "/");

                n = strcspn(p, "/");
                if (!valid_slice_name(p, n)) {

                        if (!e) {
                                char *s;

                                s = strdup(SPECIAL_ROOT_SLICE);
                                if (!s)
                                        return -ENOMEM;

                                *slice = s;
                                return 0;
                        }

                        return cg_path_decode_unit(e, slice);
                }

                e = p;
                p += n;
        }
#else
        /* In elogind, what is reported here, is the location of
         * the session. This is derived from /proc/<self|PID>/cgroup.
         * In there we look at the controller, which will look something
         * like "1:name=openrc:/3".
         * The last part gets extracted (and is now p), which is "/3" in
         * this case. The three is the session id, and that can be mapped.
         */
        e = startswith(p, "/");

        *slice = strdup(e ? e : p);
        /* Report allocation failures instead of returning success with a
         * NULL slice. */
        if (!*slice)
                return -ENOMEM;

        return 0;
#endif // 0
}
1892
1893 int cg_pid_get_slice(pid_t pid, char **slice) {
1894         _cleanup_free_ char *cgroup = NULL;
1895         int r;
1896
1897         assert(slice);
1898
1899         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1900         log_debug_elogind("Found cgroup %s for pid %u (result %d)",
1901                           cgroup, pid, r);
1902         if (r < 0)
1903                 return r;
1904
1905         return cg_path_get_slice(cgroup, slice);
1906 }
1907
/* Determines the user slice of the cgroup path "p". In the elogind
 * build no prefix is skipped and the path is passed to
 * cg_path_get_slice() unmodified, whose result is returned. */
int cg_path_get_user_slice(const char *p, char **slice) {
#if 0 /// UNNEEDED by elogind
        const char *t;
#endif // 0

        assert(p);
        assert(slice);

#if 0 /// nothing to skip in elogind
        t = skip_user_prefix(p);
        if (!t)
                return -ENXIO;
#endif // 0

#if 0 /// UNNEEDED by elogind
        /* And now it looks pretty much the same as for a system
         * slice, so let's just use the same parser from here on. */
        return cg_path_get_slice(t, slice);
#else
        /* In elogind there is nothing to skip, we can use the path
         * directly. Generally speaking this is always a session id
         * to user mapping. */
        return cg_path_get_slice(p, slice);
#endif // 0
}
1932
1933 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1934         _cleanup_free_ char *cgroup = NULL;
1935         int r;
1936
1937         assert(slice);
1938
1939         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1940         if (r < 0)
1941                 return r;
1942
1943         return cg_path_get_user_slice(cgroup, slice);
1944 }
1945
1946 char *cg_escape(const char *p) {
1947         bool need_prefix = false;
1948
1949         /* This implements very minimal escaping for names to be used
1950          * as file names in the cgroup tree: any name which might
1951          * conflict with a kernel name or is prefixed with '_' is
1952          * prefixed with a '_'. That way, when reading cgroup names it
1953          * is sufficient to remove a single prefixing underscore if
1954          * there is one. */
1955
1956         /* The return value of this function (unlike cg_unescape())
1957          * needs free()! */
1958
1959         if (IN_SET(p[0], 0, '_', '.') ||
1960             streq(p, "notify_on_release") ||
1961             streq(p, "release_agent") ||
1962             streq(p, "tasks") ||
1963             startswith(p, "cgroup."))
1964                 need_prefix = true;
1965         else {
1966                 const char *dot;
1967
1968                 dot = strrchr(p, '.');
1969                 if (dot) {
1970                         CGroupController c;
1971                         size_t l = dot - p;
1972
1973                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1974                                 const char *n;
1975
1976                                 n = cgroup_controller_to_string(c);
1977
1978                                 if (l != strlen(n))
1979                                         continue;
1980
1981                                 if (memcmp(p, n, l) != 0)
1982                                         continue;
1983
1984                                 need_prefix = true;
1985                                 break;
1986                         }
1987                 }
1988         }
1989
1990         if (need_prefix)
1991                 return strappend("_", p);
1992
1993         return strdup(p);
1994 }
1995
/* Reverses cg_escape(): skips a single leading '_' if there is one.
 *
 * The return value of this function (unlike cg_escape()) doesn't need
 * free()! It points into "p" itself. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
2007
2008 #define CONTROLLER_VALID                        \
2009         DIGITS LETTERS                          \
2010         "_"
2011
2012 bool cg_controller_is_valid(const char *p) {
2013         const char *t, *s;
2014
2015         if (!p)
2016                 return false;
2017
2018         if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
2019                 return true;
2020
2021         s = startswith(p, "name=");
2022         if (s)
2023                 p = s;
2024
2025         if (IN_SET(*p, 0, '_'))
2026                 return false;
2027
2028         for (t = p; *t; t++)
2029                 if (!strchr(CONTROLLER_VALID, *t))
2030                         return false;
2031
2032         if (t - p > FILENAME_MAX)
2033                 return false;
2034
2035         return true;
2036 }
2037
2038 #if 0 /// UNNEEDED by elogind
2039 int cg_slice_to_path(const char *unit, char **ret) {
2040         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
2041         const char *dash;
2042         int r;
2043
2044         assert(unit);
2045         assert(ret);
2046
2047         if (streq(unit, SPECIAL_ROOT_SLICE)) {
2048                 char *x;
2049
2050                 x = strdup("");
2051                 if (!x)
2052                         return -ENOMEM;
2053                 *ret = x;
2054                 return 0;
2055         }
2056
2057         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
2058                 return -EINVAL;
2059
2060         if (!endswith(unit, ".slice"))
2061                 return -EINVAL;
2062
2063         r = unit_name_to_prefix(unit, &p);
2064         if (r < 0)
2065                 return r;
2066
2067         dash = strchr(p, '-');
2068
2069         /* Don't allow initial dashes */
2070         if (dash == p)
2071                 return -EINVAL;
2072
2073         while (dash) {
2074                 _cleanup_free_ char *escaped = NULL;
2075                 char n[dash - p + sizeof(".slice")];
2076
2077 #if HAS_FEATURE_MEMORY_SANITIZER
2078                 /* msan doesn't instrument stpncpy, so it thinks
2079                  * n is later used unitialized:
2080                  * https://github.com/google/sanitizers/issues/926
2081                  */
2082                 zero(n);
2083 #endif
2084
2085                 /* Don't allow trailing or double dashes */
2086                 if (IN_SET(dash[1], 0, '-'))
2087                         return -EINVAL;
2088
2089                 strcpy(stpncpy(n, p, dash - p), ".slice");
2090                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
2091                         return -EINVAL;
2092
2093                 escaped = cg_escape(n);
2094                 if (!escaped)
2095                         return -ENOMEM;
2096
2097                 if (!strextend(&s, escaped, "/", NULL))
2098                         return -ENOMEM;
2099
2100                 dash = strchr(dash+1, '-');
2101         }
2102
2103         e = cg_escape(unit);
2104         if (!e)
2105                 return -ENOMEM;
2106
2107         if (!strextend(&s, e, NULL))
2108                 return -ENOMEM;
2109
2110         *ret = TAKE_PTR(s);
2111
2112         return 0;
2113 }
2114 #endif // 0
2115
2116 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
2117         _cleanup_free_ char *p = NULL;
2118         int r;
2119
2120         r = cg_get_path(controller, path, attribute, &p);
2121         if (r < 0)
2122                 return r;
2123
2124         return write_string_file(p, value, 0);
2125 }
2126
2127 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
2128         _cleanup_free_ char *p = NULL;
2129         int r;
2130
2131         r = cg_get_path(controller, path, attribute, &p);
2132         if (r < 0)
2133                 return r;
2134
2135         return read_one_line_file(p, ret);
2136 }
2137
2138 #if 0 /// UNNEEDED by elogind
2139 int cg_get_keyed_attribute(
2140                 const char *controller,
2141                 const char *path,
2142                 const char *attribute,
2143                 char **keys,
2144                 char **ret_values) {
2145
2146         _cleanup_free_ char *filename = NULL, *contents = NULL;
2147         const char *p;
2148         size_t n, i, n_done = 0;
2149         char **v;
2150         int r;
2151
2152         /* Reads one or more fields of a cgroupsv2 keyed attribute file. The 'keys' parameter should be an strv with
2153          * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
2154          * entries as 'keys'. On success each entry will be set to the value of the matching key.
2155          *
2156          * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */
2157
2158         r = cg_get_path(controller, path, attribute, &filename);
2159         if (r < 0)
2160                 return r;
2161
2162         r = read_full_file(filename, &contents, NULL);
2163         if (r < 0)
2164                 return r;
2165
2166         n = strv_length(keys);
2167         if (n == 0) /* No keys to retrieve? That's easy, we are done then */
2168                 return 0;
2169
2170         /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
2171         v = newa0(char*, n);
2172
2173         for (p = contents; *p;) {
2174                 const char *w = NULL;
2175
2176                 for (i = 0; i < n; i++)
2177                         if (!v[i]) {
2178                                 w = first_word(p, keys[i]);
2179                                 if (w)
2180                                         break;
2181                         }
2182
2183                 if (w) {
2184                         size_t l;
2185
2186                         l = strcspn(w, NEWLINE);
2187                         v[i] = strndup(w, l);
2188                         if (!v[i]) {
2189                                 r = -ENOMEM;
2190                                 goto fail;
2191                         }
2192
2193                         n_done++;
2194                         if (n_done >= n)
2195                                 goto done;
2196
2197                         p = w + l;
2198                 } else
2199                         p += strcspn(p, NEWLINE);
2200
2201                 p += strspn(p, NEWLINE);
2202         }
2203
2204         r = -ENXIO;
2205
2206 fail:
2207         for (i = 0; i < n; i++)
2208                 free(v[i]);
2209
2210         return r;
2211
2212 done:
2213         memcpy(ret_values, v, sizeof(char*) * n);
2214         return 0;
2215
2216 }
2217
2218 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
2219         CGroupController c;
2220         bool created;
2221         int r;
2222
2223         /* This one will create a cgroup in our private tree, but also
2224          * duplicate it in the trees specified in mask, and remove it
2225          * in all others.
2226          *
2227          * Returns 0 if the group already existed in the systemd hierarchy,
2228          * 1 on success, negative otherwise.
2229          */
2230
2231         /* First create the cgroup in our own hierarchy. */
2232         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
2233         if (r < 0)
2234                 return r;
2235         created = !!r;
2236
2237         /* If we are in the unified hierarchy, we are done now */
2238         r = cg_all_unified();
2239         if (r < 0)
2240                 return r;
2241         if (r > 0)
2242                 return created;
2243
2244         /* Otherwise, do the same in the other hierarchies */
2245         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2246                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2247                 const char *n;
2248
2249                 n = cgroup_controller_to_string(c);
2250
2251                 if (mask & bit)
2252                         (void) cg_create(n, path);
2253                 else if (supported & bit)
2254                         (void) cg_trim(n, path, true);
2255         }
2256
2257         return created;
2258 }
2259
2260 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2261         CGroupController c;
2262         int r;
2263
2264         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
2265         if (r < 0)
2266                 return r;
2267
2268         r = cg_all_unified();
2269         if (r < 0)
2270                 return r;
2271         if (r > 0)
2272                 return 0;
2273
2274         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2275                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2276                 const char *p = NULL;
2277
2278                 if (!(supported & bit))
2279                         continue;
2280
2281                 if (path_callback)
2282                         p = path_callback(bit, userdata);
2283
2284                 if (!p)
2285                         p = path;
2286
2287                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
2288         }
2289
2290         return 0;
2291 }
2292
2293 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
2294         Iterator i;
2295         void *pidp;
2296         int r = 0;
2297
2298         SET_FOREACH(pidp, pids, i) {
2299                 pid_t pid = PTR_TO_PID(pidp);
2300                 int q;
2301
2302                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
2303                 if (q < 0 && r >= 0)
2304                         r = q;
2305         }
2306
2307         return r;
2308 }
2309
2310 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
2311         CGroupController c;
2312         int r = 0, q;
2313
2314         if (!path_equal(from, to))  {
2315                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
2316                 if (r < 0)
2317                         return r;
2318         }
2319
2320         q = cg_all_unified();
2321         if (q < 0)
2322                 return q;
2323         if (q > 0)
2324                 return r;
2325
2326         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2327                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2328                 const char *p = NULL;
2329
2330                 if (!(supported & bit))
2331                         continue;
2332
2333                 if (to_callback)
2334                         p = to_callback(bit, userdata);
2335
2336                 if (!p)
2337                         p = to;
2338
2339                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2340         }
2341
2342         return 0;
2343 }
2344
2345 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2346         CGroupController c;
2347         int r, q;
2348
2349         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2350         if (r < 0)
2351                 return r;
2352
2353         q = cg_all_unified();
2354         if (q < 0)
2355                 return q;
2356         if (q > 0)
2357                 return r;
2358
2359         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2360                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2361
2362                 if (!(supported & bit))
2363                         continue;
2364
2365                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2366         }
2367
2368         return 0;
2369 }
2370 #endif // 0
2371
2372 int cg_mask_to_string(CGroupMask mask, char **ret) {
2373         _cleanup_free_ char *s = NULL;
2374         size_t n = 0, allocated = 0;
2375         bool space = false;
2376         CGroupController c;
2377
2378         assert(ret);
2379
2380         if (mask == 0) {
2381                 *ret = NULL;
2382                 return 0;
2383         }
2384
2385         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2386                 const char *k;
2387                 size_t l;
2388
2389                 if (!(mask & CGROUP_CONTROLLER_TO_MASK(c)))
2390                         continue;
2391
2392                 k = cgroup_controller_to_string(c);
2393                 l = strlen(k);
2394
2395                 if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
2396                         return -ENOMEM;
2397
2398                 if (space)
2399                         s[n] = ' ';
2400                 memcpy(s + n + space, k, l);
2401                 n += space + l;
2402
2403                 space = true;
2404         }
2405
2406         assert(s);
2407
2408         s[n] = 0;
2409         *ret = TAKE_PTR(s);
2410
2411         return 0;
2412 }
2413
2414 int cg_mask_from_string(const char *value, CGroupMask *mask) {
2415         assert(mask);
2416         assert(value);
2417
2418         for (;;) {
2419                 _cleanup_free_ char *n = NULL;
2420                 CGroupController v;
2421                 int r;
2422
2423                 r = extract_first_word(&value, &n, NULL, 0);
2424                 if (r < 0)
2425                         return r;
2426                 if (r == 0)
2427                         break;
2428
2429                 v = cgroup_controller_from_string(n);
2430                 if (v < 0)
2431                         continue;
2432
2433                 *mask |= CGROUP_CONTROLLER_TO_MASK(v);
2434         }
2435         return 0;
2436 }
2437
2438 int cg_mask_supported(CGroupMask *ret) {
2439         CGroupMask mask = 0;
2440         int r;
2441
2442         /* Determines the mask of supported cgroup controllers. Only
2443          * includes controllers we can make sense of and that are
2444          * actually accessible. */
2445
2446         r = cg_all_unified();
2447         if (r < 0)
2448                 return r;
2449         if (r > 0) {
2450                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2451
2452                 /* In the unified hierarchy we can read the supported
2453                  * and accessible controllers from a the top-level
2454                  * cgroup attribute */
2455
2456                 r = cg_get_root_path(&root);
2457                 if (r < 0)
2458                         return r;
2459
2460                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2461                 if (r < 0)
2462                         return r;
2463
2464                 r = read_one_line_file(path, &controllers);
2465                 if (r < 0)
2466                         return r;
2467
2468                 r = cg_mask_from_string(controllers, &mask);
2469                 if (r < 0)
2470                         return r;
2471
2472                 /* Currently, we support the cpu, memory, io and pids
2473                  * controller in the unified hierarchy, mask
2474                  * everything else off. */
2475                 mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2476
2477         } else {
2478                 CGroupController c;
2479
2480                 /* In the legacy hierarchy, we check whether which
2481                  * hierarchies are mounted. */
2482
2483                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2484                         const char *n;
2485
2486                         n = cgroup_controller_to_string(c);
2487                         if (controller_is_accessible(n) >= 0)
2488                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2489                 }
2490         }
2491
2492         *ret = mask;
2493         return 0;
2494 }
2495
2496 #if 0 /// UNNEEDED by elogind
2497 int cg_kernel_controllers(Set **ret) {
2498         _cleanup_set_free_free_ Set *controllers = NULL;
2499         _cleanup_fclose_ FILE *f = NULL;
2500         int r;
2501
2502         assert(ret);
2503
2504         /* Determines the full list of kernel-known controllers. Might
2505          * include controllers we don't actually support, arbitrary
2506          * named hierarchies and controllers that aren't currently
2507          * accessible (because not mounted). */
2508
2509         controllers = set_new(&string_hash_ops);
2510         if (!controllers)
2511                 return -ENOMEM;
2512
2513         f = fopen("/proc/cgroups", "re");
2514         if (!f) {
2515                 if (errno == ENOENT) {
2516                         *ret = NULL;
2517                         return 0;
2518                 }
2519
2520                 return -errno;
2521         }
2522
2523         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
2524
2525         /* Ignore the header line */
2526         (void) read_line(f, (size_t) -1, NULL);
2527
2528         for (;;) {
2529                 char *controller;
2530                 int enabled = 0;
2531
2532                 errno = 0;
2533                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2534
2535                         if (feof(f))
2536                                 break;
2537
2538                         if (ferror(f) && errno > 0)
2539                                 return -errno;
2540
2541                         return -EBADMSG;
2542                 }
2543
2544                 if (!enabled) {
2545                         free(controller);
2546                         continue;
2547                 }
2548
2549                 if (!cg_controller_is_valid(controller)) {
2550                         free(controller);
2551                         return -EBADMSG;
2552                 }
2553
2554                 r = set_consume(controllers, controller);
2555                 if (r < 0)
2556                         return r;
2557         }
2558
2559         *ret = TAKE_PTR(controllers);
2560
2561         return 0;
2562 }
2563 #endif // 0
2564
/* Per-thread cache of the detected cgroup setup. Starts out as CGROUP_UNIFIED_UNKNOWN, is filled in by
 * cg_unified_update() and reset again by cg_unified_flush(). */
static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;

/* The hybrid mode was initially implemented in v232 and simply mounted cgroup v2 on /sys/fs/cgroup/systemd.  This
 * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
 * /sys/fs/cgroup/systemd.  From v233 and on, the hybrid mode mounts v2 on /sys/fs/cgroup/unified and maintains
 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
 *
 * To keep live upgrade working, we detect and support v232 layout.  When v232 layout is detected, to keep cgroup v2
 * process management but disable the compat dual layout, we return %true on
 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
 */
static thread_local bool unified_systemd_v232;
2577
/* Detects which cgroup setup is mounted below /sys/fs/cgroup/ and stores the result in unified_cache (and,
 * for the hybrid setup, in unified_systemd_v232). Does nothing if a result is already cached.
 *
 * Returns 0 on success (including the cached case), -ENOMEDIUM if an unknown file system type is found on
 * /sys/fs/cgroup/, or the return value of log_debug_errno() (presumably the negative errno) if statfs()
 * on /sys/fs/cgroup/ fails. */
static int cg_unified_update(void) {

        struct statfs fs;

        /* Checks if we support the unified hierarchy. Returns an
         * error when the cgroup hierarchies aren't mounted yet or we
         * have any other trouble determining if the unified hierarchy
         * is supported. */

        /* Already determined earlier? Then there's nothing to do. (This relies on CGROUP_UNIFIED_UNKNOWN
         * sorting below CGROUP_UNIFIED_NONE.) */
        if (unified_cache >= CGROUP_UNIFIED_NONE)
                return 0;

        if (statfs("/sys/fs/cgroup/", &fs) < 0)
                return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");

        if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
                log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
                unified_cache = CGROUP_UNIFIED_ALL;
#if 0 /// The handling of cgroups is a bit different with elogind
        } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
                        log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
#else
        /* In the elogind variant both a cgroup v1 file system and a tmpfs on /sys/fs/cgroup/ lead to the
         * check for a cgroup2 mount on /sys/fs/cgroup/unified/ below. */
        } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)
              || F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
#endif // 0
                /* cgroup2 on /sys/fs/cgroup/unified/ means the (non-v232) hybrid setup. A failing statfs()
                 * here is not an error, it simply selects the other branch. */
                if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
                    F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
                        unified_cache = CGROUP_UNIFIED_SYSTEMD;
                        unified_systemd_v232 = false;
                } else {
#if 0 /// There is no sub-grouping within elogind
                        if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
                                return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");

                        if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
                                log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
                                unified_cache = CGROUP_UNIFIED_SYSTEMD;
                                unified_systemd_v232 = true;
                        } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
                                log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
                                unified_cache = CGROUP_UNIFIED_NONE;
                        } else {
                                log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
                                          (unsigned long long) fs.f_type);
                                unified_cache = CGROUP_UNIFIED_NONE;
                        }
#else
                        /* Without cgroup2 on /sys/fs/cgroup/unified/ the elogind variant settles on the
                         * legacy setup right away; /sys/fs/cgroup/systemd/ is not inspected. */
                        unified_cache = CGROUP_UNIFIED_NONE;
#endif // 0
                }
        } else {
                /* Neither cgroup2, cgroup nor tmpfs. Nothing is cached in this case, so the next call
                 * checks again. */
                log_debug("Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
                          (unsigned long long) fs.f_type);
                return -ENOMEDIUM;
        }

        return 0;
}
2636
2637 int cg_unified_controller(const char *controller) {
2638         int r;
2639
2640         r = cg_unified_update();
2641         if (r < 0)
2642                 return r;
2643
2644         if (unified_cache == CGROUP_UNIFIED_NONE)
2645                 return false;
2646
2647         if (unified_cache >= CGROUP_UNIFIED_ALL)
2648                 return true;
2649
2650 #if 0 /// only if elogind is the controller we can use cgroups2 in hybrid mode
2651         return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
2652 #else
2653         return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER_HYBRID);
2654 #endif // 0
2655 }
2656
2657 int cg_all_unified(void) {
2658         int r;
2659
2660         r = cg_unified_update();
2661         if (r < 0)
2662                 return r;
2663
2664         return unified_cache >= CGROUP_UNIFIED_ALL;
2665 }
2666
2667 int cg_hybrid_unified(void) {
2668         int r;
2669
2670         r = cg_unified_update();
2671         if (r < 0)
2672                 return r;
2673
2674         return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2675 }
2676
/* Drops the cached cgroup setup and runs the detection again. Returns whatever cg_unified_update()
 * returns. */
int cg_unified_flush(void) {
        /* Marking the cache as unknown is what makes cg_unified_update() look at the mounts again */
        unified_cache = CGROUP_UNIFIED_UNKNOWN;

        return cg_unified_update();
}
2682
2683 #if 0 /// UNNEEDED by elogind
2684 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2685         _cleanup_fclose_ FILE *f = NULL;
2686         _cleanup_free_ char *fs = NULL;
2687         CGroupController c;
2688         int r;
2689
2690         assert(p);
2691
2692         if (supported == 0)
2693                 return 0;
2694
2695         r = cg_all_unified();
2696         if (r < 0)
2697                 return r;
2698         if (r == 0) /* on the legacy hiearchy there's no joining of controllers defined */
2699                 return 0;
2700
2701         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2702         if (r < 0)
2703                 return r;
2704
2705         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2706                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2707                 const char *n;
2708
2709                 if (!(supported & bit))
2710                         continue;
2711
2712                 n = cgroup_controller_to_string(c);
2713                 {
2714                         char s[1 + strlen(n) + 1];
2715
2716                         s[0] = mask & bit ? '+' : '-';
2717                         strcpy(s + 1, n);
2718
2719                         if (!f) {
2720                                 f = fopen(fs, "we");
2721                                 if (!f) {
2722                                         log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
2723                                         break;
2724                                 }
2725                         }
2726
2727                         r = write_string_stream(f, s, 0);
2728                         if (r < 0) {
2729                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2730                                 clearerr(f);
2731                         }
2732                 }
2733         }
2734
2735         return 0;
2736 }
2737 #endif // 0
2738
2739 bool cg_is_unified_wanted(void) {
2740         static thread_local int wanted = -1;
2741 #if 0 /// UNNEEDED by elogind
2742         int r;
2743         bool b;
2744 #endif // 0
2745         const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
2746
2747         /* If we have a cached value, return that. */
2748         if (wanted >= 0)
2749                 return wanted;
2750
2751         /* If the hierarchy is already mounted, then follow whatever
2752          * was chosen for it. */
2753         if (cg_unified_flush() >= 0)
2754                 return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
2755
2756 #if 0 /// elogind is not init and has no business with kernel command line
2757         /* Otherwise, let's see what the kernel command line has to say.
2758          * Since checking is expensive, cache a non-error result. */
2759         r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
2760
2761         return (wanted = r > 0 ? b : is_default);
2762 #else
2763         return is_default;
2764 #endif // 0
2765 }
2766
2767 bool cg_is_legacy_wanted(void) {
2768         static thread_local int wanted = -1;
2769
2770         /* If we have a cached value, return that. */
2771         if (wanted >= 0)
2772                 return wanted;
2773
2774         /* Check if we have cgroups2 already mounted. */
2775         if (cg_unified_flush() >= 0 &&
2776             unified_cache == CGROUP_UNIFIED_ALL)
2777                 return (wanted = false);
2778
2779         /* Otherwise, assume that at least partial legacy is wanted,
2780          * since cgroups2 should already be mounted at this point. */
2781         return (wanted = true);
2782 }
2783
2784 bool cg_is_hybrid_wanted(void) {
2785         static thread_local int wanted = -1;
2786 #if 0 /// UNNEEDED by elogind
2787         int r;
2788         bool b;
2789 #endif // 0
2790         const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
2791         /* We default to true if the default is "hybrid", obviously,
2792          * but also when the default is "unified", because if we get
2793          * called, it means that unified hierarchy was not mounted. */
2794
2795         /* If we have a cached value, return that. */
2796         if (wanted >= 0)
2797                 return wanted;
2798
2799         /* If the hierarchy is already mounted, then follow whatever
2800          * was chosen for it. */
2801         if (cg_unified_flush() >= 0 &&
2802             unified_cache == CGROUP_UNIFIED_ALL)
2803                 return (wanted = false);
2804
2805 #if 0 /// elogind is not init and has no business with kernel command line
2806         /* Otherwise, let's see what the kernel command line has to say.
2807          * Since checking is expensive, cache a non-error result. */
2808         r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
2809
2810         /* The meaning of the kernel option is reversed wrt. to the return value
2811          * of this function, hence the negation. */
2812         return (wanted = r > 0 ? !b : is_default);
2813 #else
2814         return is_default;
2815 #endif // 0
2816 }
2817
2818 #if 0 /// UNNEEDED by elogind
2819 int cg_weight_parse(const char *s, uint64_t *ret) {
2820         uint64_t u;
2821         int r;
2822
2823         if (isempty(s)) {
2824                 *ret = CGROUP_WEIGHT_INVALID;
2825                 return 0;
2826         }
2827
2828         r = safe_atou64(s, &u);
2829         if (r < 0)
2830                 return r;
2831
2832         if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2833                 return -ERANGE;
2834
2835         *ret = u;
2836         return 0;
2837 }
2838
/* Default value of each I/O limit, indexed by the CGROUP_IO_* limit type constants: all four limits
 * default to CGROUP_LIMIT_MAX. */
const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]    = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WBPS_MAX]    = CGROUP_LIMIT_MAX,
        [CGROUP_IO_RIOPS_MAX]   = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WIOPS_MAX]   = CGROUP_LIMIT_MAX,
};
2845
/* Name of each I/O limit type, indexed by the CGROUP_IO_* limit type constants. */
static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]    = "IOReadBandwidthMax",
        [CGROUP_IO_WBPS_MAX]    = "IOWriteBandwidthMax",
        [CGROUP_IO_RIOPS_MAX]   = "IOReadIOPSMax",
        [CGROUP_IO_WIOPS_MAX]   = "IOWriteIOPSMax",
};

/* NOTE(review): presumably generates cgroup_io_limit_type_to_string()/_from_string() on top of the table
 * above — confirm against the macro's definition in string-table.h. */
DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2854
2855 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2856         uint64_t u;
2857         int r;
2858
2859         if (isempty(s)) {
2860                 *ret = CGROUP_CPU_SHARES_INVALID;
2861                 return 0;
2862         }
2863
2864         r = safe_atou64(s, &u);
2865         if (r < 0)
2866                 return r;
2867
2868         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2869                 return -ERANGE;
2870
2871         *ret = u;
2872         return 0;
2873 }
2874
2875 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2876         uint64_t u;
2877         int r;
2878
2879         if (isempty(s)) {
2880                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2881                 return 0;
2882         }
2883
2884         r = safe_atou64(s, &u);
2885         if (r < 0)
2886                 return r;
2887
2888         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2889                 return -ERANGE;
2890
2891         *ret = u;
2892         return 0;
2893 }
2894 #endif // 0
2895
2896 bool is_cgroup_fs(const struct statfs *s) {
2897         return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2898                is_fs_type(s, CGROUP2_SUPER_MAGIC);
2899 }
2900
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        /* Tells whether the file system 'fd' refers to is a cgroup (v1 or v2) file system.
         *
         * As the return type is bool there is no way to report an error to the caller: a negative errno
         * returned from here would be converted to "true". Hence, if the file system cannot be queried we
         * say "false", i.e. a descriptor we know nothing about is not treated as cgroupfs. */

        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2909
2910 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2911         [CGROUP_CONTROLLER_CPU] = "cpu",
2912         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2913         [CGROUP_CONTROLLER_IO] = "io",
2914         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2915         [CGROUP_CONTROLLER_MEMORY] = "memory",
2916         [CGROUP_CONTROLLER_DEVICES] = "devices",
2917         [CGROUP_CONTROLLER_PIDS] = "pids",
2918 };
2919
2920 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);