src/basic/cgroup-util.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2010 Lennart Poettering
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 #include <dirent.h>
  21 #include <errno.h>
  22 #include <ftw.h>
  23 //#include <limits.h>
  24 #include <signal.h>
  25 //#include <stddef.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <sys/stat.h>
  29 //#include <sys/statfs.h>
  30 #include <sys/types.h>
  31 #include <sys/xattr.h>
  32 #include <unistd.h>
  33
  34 #include "alloc-util.h"
  35 #include "cgroup-util.h"
  36 //#include "def.h"
  37 #include "dirent-util.h"
  38 #include "extract-word.h"
  39 #include "fd-util.h"
  40 #include "fileio.h"
  41 #include "format-util.h"
  42 #include "fs-util.h"
  43 //#include "log.h"
  44 #include "login-util.h"
  45 #include "macro.h"
  46 //#include "missing.h"
  47 #include "mkdir.h"
  48 #include "parse-util.h"
  49 #include "path-util.h"
  50 #include "proc-cmdline.h"
  51 #include "process-util.h"
  52 #include "set.h"
  53 //#include "special.h"
  54 #include "stat-util.h"
  55 #include "stdio-util.h"
  56 #include "string-table.h"
  57 #include "string-util.h"
  58 #include "unit-name.h"
  59 #include "user-util.h"
  60
  61 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  62         _cleanup_free_ char *fs = NULL;
  63         FILE *f;
  64         int r;
  65
  66         assert(_f);
  67
  68         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  69         if (r < 0)
  70                 return r;
  71
  72         f = fopen(fs, "re");
  73         if (!f)
  74                 return -errno;
  75
  76         *_f = f;
  77         return 0;
  78 }
  79
  80 int cg_read_pid(FILE *f, pid_t *_pid) {
  81         unsigned long ul;
  82
  83         /* Note that the cgroup.procs might contain duplicates! See
  84          * cgroups.txt for details. */
  85
  86         assert(f);
  87         assert(_pid);
  88
  89         errno = 0;
  90         if (fscanf(f, "%lu", &ul) != 1) {
  91
  92                 if (feof(f))
  93                         return 0;
  94
  95                 return errno > 0 ? -errno : -EIO;
  96         }
  97
  98         if (ul <= 0)
  99                 return -EIO;
 100
 101         *_pid = (pid_t) ul;
 102         return 1;
 103 }
 104
 105 int cg_read_event(const char *controller, const char *path, const char *event,
 106                   char **val)
 107 {
 108         _cleanup_free_ char *events = NULL, *content = NULL;
 109         char *p, *line;
 110         int r;
 111
 112         r = cg_get_path(controller, path, "cgroup.events", &events);
 113         if (r < 0)
 114                 return r;
 115
 116         r = read_full_file(events, &content, NULL);
 117         if (r < 0)
 118                 return r;
 119
 120         p = content;
 121         while ((line = strsep(&p, "\n"))) {
 122                 char *key;
 123
 124                 key = strsep(&line, " ");
 125                 if (!key || !line)
 126                         return -EINVAL;
 127
 128                 if (strcmp(key, event))
 129                         continue;
 130
 131                 *val = strdup(line);
 132                 return 0;
 133         }
 134
 135         return -ENOENT;
 136 }
 137
 138 #if 0 /// UNNEEDED by elogind
 139 bool cg_ns_supported(void) {
 140         static thread_local int enabled = -1;
 141
 142         if (enabled >= 0)
 143                 return enabled;
 144
 145         if (access("/proc/self/ns/cgroup", F_OK) == 0)
 146                 enabled = 1;
 147         else
 148                 enabled = 0;
 149
 150         return enabled;
 151 }
 152 #endif //0
 153
 154 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
 155         _cleanup_free_ char *fs = NULL;
 156         int r;
 157         DIR *d;
 158
 159         assert(_d);
 160
 161         /* This is not recursive! */
 162
 163         r = cg_get_path(controller, path, NULL, &fs);
 164         if (r < 0)
 165                 return r;
 166
 167         d = opendir(fs);
 168         if (!d)
 169                 return -errno;
 170
 171         *_d = d;
 172         return 0;
 173 }
 174
 175 int cg_read_subgroup(DIR *d, char **fn) {
 176         struct dirent *de;
 177
 178         assert(d);
 179         assert(fn);
 180
 181         FOREACH_DIRENT_ALL(de, d, return -errno) {
 182                 char *b;
 183
 184                 if (de->d_type != DT_DIR)
 185                         continue;
 186
 187                 if (dot_or_dot_dot(de->d_name))
 188                         continue;
 189
 190                 b = strdup(de->d_name);
 191                 if (!b)
 192                         return -ENOMEM;
 193
 194                 *fn = b;
 195                 return 1;
 196         }
 197
 198         return 0;
 199 }
 200
 201 int cg_rmdir(const char *controller, const char *path) {
 202         _cleanup_free_ char *p = NULL;
 203         int r;
 204
 205         r = cg_get_path(controller, path, NULL, &p);
 206         if (r < 0)
 207                 return r;
 208
 209         r = rmdir(p);
 210         if (r < 0 && errno != ENOENT)
 211                 return -errno;
 212
 213         r = cg_hybrid_unified();
 214         if (r < 0)
 215                 return r;
 216         if (r == 0)
 217                 return 0;
 218
 219         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 220                 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
 221                 if (r < 0)
 222                         log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
 223         }
 224
 225         return 0;
 226 }
 227
 228 int cg_kill(
 229                 const char *controller,
 230                 const char *path,
 231                 int sig,
 232                 CGroupFlags flags,
 233                 Set *s,
 234                 cg_kill_log_func_t log_kill,
 235                 void *userdata) {
 236
 237         _cleanup_set_free_ Set *allocated_set = NULL;
 238         bool done = false;
 239         int r, ret = 0;
 240         pid_t my_pid;
 241
 242         assert(sig >= 0);
 243
 244          /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
 245           * SIGCONT on SIGKILL. */
 246         if (IN_SET(sig, SIGCONT, SIGKILL))
 247                 flags &= ~CGROUP_SIGCONT;
 248
 249         /* This goes through the tasks list and kills them all. This
 250          * is repeated until no further processes are added to the
 251          * tasks list, to properly handle forking processes */
 252
 253         if (!s) {
 254                 s = allocated_set = set_new(NULL);
 255                 if (!s)
 256                         return -ENOMEM;
 257         }
 258
 259         my_pid = getpid();
 260
 261         do {
 262                 _cleanup_fclose_ FILE *f = NULL;
 263                 pid_t pid = 0;
 264                 done = true;
 265
 266                 r = cg_enumerate_processes(controller, path, &f);
 267                 if (r < 0) {
 268                         if (ret >= 0 && r != -ENOENT)
 269                                 return r;
 270
 271                         return ret;
 272                 }
 273
 274                 while ((r = cg_read_pid(f, &pid)) > 0) {
 275
 276                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
 277                                 continue;
 278
 279                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 280                                 continue;
 281
 282                         if (log_kill)
 283                                 log_kill(pid, sig, userdata);
 284
 285                         /* If we haven't killed this process yet, kill
 286                          * it */
 287                         if (kill(pid, sig) < 0) {
 288                                 if (ret >= 0 && errno != ESRCH)
 289                                         ret = -errno;
 290                         } else {
 291                                 if (flags & CGROUP_SIGCONT)
 292                                         (void) kill(pid, SIGCONT);
 293
 294                                 if (ret == 0)
 295                                         ret = 1;
 296                         }
 297
 298                         done = false;
 299
 300                         r = set_put(s, PID_TO_PTR(pid));
 301                         if (r < 0) {
 302                                 if (ret >= 0)
 303                                         return r;
 304
 305                                 return ret;
 306                         }
 307                 }
 308
 309                 if (r < 0) {
 310                         if (ret >= 0)
 311                                 return r;
 312
 313                         return ret;
 314                 }
 315
 316                 /* To avoid racing against processes which fork
 317                  * quicker than we can kill them we repeat this until
 318                  * no new pids need to be killed. */
 319
 320         } while (!done);
 321
 322         return ret;
 323 }
 324
 325 int cg_kill_recursive(
 326                 const char *controller,
 327                 const char *path,
 328                 int sig,
 329                 CGroupFlags flags,
 330                 Set *s,
 331                 cg_kill_log_func_t log_kill,
 332                 void *userdata) {
 333
 334         _cleanup_set_free_ Set *allocated_set = NULL;
 335         _cleanup_closedir_ DIR *d = NULL;
 336         int r, ret;
 337         char *fn;
 338
 339         assert(path);
 340         assert(sig >= 0);
 341
 342         if (!s) {
 343                 s = allocated_set = set_new(NULL);
 344                 if (!s)
 345                         return -ENOMEM;
 346         }
 347
 348         ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
 349
 350         r = cg_enumerate_subgroups(controller, path, &d);
 351         if (r < 0) {
 352                 if (ret >= 0 && r != -ENOENT)
 353                         return r;
 354
 355                 return ret;
 356         }
 357
 358         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 359                 _cleanup_free_ char *p = NULL;
 360
 361                 p = strjoin(path, "/", fn);
 362                 free(fn);
 363                 if (!p)
 364                         return -ENOMEM;
 365
 366                 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
 367                 if (r != 0 && ret >= 0)
 368                         ret = r;
 369         }
 370         if (ret >= 0 && r < 0)
 371                 ret = r;
 372
 373         if (flags & CGROUP_REMOVE) {
 374                 r = cg_rmdir(controller, path);
 375                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 376                         return r;
 377         }
 378
 379         return ret;
 380 }
 381
 382 int cg_migrate(
 383                 const char *cfrom,
 384                 const char *pfrom,
 385                 const char *cto,
 386                 const char *pto,
 387                 CGroupFlags flags) {
 388
 389         bool done = false;
 390         _cleanup_set_free_ Set *s = NULL;
 391         int r, ret = 0;
 392         pid_t my_pid;
 393
 394         assert(cfrom);
 395         assert(pfrom);
 396         assert(cto);
 397         assert(pto);
 398
 399         s = set_new(NULL);
 400         if (!s)
 401                 return -ENOMEM;
 402
 403         my_pid = getpid();
 404
 405         log_debug_elogind("Migrating \"%s\"/\"%s\" to \"%s\"/\"%s\" (%s)",
 406                           cfrom, pfrom, cto, pto,
 407                           (flags & CGROUP_IGNORE_SELF)
 408                           ? "ignoring self" : "watching self");
 409         do {
 410                 _cleanup_fclose_ FILE *f = NULL;
 411                 pid_t pid = 0;
 412                 done = true;
 413
 414                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 415                 if (r < 0) {
 416                         if (ret >= 0 && r != -ENOENT)
 417                                 return r;
 418
 419                         return ret;
 420                 }
 421
 422                 while ((r = cg_read_pid(f, &pid)) > 0) {
 423
 424                         /* This might do weird stuff if we aren't a
 425                          * single-threaded program. However, we
 426                          * luckily know we are not */
 427                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
 428                                 continue;
 429
 430                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 431                                 continue;
 432
 433                         /* Ignore kernel threads. Since they can only
 434                          * exist in the root cgroup, we only check for
 435                          * them there. */
 436                         if (cfrom &&
 437                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 438                             is_kernel_thread(pid) > 0)
 439                                 continue;
 440
 441                         r = cg_attach(cto, pto, pid);
 442                         if (r < 0) {
 443                                 if (ret >= 0 && r != -ESRCH)
 444                                         ret = r;
 445                         } else if (ret == 0)
 446                                 ret = 1;
 447
 448                         done = false;
 449
 450                         r = set_put(s, PID_TO_PTR(pid));
 451                         if (r < 0) {
 452                                 if (ret >= 0)
 453                                         return r;
 454
 455                                 return ret;
 456                         }
 457                 }
 458
 459                 if (r < 0) {
 460                         if (ret >= 0)
 461                                 return r;
 462
 463                         return ret;
 464                 }
 465         } while (!done);
 466
 467         return ret;
 468 }
 469
 470 int cg_migrate_recursive(
 471                 const char *cfrom,
 472                 const char *pfrom,
 473                 const char *cto,
 474                 const char *pto,
 475                 CGroupFlags flags) {
 476
 477         _cleanup_closedir_ DIR *d = NULL;
 478         int r, ret = 0;
 479         char *fn;
 480
 481         assert(cfrom);
 482         assert(pfrom);
 483         assert(cto);
 484         assert(pto);
 485
 486         ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
 487
 488         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 489         if (r < 0) {
 490                 if (ret >= 0 && r != -ENOENT)
 491                         return r;
 492
 493                 return ret;
 494         }
 495
 496         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 497                 _cleanup_free_ char *p = NULL;
 498
 499                 p = strjoin(pfrom, "/", fn);
 500                 free(fn);
 501                 if (!p)
 502                         return -ENOMEM;
 503
 504                 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
 505                 if (r != 0 && ret >= 0)
 506                         ret = r;
 507         }
 508
 509         if (r < 0 && ret >= 0)
 510                 ret = r;
 511
 512         if (flags & CGROUP_REMOVE) {
 513                 r = cg_rmdir(cfrom, pfrom);
 514                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 515                         return r;
 516         }
 517
 518         return ret;
 519 }
 520
 521 int cg_migrate_recursive_fallback(
 522                 const char *cfrom,
 523                 const char *pfrom,
 524                 const char *cto,
 525                 const char *pto,
 526                 CGroupFlags flags) {
 527
 528         int r;
 529
 530         assert(cfrom);
 531         assert(pfrom);
 532         assert(cto);
 533         assert(pto);
 534
 535         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
 536         if (r < 0) {
 537                 char prefix[strlen(pto) + 1];
 538
 539                 /* This didn't work? Then let's try all prefixes of the destination */
 540
 541                 PATH_FOREACH_PREFIX(prefix, pto) {
 542                         int q;
 543
 544                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
 545                         if (q >= 0)
 546                                 return q;
 547                 }
 548         }
 549
 550         return r;
 551 }
 552
 553 static const char *controller_to_dirname(const char *controller) {
 554         const char *e;
 555
 556         assert(controller);
 557
 558         /* Converts a controller name to the directory name below
 559          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 560          * just cuts off the name= prefixed used for named
 561          * hierarchies, if it is specified. */
 562
 563         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 564                 if (cg_hybrid_unified() > 0)
 565                         controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
 566                 else
 567                         controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
 568         }
 569
 570         e = startswith(controller, "name=");
 571         if (e)
 572                 return e;
 573
 574         return controller;
 575 }
 576
 577 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 578         const char *dn;
 579         char *t = NULL;
 580
 581         assert(fs);
 582         assert(controller);
 583
 584         dn = controller_to_dirname(controller);
 585
 586         if (isempty(path) && isempty(suffix))
 587                 t = strappend("/sys/fs/cgroup/", dn);
 588         else if (isempty(path))
 589                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
 590         else if (isempty(suffix))
 591                 t = strjoin("/sys/fs/cgroup/", dn, "/", path);
 592         else
 593                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
 594         if (!t)
 595                 return -ENOMEM;
 596
 597         *fs = t;
 598         return 0;
 599 }
 600
 601 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 602         char *t;
 603
 604         assert(fs);
 605
 606         if (isempty(path) && isempty(suffix))
 607                 t = strdup("/sys/fs/cgroup");
 608         else if (isempty(path))
 609                 t = strappend("/sys/fs/cgroup/", suffix);
 610         else if (isempty(suffix))
 611                 t = strappend("/sys/fs/cgroup/", path);
 612         else
 613                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix);
 614         if (!t)
 615                 return -ENOMEM;
 616
 617         *fs = t;
 618         return 0;
 619 }
 620
 621 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 622         int r;
 623
 624         assert(fs);
 625
 626         if (!controller) {
 627                 char *t;
 628
 629                 /* If no controller is specified, we return the path
 630                  * *below* the controllers, without any prefix. */
 631
 632                 if (!path && !suffix)
 633                         return -EINVAL;
 634
 635                 if (!suffix)
 636                         t = strdup(path);
 637                 else if (!path)
 638                         t = strdup(suffix);
 639                 else
 640                         t = strjoin(path, "/", suffix);
 641                 if (!t)
 642                         return -ENOMEM;
 643
 644                 *fs = path_kill_slashes(t);
 645                 return 0;
 646         }
 647
 648         if (!cg_controller_is_valid(controller))
 649                 return -EINVAL;
 650
 651         r = cg_all_unified();
 652         if (r < 0)
 653                 return r;
 654         if (r > 0)
 655                 r = join_path_unified(path, suffix, fs);
 656         else
 657                 r = join_path_legacy(controller, path, suffix, fs);
 658         if (r < 0)
 659                 return r;
 660
 661         path_kill_slashes(*fs);
 662         return 0;
 663 }
 664
 665 static int controller_is_accessible(const char *controller) {
 666         int r;
 667
 668         assert(controller);
 669
 670         /* Checks whether a specific controller is accessible,
 671          * i.e. its hierarchy mounted. In the unified hierarchy all
 672          * controllers are considered accessible, except for the named
 673          * hierarchies */
 674
 675         if (!cg_controller_is_valid(controller))
 676                 return -EINVAL;
 677
 678         r = cg_all_unified();
 679         if (r < 0)
 680                 return r;
 681         if (r > 0) {
 682                 /* We don't support named hierarchies if we are using
 683                  * the unified hierarchy. */
 684
 685                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 686                         return 0;
 687
 688                 if (startswith(controller, "name="))
 689                         return -EOPNOTSUPP;
 690
 691         } else {
 692                 const char *cc, *dn;
 693
 694                 dn = controller_to_dirname(controller);
 695                 cc = strjoina("/sys/fs/cgroup/", dn);
 696
 697                 if (laccess(cc, F_OK) < 0)
 698                         return -errno;
 699         }
 700
 701         return 0;
 702 }
 703
 704 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 705         int r;
 706
 707         assert(controller);
 708         assert(fs);
 709
 710         /* Check if the specified controller is actually accessible */
 711         r = controller_is_accessible(controller);
 712         if (r < 0)
 713                 return r;
 714
 715         return cg_get_path(controller, path, suffix, fs);
 716 }
 717
 718 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 719         assert(path);
 720         assert(sb);
 721         assert(ftwbuf);
 722
 723         if (typeflag != FTW_DP)
 724                 return 0;
 725
 726         if (ftwbuf->level < 1)
 727                 return 0;
 728
 729         (void) rmdir(path);
 730         return 0;
 731 }
 732
 733 int cg_trim(const char *controller, const char *path, bool delete_root) {
 734         _cleanup_free_ char *fs = NULL;
 735         int r = 0, q;
 736
 737         assert(path);
 738
 739         r = cg_get_path(controller, path, NULL, &fs);
 740         if (r < 0)
 741                 return r;
 742
 743         errno = 0;
 744         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 745                 if (errno == ENOENT)
 746                         r = 0;
 747                 else if (errno > 0)
 748                         r = -errno;
 749                 else
 750                         r = -EIO;
 751         }
 752
 753         if (delete_root) {
 754                 if (rmdir(fs) < 0 && errno != ENOENT)
 755                         return -errno;
 756         }
 757
 758         q = cg_hybrid_unified();
 759         if (q < 0)
 760                 return q;
 761         if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 762                 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
 763                 if (q < 0)
 764                         log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
 765         }
 766
 767         return r;
 768 }
 769
 770 int cg_create(const char *controller, const char *path) {
 771         _cleanup_free_ char *fs = NULL;
 772         int r;
 773
 774         r = cg_get_path_and_check(controller, path, NULL, &fs);
 775         if (r < 0)
 776                 return r;
 777
 778         r = mkdir_parents(fs, 0755);
 779         if (r < 0)
 780                 return r;
 781
 782         if (mkdir(fs, 0755) < 0) {
 783
 784                 if (errno == EEXIST)
 785                         return 0;
 786
 787                 return -errno;
 788         }
 789
 790         r = cg_hybrid_unified();
 791         if (r < 0)
 792                 return r;
 793
 794         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 795                 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
 796                 if (r < 0)
 797                         log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
 798         }
 799
 800         return 1;
 801 }
 802
 803 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 804         int r, q;
 805
 806         assert(pid >= 0);
 807
 808         r = cg_create(controller, path);
 809         if (r < 0)
 810                 return r;
 811
 812         q = cg_attach(controller, path, pid);
 813         if (q < 0)
 814                 return q;
 815
 816         /* This does not remove the cgroup on failure */
 817         return r;
 818 }
 819
 820 int cg_attach(const char *controller, const char *path, pid_t pid) {
 821         _cleanup_free_ char *fs = NULL;
 822         char c[DECIMAL_STR_MAX(pid_t) + 2];
 823         int r;
 824
 825         assert(path);
 826         assert(pid >= 0);
 827
 828         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 829         if (r < 0)
 830                 return r;
 831
 832         if (pid == 0)
 833                 pid = getpid();
 834
 835         xsprintf(c, PID_FMT "\n", pid);
 836
 837         r = write_string_file(fs, c, 0);
 838         if (r < 0)
 839                 return r;
 840
 841         r = cg_hybrid_unified();
 842         if (r < 0)
 843                 return r;
 844
 845         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 846                 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
 847                 if (r < 0)
 848                         log_warning_errno(r, "Failed to attach %d to compat systemd cgroup %s: %m", pid, path);
 849         }
 850
 851         return 0;
 852 }
 853
 854 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 855         int r;
 856
 857         assert(controller);
 858         assert(path);
 859         assert(pid >= 0);
 860
 861         r = cg_attach(controller, path, pid);
 862         if (r < 0) {
 863                 char prefix[strlen(path) + 1];
 864
 865                 /* This didn't work? Then let's try all prefixes of
 866                  * the destination */
 867
 868                 PATH_FOREACH_PREFIX(prefix, path) {
 869                         int q;
 870
 871                         q = cg_attach(controller, prefix, pid);
 872                         if (q >= 0)
 873                                 return q;
 874                 }
 875         }
 876
 877         return r;
 878 }
 879
 880 #if 0 /// UNNEEDED by elogind
 881 int cg_set_group_access(
 882                 const char *controller,
 883                 const char *path,
 884                 mode_t mode,
 885                 uid_t uid,
 886                 gid_t gid) {
 887
 888         _cleanup_free_ char *fs = NULL;
 889         int r;
 890
 891         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 892                 return 0;
 893
 894         if (mode != MODE_INVALID)
 895                 mode &= 0777;
 896
 897         r = cg_get_path(controller, path, NULL, &fs);
 898         if (r < 0)
 899                 return r;
 900
 901         r = chmod_and_chown(fs, mode, uid, gid);
 902         if (r < 0)
 903                 return r;
 904
 905         r = cg_hybrid_unified();
 906         if (r < 0)
 907                 return r;
 908         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 909                 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid);
 910                 if (r < 0)
 911                         log_warning_errno(r, "Failed to set group access on compat systemd cgroup %s: %m", path);
 912         }
 913
 914         return 0;
 915 }
 916
 917 int cg_set_task_access(
 918                 const char *controller,
 919                 const char *path,
 920                 mode_t mode,
 921                 uid_t uid,
 922                 gid_t gid) {
 923
 924         _cleanup_free_ char *fs = NULL, *procs = NULL;
 925         int r;
 926
 927         assert(path);
 928
 929         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 930                 return 0;
 931
 932         if (mode != MODE_INVALID)
 933                 mode &= 0666;
 934
 935         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 936         if (r < 0)
 937                 return r;
 938
 939         r = chmod_and_chown(fs, mode, uid, gid);
 940         if (r < 0)
 941                 return r;
 942
 943         r = cg_unified_controller(controller);
 944         if (r < 0)
 945                 return r;
 946         if (r == 0) {
 947                 /* Compatibility, Always keep values for "tasks" in sync with
 948                  * "cgroup.procs" */
 949                 if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 950                         (void) chmod_and_chown(procs, mode, uid, gid);
 951         }
 952
 953         r = cg_hybrid_unified();
 954         if (r < 0)
 955                 return r;
 956         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 957                 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid);
 958                 if (r < 0)
 959                         log_warning_errno(r, "Failed to set task access on compat systemd cgroup %s: %m", path);
 960         }
 961
 962         return 0;
 963 }
 964
 965 int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
 966         _cleanup_free_ char *fs = NULL;
 967         int r;
 968
 969         assert(path);
 970         assert(name);
 971         assert(value || size <= 0);
 972
 973         r = cg_get_path(controller, path, NULL, &fs);
 974         if (r < 0)
 975                 return r;
 976
 977         if (setxattr(fs, name, value, size, flags) < 0)
 978                 return -errno;
 979
 980         return 0;
 981 }
 982
 983 int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
 984         _cleanup_free_ char *fs = NULL;
 985         ssize_t n;
 986         int r;
 987
 988         assert(path);
 989         assert(name);
 990
 991         r = cg_get_path(controller, path, NULL, &fs);
 992         if (r < 0)
 993                 return r;
 994
 995         n = getxattr(fs, name, value, size);
 996         if (n < 0)
 997                 return -errno;
 998
 999         return (int) n;
1000 }
1001 #endif // 0
1002
1003 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
1004         _cleanup_fclose_ FILE *f = NULL;
1005         char line[LINE_MAX];
1006         const char *fs, *controller_str;
1007         size_t cs = 0;
1008         int unified;
1009
1010         assert(path);
1011         assert(pid >= 0);
1012
1013         if (controller) {
1014                 if (!cg_controller_is_valid(controller))
1015                         return -EINVAL;
1016         } else
1017                 controller = SYSTEMD_CGROUP_CONTROLLER;
1018
1019         unified = cg_unified_controller(controller);
1020         if (unified < 0)
1021                 return unified;
1022         if (unified == 0) {
1023                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
1024                         controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
1025                 else
1026                         controller_str = controller;
1027
1028                 cs = strlen(controller_str);
1029         }
1030
1031         fs = procfs_file_alloca(pid, "cgroup");
1032         log_debug_elogind("Searching for PID %u in \"%s\" (controller \"%s\")",
1033                           pid, fs, controller);
1034         f = fopen(fs, "re");
1035         if (!f)
1036                 return errno == ENOENT ? -ESRCH : -errno;
1037
1038         FOREACH_LINE(line, f, return -errno) {
1039                 char *e, *p;
1040
1041                 truncate_nl(line);
1042
1043                 if (unified) {
1044                         e = startswith(line, "0:");
1045                         if (!e)
1046                                 continue;
1047
1048                         e = strchr(e, ':');
1049                         if (!e)
1050                                 continue;
1051                 } else {
1052                         char *l;
1053                         size_t k;
1054                         const char *word, *state;
1055                         bool found = false;
1056
1057                         l = strchr(line, ':');
1058                         if (!l)
1059                                 continue;
1060
1061                         l++;
1062                         e = strchr(l, ':');
1063                         if (!e)
1064                                 continue;
1065
1066                         *e = 0;
1067                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
1068                                 if (k == cs && memcmp(word, controller_str, cs) == 0) {
1069                                         found = true;
1070                                         break;
1071                                 }
1072                         }
1073
1074                         if (!found)
1075                                 continue;
1076                 }
1077
1078                 log_debug_elogind("Found %s:%s", line, e+1);
1079                 p = strdup(e + 1);
1080                 if (!p)
1081                         return -ENOMEM;
1082
1083                 *path = p;
1084                 return 0;
1085         }
1086
1087         return -ENODATA;
1088 }
1089
1090 int cg_install_release_agent(const char *controller, const char *agent) {
1091         _cleanup_free_ char *fs = NULL, *contents = NULL;
1092         const char *sc;
1093         int r;
1094
1095         assert(agent);
1096
1097         r = cg_unified_controller(controller);
1098         if (r < 0)
1099                 return r;
1100         if (r > 0) /* doesn't apply to unified hierarchy */
1101                 return -EOPNOTSUPP;
1102
1103         r = cg_get_path(controller, NULL, "release_agent", &fs);
1104         if (r < 0)
1105                 return r;
1106
1107         r = read_one_line_file(fs, &contents);
1108         if (r < 0)
1109                 return r;
1110
1111         sc = strstrip(contents);
1112         if (isempty(sc)) {
1113                 r = write_string_file(fs, agent, 0);
1114                 if (r < 0)
1115                         return r;
1116         } else if (!path_equal(sc, agent))
1117                 return -EEXIST;
1118
1119         fs = mfree(fs);
1120         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1121         if (r < 0)
1122                 return r;
1123
1124         contents = mfree(contents);
1125         r = read_one_line_file(fs, &contents);
1126         if (r < 0)
1127                 return r;
1128
1129         sc = strstrip(contents);
1130         if (streq(sc, "0")) {
1131                 r = write_string_file(fs, "1", 0);
1132                 if (r < 0)
1133                         return r;
1134
1135                 return 1;
1136         }
1137
1138         if (!streq(sc, "1"))
1139                 return -EIO;
1140
1141         return 0;
1142 }
1143
1144 int cg_uninstall_release_agent(const char *controller) {
1145         _cleanup_free_ char *fs = NULL;
1146         int r;
1147
1148         r = cg_unified_controller(controller);
1149         if (r < 0)
1150                 return r;
1151         if (r > 0) /* Doesn't apply to unified hierarchy */
1152                 return -EOPNOTSUPP;
1153
1154         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1155         if (r < 0)
1156                 return r;
1157
1158         r = write_string_file(fs, "0", 0);
1159         if (r < 0)
1160                 return r;
1161
1162         fs = mfree(fs);
1163
1164         r = cg_get_path(controller, NULL, "release_agent", &fs);
1165         if (r < 0)
1166                 return r;
1167
1168         r = write_string_file(fs, "", 0);
1169         if (r < 0)
1170                 return r;
1171
1172         return 0;
1173 }
1174
1175 int cg_is_empty(const char *controller, const char *path) {
1176         _cleanup_fclose_ FILE *f = NULL;
1177         pid_t pid;
1178         int r;
1179
1180         assert(path);
1181
1182         r = cg_enumerate_processes(controller, path, &f);
1183         if (r == -ENOENT)
1184                 return 1;
1185         if (r < 0)
1186                 return r;
1187
1188         r = cg_read_pid(f, &pid);
1189         if (r < 0)
1190                 return r;
1191
1192         return r == 0;
1193 }
1194
1195 int cg_is_empty_recursive(const char *controller, const char *path) {
1196         int r;
1197
1198         assert(path);
1199
1200         /* The root cgroup is always populated */
1201         if (controller && (isempty(path) || path_equal(path, "/")))
1202                 return false;
1203
1204         r = cg_unified_controller(controller);
1205         if (r < 0)
1206                 return r;
1207         if (r > 0) {
1208                 _cleanup_free_ char *t = NULL;
1209
1210                 /* On the unified hierarchy we can check empty state
1211                  * via the "populated" attribute of "cgroup.events". */
1212
1213                 r = cg_read_event(controller, path, "populated", &t);
1214                 if (r < 0)
1215                         return r;
1216
1217                 return streq(t, "0");
1218         } else {
1219                 _cleanup_closedir_ DIR *d = NULL;
1220                 char *fn;
1221
1222                 r = cg_is_empty(controller, path);
1223                 if (r <= 0)
1224                         return r;
1225
1226                 r = cg_enumerate_subgroups(controller, path, &d);
1227                 if (r == -ENOENT)
1228                         return 1;
1229                 if (r < 0)
1230                         return r;
1231
1232                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1233                         _cleanup_free_ char *p = NULL;
1234
1235                         p = strjoin(path, "/", fn);
1236                         free(fn);
1237                         if (!p)
1238                                 return -ENOMEM;
1239
1240                         r = cg_is_empty_recursive(controller, p);
1241                         if (r <= 0)
1242                                 return r;
1243                 }
1244                 if (r < 0)
1245                         return r;
1246
1247                 return true;
1248         }
1249 }
1250
1251 int cg_split_spec(const char *spec, char **controller, char **path) {
1252         char *t = NULL, *u = NULL;
1253         const char *e;
1254
1255         assert(spec);
1256
1257         if (*spec == '/') {
1258                 if (!path_is_safe(spec))
1259                         return -EINVAL;
1260
1261                 if (path) {
1262                         t = strdup(spec);
1263                         if (!t)
1264                                 return -ENOMEM;
1265
1266                         *path = path_kill_slashes(t);
1267                 }
1268
1269                 if (controller)
1270                         *controller = NULL;
1271
1272                 return 0;
1273         }
1274
1275         e = strchr(spec, ':');
1276         if (!e) {
1277                 if (!cg_controller_is_valid(spec))
1278                         return -EINVAL;
1279
1280                 if (controller) {
1281                         t = strdup(spec);
1282                         if (!t)
1283                                 return -ENOMEM;
1284
1285                         *controller = t;
1286                 }
1287
1288                 if (path)
1289                         *path = NULL;
1290
1291                 return 0;
1292         }
1293
1294         t = strndup(spec, e-spec);
1295         if (!t)
1296                 return -ENOMEM;
1297         if (!cg_controller_is_valid(t)) {
1298                 free(t);
1299                 return -EINVAL;
1300         }
1301
1302         if (isempty(e+1))
1303                 u = NULL;
1304         else {
1305                 u = strdup(e+1);
1306                 if (!u) {
1307                         free(t);
1308                         return -ENOMEM;
1309                 }
1310
1311                 if (!path_is_safe(u) ||
1312                     !path_is_absolute(u)) {
1313                         free(t);
1314                         free(u);
1315                         return -EINVAL;
1316                 }
1317
1318                 path_kill_slashes(u);
1319         }
1320
1321         if (controller)
1322                 *controller = t;
1323         else
1324                 free(t);
1325
1326         if (path)
1327                 *path = u;
1328         else
1329                 free(u);
1330
1331         return 0;
1332 }
1333
1334 int cg_mangle_path(const char *path, char **result) {
1335         _cleanup_free_ char *c = NULL, *p = NULL;
1336         char *t;
1337         int r;
1338
1339         assert(path);
1340         assert(result);
1341
1342         /* First, check if it already is a filesystem path */
1343         if (path_startswith(path, "/sys/fs/cgroup")) {
1344
1345                 t = strdup(path);
1346                 if (!t)
1347                         return -ENOMEM;
1348
1349                 *result = path_kill_slashes(t);
1350                 return 0;
1351         }
1352
1353         /* Otherwise, treat it as cg spec */
1354         r = cg_split_spec(path, &c, &p);
1355         if (r < 0)
1356                 return r;
1357
1358         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1359 }
1360
1361 int cg_get_root_path(char **path) {
1362 #if 0 /// elogind does not support systemd scopes and slices
1363         char *p, *e;
1364         int r;
1365
1366         assert(path);
1367
1368         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1369         if (r < 0)
1370                 return r;
1371
1372         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1373         if (!e)
1374                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1375         if (!e)
1376                 e = endswith(p, "/system"); /* even more legacy */
1377         if (e)
1378                 *e = 0;
1379
1380         *path = p;
1381         return 0;
1382 #else
1383         assert(path);
1384         return cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, path);
1385 #endif // 0
1386 }
1387
1388 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1389         _cleanup_free_ char *rt = NULL;
1390         char *p;
1391         int r;
1392
1393         assert(cgroup);
1394         assert(shifted);
1395
1396         if (!root) {
1397                 /* If the root was specified let's use that, otherwise
1398                  * let's determine it from PID 1 */
1399
1400                 r = cg_get_root_path(&rt);
1401                 if (r < 0)
1402                         return r;
1403
1404                 root = rt;
1405                 log_debug_elogind("Determined root path: \"%s\"", root);
1406         }
1407
1408         p = path_startswith(cgroup, root);
1409 #if 0 /// With other controllers, elogind might end up in /elogind, and *p is 0
1410         if (p && p > cgroup)
1411 #else
1412         if (p && p[0] && (p > cgroup))
1413 #endif // 0
1414                 *shifted = p - 1;
1415         else
1416                 *shifted = cgroup;
1417
1418         return 0;
1419 }
1420
1421 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1422         _cleanup_free_ char *raw = NULL;
1423         const char *c;
1424         int r;
1425
1426         assert(pid >= 0);
1427         assert(cgroup);
1428
1429         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1430         if (r < 0)
1431                 return r;
1432
1433         log_debug_elogind("Shifting path: \"%s\" (PID %u, root: \"%s\")",
1434                           raw, pid, root ? root : "NULL");
1435         r = cg_shift_path(raw, root, &c);
1436         if (r < 0)
1437                 return r;
1438
1439         if (c == raw) {
1440                 *cgroup = raw;
1441                 raw = NULL;
1442         } else {
1443                 char *n;
1444
1445                 n = strdup(c);
1446                 if (!n)
1447                         return -ENOMEM;
1448
1449                 *cgroup = n;
1450         }
1451         log_debug_elogind("Resulting cgroup:\"%s\"", *cgroup);
1452
1453         return 0;
1454 }
1455
1456 #if 0 /// UNNEEDED by elogind
1457 int cg_path_decode_unit(const char *cgroup, char **unit) {
1458         char *c, *s;
1459         size_t n;
1460
1461         assert(cgroup);
1462         assert(unit);
1463
1464         n = strcspn(cgroup, "/");
1465         if (n < 3)
1466                 return -ENXIO;
1467
1468         c = strndupa(cgroup, n);
1469         c = cg_unescape(c);
1470
1471         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1472                 return -ENXIO;
1473
1474         s = strdup(c);
1475         if (!s)
1476                 return -ENOMEM;
1477
1478         *unit = s;
1479         return 0;
1480 }
1481
1482 static bool valid_slice_name(const char *p, size_t n) {
1483
1484         if (!p)
1485                 return false;
1486
1487         if (n < strlen("x.slice"))
1488                 return false;
1489
1490         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1491                 char buf[n+1], *c;
1492
1493                 memcpy(buf, p, n);
1494                 buf[n] = 0;
1495
1496                 c = cg_unescape(buf);
1497
1498                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1499         }
1500
1501         return false;
1502 }
1503
/* Skips over all leading slice components of p, as recognized by
 * valid_slice_name(), including the slashes in front of them. Returns a
 * pointer to the first component that is not a slice (leading slashes
 * already skipped), possibly the terminating NUL. */
static const char *skip_slices(const char *p) {
        assert(p);

        for (p += strspn(p, "/");; p += strspn(p, "/")) {
                size_t len = strcspn(p, "/");

                if (!valid_slice_name(p, len))
                        return p;

                p += len;
        }
}
1521
1522 int cg_path_get_unit(const char *path, char **ret) {
1523         const char *e;
1524         char *unit;
1525         int r;
1526
1527         assert(path);
1528         assert(ret);
1529
1530         e = skip_slices(path);
1531
1532         r = cg_path_decode_unit(e, &unit);
1533         if (r < 0)
1534                 return r;
1535
1536         /* We skipped over the slices, don't accept any now */
1537         if (endswith(unit, ".slice")) {
1538                 free(unit);
1539                 return -ENXIO;
1540         }
1541
1542         *ret = unit;
1543         return 0;
1544 }
1545
1546 int cg_pid_get_unit(pid_t pid, char **unit) {
1547         _cleanup_free_ char *cgroup = NULL;
1548         int r;
1549
1550         assert(unit);
1551
1552         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1553         if (r < 0)
1554                 return r;
1555
1556         return cg_path_get_unit(cgroup, unit);
1557 }
1558
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to whatever follows the "session-<id>.scope" component
 * (slashes after it are skipped, too), or NULL if p is empty, the first
 * component is not of that form, or <id> is rejected by session_id_valid().
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                char buf[n - 8 - 6 + 1];

                /* Cut out what sits between "session-" and ".scope" */
                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal failure
                 * with NULL like every other exit path does. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1595
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to whatever follows the "user@<uid>.service" component
 * (slashes after it are skipped, too), or NULL if p is empty, the first
 * component is not of that form, or <uid> is rejected by parse_uid().
 */
static const char *skip_user_manager(const char *p) {
        size_t len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) == 0 && memcmp(p + len - 8, ".service", 8) == 0) {
                size_t uid_len = len - 5 - 8;
                char uid_str[uid_len + 1];

                /* Cut out what sits between "user@" and ".service" */
                memcpy(uid_str, p + 5, uid_len);
                uid_str[uid_len] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;

                p += len;

                return p + strspn(p, "/");
        }

        return NULL;
}
1633
/* Skips leading slices and then either a user manager service or, failing
 * that, a session scope. Returns a pointer to what follows, or NULL if
 * neither of the two is found after the slices. */
static const char *skip_user_prefix(const char *path) {
        const char *rest, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        rest = skip_slices(path);

        /* Prefer the user manager; fall back to the user session */
        after_manager = skip_user_manager(rest);

        return after_manager ? after_manager : skip_session(rest);
}
1650
/* Determines the user unit of a cgroup path: the user prefix (slices plus
 * user manager or session) is skipped, the remainder is parsed exactly like
 * a system unit path. Returns -ENXIO if there is no such prefix, otherwise
 * the result of cg_path_get_unit(). */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1666
1667 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1668         _cleanup_free_ char *cgroup = NULL;
1669         int r;
1670
1671         assert(unit);
1672
1673         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1674         if (r < 0)
1675                 return r;
1676
1677         return cg_path_get_user_unit(cgroup, unit);
1678 }
1679
1680 int cg_path_get_machine_name(const char *path, char **machine) {
1681         _cleanup_free_ char *u = NULL;
1682         const char *sl;
1683         int r;
1684
1685         r = cg_path_get_unit(path, &u);
1686         if (r < 0)
1687                 return r;
1688
1689         sl = strjoina("/run/systemd/machines/unit:", u);
1690         return readlink_malloc(sl, machine);
1691 }
1692
1693 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1694         _cleanup_free_ char *cgroup = NULL;
1695         int r;
1696
1697         assert(machine);
1698
1699         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1700         if (r < 0)
1701                 return r;
1702
1703         return cg_path_get_machine_name(cgroup, machine);
1704 }
1705 #endif // 0
1706
/* Determines the session a cgroup path belongs to.
 *
 * In the elogind build (the #else branch) the path has to start with '/'
 * and the session is its first component, unescaped with cg_unescape().
 * -ENOENT is returned if that component is empty.
 *
 * If "session" is non-NULL it receives a newly allocated copy of the
 * session id; with a NULL "session" the path is only checked. Returns 0 on
 * success, -ENOMEM on allocation failure. */
int cg_path_get_session(const char *path, char **session) {
#if 0 /// UNNEEDED by elogind
        _cleanup_free_ char *unit = NULL;
        char *start, *end;
        int r;

        assert(path);

        r = cg_path_get_unit(path, &unit);
        if (r < 0)
                return r;

        start = startswith(unit, "session-");
        if (!start)
                return -ENXIO;
        end = endswith(start, ".scope");
        if (!end)
                return -ENXIO;

        *end = 0;
        if (!session_id_valid(start))
                return -ENXIO;
#else
        /* Elogind uses a flat hierarchy, just "/SESSION".  The only
           wrinkle is that SESSION might be escaped.  */
        const char *first, *stop, *start;

        assert(path);
        log_debug_elogind("path is \"%s\"", path);
        assert(path[0] == '/');

        /* The first component spans from behind the leading slash to the
         * next slash or the end of the string. */
        first = path + 1;
        stop = strchrnul(first, '/');
        if (first == stop)
                return -ENOENT;

        start = strndupa(first, stop - first);
        start = cg_unescape(start);

        if (!start[0])
                return -ENOENT;
#endif // 0

        if (session) {
                char *copy;

                log_debug_elogind("found session: \"%s\"", start);
                copy = strdup(start);
                if (!copy)
                        return -ENOMEM;

                *session = copy;
        }

        return 0;
}
1763
1764 int cg_pid_get_session(pid_t pid, char **session) {
1765         _cleanup_free_ char *cgroup = NULL;
1766         int r;
1767
1768         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1769         if (r < 0)
1770                 return r;
1771
1772         return cg_path_get_session(cgroup, session);
1773 }
1774
1775 #if 0 /// UNNEEDED by elogind
1776 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1777         _cleanup_free_ char *slice = NULL;
1778         char *start, *end;
1779         int r;
1780
1781         assert(path);
1782
1783         r = cg_path_get_slice(path, &slice);
1784         if (r < 0)
1785                 return r;
1786
1787         start = startswith(slice, "user-");
1788         if (!start)
1789                 return -ENXIO;
1790         end = endswith(start, ".slice");
1791         if (!end)
1792                 return -ENXIO;
1793
1794         *end = 0;
1795         if (parse_uid(start, uid) < 0)
1796                 return -ENXIO;
1797
1798         return 0;
1799 }
1800
1801 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1802         _cleanup_free_ char *cgroup = NULL;
1803         int r;
1804
1805         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1806         if (r < 0)
1807                 return r;
1808
1809         return cg_path_get_owner_uid(cgroup, uid);
1810 }
1811
1812 int cg_path_get_slice(const char *p, char **slice) {
1813         const char *e = NULL;
1814
1815         assert(p);
1816         assert(slice);
1817
1818         /* Finds the right-most slice unit from the beginning, but
1819          * stops before we come to the first non-slice unit. */
1820
1821         for (;;) {
1822                 size_t n;
1823
1824                 p += strspn(p, "/");
1825
1826                 n = strcspn(p, "/");
1827                 if (!valid_slice_name(p, n)) {
1828
1829                         if (!e) {
1830                                 char *s;
1831
1832                                 s = strdup(SPECIAL_ROOT_SLICE);
1833                                 if (!s)
1834                                         return -ENOMEM;
1835
1836                                 *slice = s;
1837                                 return 0;
1838                         }
1839
1840                         return cg_path_decode_unit(e, slice);
1841                 }
1842
1843                 e = p;
1844                 p += n;
1845         }
1846 }
1847
1848 int cg_pid_get_slice(pid_t pid, char **slice) {
1849         _cleanup_free_ char *cgroup = NULL;
1850         int r;
1851
1852         assert(slice);
1853
1854         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1855         if (r < 0)
1856                 return r;
1857
1858         return cg_path_get_slice(cgroup, slice);
1859 }
1860
/* Determines the user slice of a cgroup path: the user prefix (slices plus
 * user manager or session) is skipped, the remainder is parsed exactly like
 * a system slice path. Returns -ENXIO if there is no such prefix, otherwise
 * the result of cg_path_get_slice(). */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1874
1875 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1876         _cleanup_free_ char *cgroup = NULL;
1877         int r;
1878
1879         assert(slice);
1880
1881         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1882         if (r < 0)
1883                 return r;
1884
1885         return cg_path_get_user_slice(cgroup, slice);
1886 }
1887 #endif // 0
1888
1889 char *cg_escape(const char *p) {
1890         bool need_prefix = false;
1891
1892         /* This implements very minimal escaping for names to be used
1893          * as file names in the cgroup tree: any name which might
1894          * conflict with a kernel name or is prefixed with '_' is
1895          * prefixed with a '_'. That way, when reading cgroup names it
1896          * is sufficient to remove a single prefixing underscore if
1897          * there is one. */
1898
1899         /* The return value of this function (unlike cg_unescape())
1900          * needs free()! */
1901
1902         if (p[0] == 0 ||
1903             p[0] == '_' ||
1904             p[0] == '.' ||
1905             streq(p, "notify_on_release") ||
1906             streq(p, "release_agent") ||
1907             streq(p, "tasks") ||
1908             startswith(p, "cgroup."))
1909                 need_prefix = true;
1910         else {
1911                 const char *dot;
1912
1913                 dot = strrchr(p, '.');
1914                 if (dot) {
1915                         CGroupController c;
1916                         size_t l = dot - p;
1917
1918                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1919                                 const char *n;
1920
1921                                 n = cgroup_controller_to_string(c);
1922
1923                                 if (l != strlen(n))
1924                                         continue;
1925
1926                                 if (memcmp(p, n, l) != 0)
1927                                         continue;
1928
1929                                 need_prefix = true;
1930                                 break;
1931                         }
1932                 }
1933         }
1934
1935         if (need_prefix)
1936                 return strappend("_", p);
1937
1938         return strdup(p);
1939 }
1940
/* Reverses cg_escape(): skips a single leading '_' if there is one.
 *
 * The result points into p itself, nothing is allocated. Unlike the result
 * of cg_escape(), it must not be passed to free()! */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1952
1953 #define CONTROLLER_VALID                        \
1954         DIGITS LETTERS                          \
1955         "_"
1956
1957 bool cg_controller_is_valid(const char *p) {
1958         const char *t, *s;
1959
1960         if (!p)
1961                 return false;
1962
1963         if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1964                 return true;
1965
1966         s = startswith(p, "name=");
1967         if (s)
1968                 p = s;
1969
1970         if (*p == 0 || *p == '_')
1971                 return false;
1972
1973         for (t = p; *t; t++)
1974                 if (!strchr(CONTROLLER_VALID, *t))
1975                         return false;
1976
1977         if (t - p > FILENAME_MAX)
1978                 return false;
1979
1980         return true;
1981 }
1982
1983 #if 0 /// UNNEEDED by elogind
1984 int cg_slice_to_path(const char *unit, char **ret) {
1985         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1986         const char *dash;
1987         int r;
1988
1989         assert(unit);
1990         assert(ret);
1991
1992         if (streq(unit, SPECIAL_ROOT_SLICE)) {
1993                 char *x;
1994
1995                 x = strdup("");
1996                 if (!x)
1997                         return -ENOMEM;
1998                 *ret = x;
1999                 return 0;
2000         }
2001
2002         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
2003                 return -EINVAL;
2004
2005         if (!endswith(unit, ".slice"))
2006                 return -EINVAL;
2007
2008         r = unit_name_to_prefix(unit, &p);
2009         if (r < 0)
2010                 return r;
2011
2012         dash = strchr(p, '-');
2013
2014         /* Don't allow initial dashes */
2015         if (dash == p)
2016                 return -EINVAL;
2017
2018         while (dash) {
2019                 _cleanup_free_ char *escaped = NULL;
2020                 char n[dash - p + sizeof(".slice")];
2021
2022                 /* Don't allow trailing or double dashes */
2023                 if (dash[1] == 0 || dash[1] == '-')
2024                         return -EINVAL;
2025
2026                 strcpy(stpncpy(n, p, dash - p), ".slice");
2027                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
2028                         return -EINVAL;
2029
2030                 escaped = cg_escape(n);
2031                 if (!escaped)
2032                         return -ENOMEM;
2033
2034                 if (!strextend(&s, escaped, "/", NULL))
2035                         return -ENOMEM;
2036
2037                 dash = strchr(dash+1, '-');
2038         }
2039
2040         e = cg_escape(unit);
2041         if (!e)
2042                 return -ENOMEM;
2043
2044         if (!strextend(&s, e, NULL))
2045                 return -ENOMEM;
2046
2047         *ret = s;
2048         s = NULL;
2049
2050         return 0;
2051 }
2052 #endif // 0
2053
2054 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
2055         _cleanup_free_ char *p = NULL;
2056         int r;
2057
2058         r = cg_get_path(controller, path, attribute, &p);
2059         if (r < 0)
2060                 return r;
2061
2062         return write_string_file(p, value, 0);
2063 }
2064
2065 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
2066         _cleanup_free_ char *p = NULL;
2067         int r;
2068
2069         r = cg_get_path(controller, path, attribute, &p);
2070         if (r < 0)
2071                 return r;
2072
2073         return read_one_line_file(p, ret);
2074 }
2075
2076 #if 0 /// UNNEEDED by elogind
2077 int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
2078         _cleanup_free_ char *filename = NULL, *content = NULL;
2079         char *line, *p;
2080         int i, r;
2081
2082         for (i = 0; keys[i]; i++)
2083                 values[i] = NULL;
2084
2085         r = cg_get_path(controller, path, attribute, &filename);
2086         if (r < 0)
2087                 return r;
2088
2089         r = read_full_file(filename, &content, NULL);
2090         if (r < 0)
2091                 return r;
2092
2093         p = content;
2094         while ((line = strsep(&p, "\n"))) {
2095                 char *key;
2096
2097                 key = strsep(&line, " ");
2098
2099                 for (i = 0; keys[i]; i++) {
2100                         if (streq(key, keys[i])) {
2101                                 values[i] = strdup(line);
2102                                 break;
2103                         }
2104                 }
2105         }
2106
2107         for (i = 0; keys[i]; i++) {
2108                 if (!values[i]) {
2109                         for (i = 0; keys[i]; i++) {
2110                                 free(values[i]);
2111                                 values[i] = NULL;
2112                         }
2113                         return -ENOENT;
2114                 }
2115         }
2116
2117         return 0;
2118 }
2119
2120 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
2121         CGroupController c;
2122         int r;
2123
2124         /* This one will create a cgroup in our private tree, but also
2125          * duplicate it in the trees specified in mask, and remove it
2126          * in all others */
2127
2128         /* First create the cgroup in our own hierarchy. */
2129         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
2130         if (r < 0)
2131                 return r;
2132
2133         /* If we are in the unified hierarchy, we are done now */
2134         r = cg_all_unified();
2135         if (r < 0)
2136                 return r;
2137         if (r > 0)
2138                 return 0;
2139
2140         /* Otherwise, do the same in the other hierarchies */
2141         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2142                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2143                 const char *n;
2144
2145                 n = cgroup_controller_to_string(c);
2146
2147                 if (mask & bit)
2148                         (void) cg_create(n, path);
2149                 else if (supported & bit)
2150                         (void) cg_trim(n, path, true);
2151         }
2152
2153         return 0;
2154 }
2155
2156 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2157         CGroupController c;
2158         int r;
2159
2160         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
2161         if (r < 0)
2162                 return r;
2163
2164         r = cg_all_unified();
2165         if (r < 0)
2166                 return r;
2167         if (r > 0)
2168                 return 0;
2169
2170         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2171                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2172                 const char *p = NULL;
2173
2174                 if (!(supported & bit))
2175                         continue;
2176
2177                 if (path_callback)
2178                         p = path_callback(bit, userdata);
2179
2180                 if (!p)
2181                         p = path;
2182
2183                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
2184         }
2185
2186         return 0;
2187 }
2188
2189 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
2190         Iterator i;
2191         void *pidp;
2192         int r = 0;
2193
2194         SET_FOREACH(pidp, pids, i) {
2195                 pid_t pid = PTR_TO_PID(pidp);
2196                 int q;
2197
2198                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
2199                 if (q < 0 && r >= 0)
2200                         r = q;
2201         }
2202
2203         return r;
2204 }
2205
2206 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
2207         CGroupController c;
2208         int r = 0, q;
2209
2210         if (!path_equal(from, to))  {
2211                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
2212                 if (r < 0)
2213                         return r;
2214         }
2215
2216         q = cg_all_unified();
2217         if (q < 0)
2218                 return q;
2219         if (q > 0)
2220                 return r;
2221
2222         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2223                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2224                 const char *p = NULL;
2225
2226                 if (!(supported & bit))
2227                         continue;
2228
2229                 if (to_callback)
2230                         p = to_callback(bit, userdata);
2231
2232                 if (!p)
2233                         p = to;
2234
2235                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2236         }
2237
2238         return 0;
2239 }
2240
2241 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2242         CGroupController c;
2243         int r, q;
2244
2245         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2246         if (r < 0)
2247                 return r;
2248
2249         q = cg_all_unified();
2250         if (q < 0)
2251                 return q;
2252         if (q > 0)
2253                 return r;
2254
2255         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2256                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2257
2258                 if (!(supported & bit))
2259                         continue;
2260
2261                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2262         }
2263
2264         return 0;
2265 }
2266 #endif // 0
2267
2268 int cg_mask_supported(CGroupMask *ret) {
2269         CGroupMask mask = 0;
2270         int r;
2271
2272         /* Determines the mask of supported cgroup controllers. Only
2273          * includes controllers we can make sense of and that are
2274          * actually accessible. */
2275
2276         r = cg_all_unified();
2277         if (r < 0)
2278                 return r;
2279         if (r > 0) {
2280                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2281                 const char *c;
2282
2283                 /* In the unified hierarchy we can read the supported
2284                  * and accessible controllers from a the top-level
2285                  * cgroup attribute */
2286
2287                 r = cg_get_root_path(&root);
2288                 if (r < 0)
2289                         return r;
2290
2291                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2292                 if (r < 0)
2293                         return r;
2294
2295                 r = read_one_line_file(path, &controllers);
2296                 if (r < 0)
2297                         return r;
2298
2299                 c = controllers;
2300                 for (;;) {
2301                         _cleanup_free_ char *n = NULL;
2302                         CGroupController v;
2303
2304                         r = extract_first_word(&c, &n, NULL, 0);
2305                         if (r < 0)
2306                                 return r;
2307                         if (r == 0)
2308                                 break;
2309
2310                         v = cgroup_controller_from_string(n);
2311                         if (v < 0)
2312                                 continue;
2313
2314                         mask |= CGROUP_CONTROLLER_TO_MASK(v);
2315                 }
2316
2317                 /* Currently, we support the cpu, memory, io and pids
2318                  * controller in the unified hierarchy, mask
2319                  * everything else off. */
2320                 mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2321
2322         } else {
2323                 CGroupController c;
2324
2325                 /* In the legacy hierarchy, we check whether which
2326                  * hierarchies are mounted. */
2327
2328                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2329                         const char *n;
2330
2331                         n = cgroup_controller_to_string(c);
2332                         if (controller_is_accessible(n) >= 0)
2333                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2334                 }
2335         }
2336
2337         *ret = mask;
2338         return 0;
2339 }
2340
2341 #if 0 /// UNNEEDED by elogind
2342 int cg_kernel_controllers(Set *controllers) {
2343         _cleanup_fclose_ FILE *f = NULL;
2344         char buf[LINE_MAX];
2345         int r;
2346
2347         assert(controllers);
2348
2349         /* Determines the full list of kernel-known controllers. Might
2350          * include controllers we don't actually support, arbitrary
2351          * named hierarchies and controllers that aren't currently
2352          * accessible (because not mounted). */
2353
2354         f = fopen("/proc/cgroups", "re");
2355         if (!f) {
2356                 if (errno == ENOENT)
2357                         return 0;
2358                 return -errno;
2359         }
2360
2361         /* Ignore the header line */
2362         (void) fgets(buf, sizeof(buf), f);
2363
2364         for (;;) {
2365                 char *controller;
2366                 int enabled = 0;
2367
2368                 errno = 0;
2369                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2370
2371                         if (feof(f))
2372                                 break;
2373
2374                         if (ferror(f) && errno > 0)
2375                                 return -errno;
2376
2377                         return -EBADMSG;
2378                 }
2379
2380                 if (!enabled) {
2381                         free(controller);
2382                         continue;
2383                 }
2384
2385                 if (!cg_controller_is_valid(controller)) {
2386                         free(controller);
2387                         return -EBADMSG;
2388                 }
2389
2390                 r = set_consume(controllers, controller);
2391                 if (r < 0)
2392                         return r;
2393         }
2394
2395         return 0;
2396 }
2397 #endif // 0
2398
/* Per-thread cache of the detected cgroup hierarchy layout. It stays at CGROUP_UNIFIED_UNKNOWN
 * until cg_unified_update() has determined the layout, and is reset by cg_unified_flush(). */
static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;

/* The hybrid mode was initially implemented in v232 and simply mounted cgroup v2 on /sys/fs/cgroup/systemd.  This
 * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
 * /sys/fs/cgroup/systemd.  From v233 and on, the hybrid mode mounts v2 on /sys/fs/cgroup/unified and maintains
 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
 *
 * To keep live upgrade working, we detect and support v232 layout.  When v232 layout is detected, to keep cgroup v2
 * process management but disable the compat dual layout, we return %true on
 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
 */
static thread_local bool unified_systemd_v232;
2411
/* Fills in unified_cache if it has not been determined yet.
 *
 * Returns 0 if the cache was already valid or has been filled in now, and -errno if
 * statfs() on /sys/fs/cgroup/ fails. The detection code that is compiled out for elogind can
 * additionally return -errno for /sys/fs/cgroup/systemd/ and -ENOMEDIUM for unexpected file
 * system types. */
static int cg_unified_update(void) {

        struct statfs fs;

        /* Checks if we support the unified hierarchy. Returns an
         * error when the cgroup hierarchies aren't mounted yet or we
         * have any other trouble determining if the unified hierarchy
         * is supported. */

        /* Already detected earlier? Then there is nothing to do. */
        if (unified_cache >= CGROUP_UNIFIED_NONE)
                return 0;

        /* Note that this check is done in the elogind build, too, even though the file
         * system type is not evaluated there. */
        if (statfs("/sys/fs/cgroup/", &fs) < 0)
                return -errno;

#if 0 /// UNNEEDED by elogind
        if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC))
                unified_cache = CGROUP_UNIFIED_ALL;
        else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
                if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
                    F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
                        unified_cache = CGROUP_UNIFIED_SYSTEMD;
                        unified_systemd_v232 = false;
                } else if (statfs("/sys/fs/cgroup/systemd/", &fs) == 0 &&
                           F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
                        unified_cache = CGROUP_UNIFIED_SYSTEMD;
                        unified_systemd_v232 = true;
                } else {
                        if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
                                return -errno;
                        if (!F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
                                return -ENOMEDIUM;
                        unified_cache = CGROUP_UNIFIED_NONE;
                }
        } else
                return -ENOMEDIUM;
#else
        /* elogind can not support the unified hierarchy as a controller,
         * so always assume a classical hierarchy.
         * If, and only *if*, someone really wants to substitute systemd-login
         * in an environment managed by systemd with elogind, we might have to
         * add such a support. */
        unified_cache = CGROUP_UNIFIED_NONE;
#endif // 0

        return 0;
}
2459
2460 int cg_unified_controller(const char *controller) {
2461         int r;
2462
2463         r = cg_unified_update();
2464         if (r < 0)
2465                 return r;
2466
2467         if (unified_cache == CGROUP_UNIFIED_NONE)
2468                 return false;
2469
2470         if (unified_cache >= CGROUP_UNIFIED_ALL)
2471                 return true;
2472
2473         return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
2474 }
2475
2476 int cg_all_unified(void) {
2477         int r;
2478
2479         r = cg_unified_update();
2480         if (r < 0)
2481                 return r;
2482
2483         return unified_cache >= CGROUP_UNIFIED_ALL;
2484 }
2485
2486 int cg_hybrid_unified(void) {
2487         int r;
2488
2489         r = cg_unified_update();
2490         if (r < 0)
2491                 return r;
2492
2493         return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2494 }
2495
2496 int cg_unified_flush(void) {
2497         unified_cache = CGROUP_UNIFIED_UNKNOWN;
2498
2499         return cg_unified_update();
2500 }
2501
2502 #if 0 /// UNNEEDED by elogind
2503 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2504         _cleanup_free_ char *fs = NULL;
2505         CGroupController c;
2506         int r;
2507
2508         assert(p);
2509
2510         if (supported == 0)
2511                 return 0;
2512
2513         r = cg_all_unified();
2514         if (r < 0)
2515                 return r;
2516         if (r == 0) /* on the legacy hiearchy there's no joining of controllers defined */
2517                 return 0;
2518
2519         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2520         if (r < 0)
2521                 return r;
2522
2523         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2524                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2525                 const char *n;
2526
2527                 if (!(supported & bit))
2528                         continue;
2529
2530                 n = cgroup_controller_to_string(c);
2531                 {
2532                         char s[1 + strlen(n) + 1];
2533
2534                         s[0] = mask & bit ? '+' : '-';
2535                         strcpy(s + 1, n);
2536
2537                         r = write_string_file(fs, s, 0);
2538                         if (r < 0)
2539                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2540                 }
2541         }
2542
2543         return 0;
2544 }
2545
2546 bool cg_is_unified_wanted(void) {
2547         static thread_local int wanted = -1;
2548         int r;
2549         bool b;
2550         const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
2551
2552         /* If we have a cached value, return that. */
2553         if (wanted >= 0)
2554                 return wanted;
2555
2556         /* If the hierarchy is already mounted, then follow whatever
2557          * was chosen for it. */
2558         if (cg_unified_flush() >= 0)
2559                 return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
2560
2561         /* Otherwise, let's see what the kernel command line has to say.
2562          * Since checking is expensive, cache a non-error result. */
2563         r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
2564
2565         return (wanted = r > 0 ? b : is_default);
2566 }
2567
2568 bool cg_is_legacy_wanted(void) {
2569         static thread_local int wanted = -1;
2570
2571         /* If we have a cached value, return that. */
2572         if (wanted >= 0)
2573                 return wanted;
2574
2575         /* Check if we have cgroups2 already mounted. */
2576         if (cg_unified_flush() >= 0 &&
2577             unified_cache == CGROUP_UNIFIED_ALL)
2578                 return (wanted = false);
2579
2580         /* Otherwise, assume that at least partial legacy is wanted,
2581          * since cgroups2 should already be mounted at this point. */
2582         return (wanted = true);
2583 }
2584
2585 bool cg_is_hybrid_wanted(void) {
2586         static thread_local int wanted = -1;
2587         int r;
2588         bool b;
2589         const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
2590         /* We default to true if the default is "hybrid", obviously,
2591          * but also when the default is "unified", because if we get
2592          * called, it means that unified hierarchy was not mounted. */
2593
2594         /* If we have a cached value, return that. */
2595         if (wanted >= 0)
2596                 return wanted;
2597
2598         /* If the hierarchy is already mounted, then follow whatever
2599          * was chosen for it. */
2600         if (cg_unified_flush() >= 0 &&
2601             unified_cache == CGROUP_UNIFIED_ALL)
2602                 return (wanted = false);
2603
2604         /* Otherwise, let's see what the kernel command line has to say.
2605          * Since checking is expensive, cache a non-error result. */
2606         r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
2607
2608         /* The meaning of the kernel option is reversed wrt. to the return value
2609          * of this function, hence the negation. */
2610         return (wanted = r > 0 ? !b : is_default);
2611 }
2612 #else
/* elogind build: the unified hierarchy is never requested, so this is always false. */
bool cg_is_unified_wanted(void) {
        return false;
}
/* elogind build: the legacy hierarchy is always requested, so this is always true. */
bool cg_is_legacy_wanted(void) {
        return true;
}
/* elogind build: the hybrid setup is never requested, so this is always false. */
bool cg_is_hybrid_wanted(void) {
        return false;
}
2622 #endif // 0
2623
2624 #if 0 /// UNNEEDED by elogind
2625 int cg_weight_parse(const char *s, uint64_t *ret) {
2626         uint64_t u;
2627         int r;
2628
2629         if (isempty(s)) {
2630                 *ret = CGROUP_WEIGHT_INVALID;
2631                 return 0;
2632         }
2633
2634         r = safe_atou64(s, &u);
2635         if (r < 0)
2636                 return r;
2637
2638         if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2639                 return -ERANGE;
2640
2641         *ret = u;
2642         return 0;
2643 }
2644
/* Default value per I/O limit type, indexed by the CGROUP_IO_* limit type constants. Every
 * type listed here maps to CGROUP_LIMIT_MAX. */
const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]    = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WBPS_MAX]    = CGROUP_LIMIT_MAX,
        [CGROUP_IO_RIOPS_MAX]   = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WIOPS_MAX]   = CGROUP_LIMIT_MAX,
};
2651
/* String name of each I/O limit type, indexed by the CGROUP_IO_* limit type constants. */
static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]    = "IOReadBandwidthMax",
        [CGROUP_IO_WBPS_MAX]    = "IOWriteBandwidthMax",
        [CGROUP_IO_RIOPS_MAX]   = "IOReadIOPSMax",
        [CGROUP_IO_WIOPS_MAX]   = "IOWriteIOPSMax",
};

/* Generates the string lookup helpers for the table above (presumably
 * cgroup_io_limit_type_to_string()/cgroup_io_limit_type_from_string(), by analogy with the
 * cgroup_controller helpers used in this file -- confirm against string-table.h). */
DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2660
2661 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2662         uint64_t u;
2663         int r;
2664
2665         if (isempty(s)) {
2666                 *ret = CGROUP_CPU_SHARES_INVALID;
2667                 return 0;
2668         }
2669
2670         r = safe_atou64(s, &u);
2671         if (r < 0)
2672                 return r;
2673
2674         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2675                 return -ERANGE;
2676
2677         *ret = u;
2678         return 0;
2679 }
2680
2681 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2682         uint64_t u;
2683         int r;
2684
2685         if (isempty(s)) {
2686                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2687                 return 0;
2688         }
2689
2690         r = safe_atou64(s, &u);
2691         if (r < 0)
2692                 return r;
2693
2694         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2695                 return -ERANGE;
2696
2697         *ret = u;
2698         return 0;
2699 }
2700 #endif // 0
2701
2702 bool is_cgroup_fs(const struct statfs *s) {
2703         return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2704                is_fs_type(s, CGROUP2_SUPER_MAGIC);
2705 }
2706
/* Returns true if the file descriptor 'fd' refers to a file system that is_cgroup_fs()
 * recognizes. If the file system cannot be queried at all (fstatfs() fails, for example
 * because 'fd' is invalid) false is returned. */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        /* The return type is bool, there is no way to report an error code from here:
         * returning -errno would be converted to 'true' and make a failed lookup look like
         * a cgroup file system. */
        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2715
2716 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2717         [CGROUP_CONTROLLER_CPU] = "cpu",
2718         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2719         [CGROUP_CONTROLLER_IO] = "io",
2720         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2721         [CGROUP_CONTROLLER_MEMORY] = "memory",
2722         [CGROUP_CONTROLLER_DEVICES] = "devices",
2723         [CGROUP_CONTROLLER_PIDS] = "pids",
2724 };
2725
2726 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);