src/basic/cgroup-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <errno.h>
  23 #include <unistd.h>
  24 #include <signal.h>
  25 #include <string.h>
  26 #include <stdlib.h>
  27 #include <dirent.h>
  28 #include <sys/stat.h>
  29 #include <sys/types.h>
  30 #include <ftw.h>
  31
  32 #include "set.h"
  33 #include "macro.h"
  34 #include "util.h"
  35 #include "formats-util.h"
  36 #include "process-util.h"
  37 #include "path-util.h"
  38 // #include "unit-name.h"
  39 #include "fileio.h"
  40 // #include "special.h"
  41 #include "mkdir.h"
  42 #include "login-util.h"
  43 #include "cgroup-util.h"
  44
  45 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  46         _cleanup_free_ char *fs = NULL;
  47         FILE *f;
  48         int r;
  49
  50         assert(_f);
  51
  52         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  53         if (r < 0)
  54                 return r;
  55
  56         f = fopen(fs, "re");
  57         if (!f)
  58                 return -errno;
  59
  60         *_f = f;
  61         return 0;
  62 }
  63
  64 int cg_read_pid(FILE *f, pid_t *_pid) {
  65         unsigned long ul;
  66
  67         /* Note that the cgroup.procs might contain duplicates! See
  68          * cgroups.txt for details. */
  69
  70         assert(f);
  71         assert(_pid);
  72
  73         errno = 0;
  74         if (fscanf(f, "%lu", &ul) != 1) {
  75
  76                 if (feof(f))
  77                         return 0;
  78
  79                 return errno ? -errno : -EIO;
  80         }
  81
  82         if (ul <= 0)
  83                 return -EIO;
  84
  85         *_pid = (pid_t) ul;
  86         return 1;
  87 }
  88
  89 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
  90         _cleanup_free_ char *fs = NULL;
  91         int r;
  92         DIR *d;
  93
  94         assert(_d);
  95
  96         /* This is not recursive! */
  97
  98         r = cg_get_path(controller, path, NULL, &fs);
  99         if (r < 0)
 100                 return r;
 101
 102         d = opendir(fs);
 103         if (!d)
 104                 return -errno;
 105
 106         *_d = d;
 107         return 0;
 108 }
 109
 110 int cg_read_subgroup(DIR *d, char **fn) {
 111         struct dirent *de;
 112
 113         assert(d);
 114         assert(fn);
 115
 116         FOREACH_DIRENT_ALL(de, d, return -errno) {
 117                 char *b;
 118
 119                 if (de->d_type != DT_DIR)
 120                         continue;
 121
 122                 if (streq(de->d_name, ".") ||
 123                     streq(de->d_name, ".."))
 124                         continue;
 125
 126                 b = strdup(de->d_name);
 127                 if (!b)
 128                         return -ENOMEM;
 129
 130                 *fn = b;
 131                 return 1;
 132         }
 133
 134         return 0;
 135 }
 136
 137 int cg_rmdir(const char *controller, const char *path) {
 138         _cleanup_free_ char *p = NULL;
 139         int r;
 140
 141         r = cg_get_path(controller, path, NULL, &p);
 142         if (r < 0)
 143                 return r;
 144
 145         r = rmdir(p);
 146         if (r < 0 && errno != ENOENT)
 147                 return -errno;
 148
 149         return 0;
 150 }
 151
 152 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
 153         _cleanup_set_free_ Set *allocated_set = NULL;
 154         bool done = false;
 155         int r, ret = 0;
 156         pid_t my_pid;
 157
 158         assert(sig >= 0);
 159
 160         /* This goes through the tasks list and kills them all. This
 161          * is repeated until no further processes are added to the
 162          * tasks list, to properly handle forking processes */
 163
 164         if (!s) {
 165                 s = allocated_set = set_new(NULL);
 166                 if (!s)
 167                         return -ENOMEM;
 168         }
 169
 170         my_pid = getpid();
 171
 172         do {
 173                 _cleanup_fclose_ FILE *f = NULL;
 174                 pid_t pid = 0;
 175                 done = true;
 176
 177                 r = cg_enumerate_processes(controller, path, &f);
 178                 if (r < 0) {
 179                         if (ret >= 0 && r != -ENOENT)
 180                                 return r;
 181
 182                         return ret;
 183                 }
 184
 185                 while ((r = cg_read_pid(f, &pid)) > 0) {
 186
 187                         if (ignore_self && pid == my_pid)
 188                                 continue;
 189
 190                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 191                                 continue;
 192
 193                         /* If we haven't killed this process yet, kill
 194                          * it */
 195                         if (kill(pid, sig) < 0) {
 196                                 if (ret >= 0 && errno != ESRCH)
 197                                         ret = -errno;
 198                         } else {
 199                                 if (sigcont && sig != SIGKILL)
 200                                         (void) kill(pid, SIGCONT);
 201
 202                                 if (ret == 0)
 203                                         ret = 1;
 204                         }
 205
 206                         done = false;
 207
 208                         r = set_put(s, PID_TO_PTR(pid));
 209                         if (r < 0) {
 210                                 if (ret >= 0)
 211                                         return r;
 212
 213                                 return ret;
 214                         }
 215                 }
 216
 217                 if (r < 0) {
 218                         if (ret >= 0)
 219                                 return r;
 220
 221                         return ret;
 222                 }
 223
 224                 /* To avoid racing against processes which fork
 225                  * quicker than we can kill them we repeat this until
 226                  * no new pids need to be killed. */
 227
 228         } while (!done);
 229
 230         return ret;
 231 }
 232
 233 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
 234         _cleanup_set_free_ Set *allocated_set = NULL;
 235         _cleanup_closedir_ DIR *d = NULL;
 236         int r, ret;
 237         char *fn;
 238
 239         assert(path);
 240         assert(sig >= 0);
 241
 242         if (!s) {
 243                 s = allocated_set = set_new(NULL);
 244                 if (!s)
 245                         return -ENOMEM;
 246         }
 247
 248         ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
 249
 250         r = cg_enumerate_subgroups(controller, path, &d);
 251         if (r < 0) {
 252                 if (ret >= 0 && r != -ENOENT)
 253                         return r;
 254
 255                 return ret;
 256         }
 257
 258         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 259                 _cleanup_free_ char *p = NULL;
 260
 261                 p = strjoin(path, "/", fn, NULL);
 262                 free(fn);
 263                 if (!p)
 264                         return -ENOMEM;
 265
 266                 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
 267                 if (r != 0 && ret >= 0)
 268                         ret = r;
 269         }
 270
 271         if (ret >= 0 && r < 0)
 272                 ret = r;
 273
 274         if (rem) {
 275                 r = cg_rmdir(controller, path);
 276                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 277                         return r;
 278         }
 279
 280         return ret;
 281 }
 282
 283 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
 284         bool done = false;
 285         _cleanup_set_free_ Set *s = NULL;
 286         int r, ret = 0;
 287         pid_t my_pid;
 288
 289         assert(cfrom);
 290         assert(pfrom);
 291         assert(cto);
 292         assert(pto);
 293
 294         s = set_new(NULL);
 295         if (!s)
 296                 return -ENOMEM;
 297
 298         my_pid = getpid();
 299
 300         log_debug_elogind("Migrating \"%s\"/\"%s\" to \"%s\"/\"%s\" (%s)",
 301                           cfrom, pfrom, cto, pto,
 302                           ignore_self ? "ignoring self" : "watching self");
 303
 304         do {
 305                 _cleanup_fclose_ FILE *f = NULL;
 306                 pid_t pid = 0;
 307                 done = true;
 308
 309                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 310                 if (r < 0) {
 311                         if (ret >= 0 && r != -ENOENT)
 312                                 return r;
 313
 314                         return ret;
 315                 }
 316
 317                 while ((r = cg_read_pid(f, &pid)) > 0) {
 318
 319                         /* This might do weird stuff if we aren't a
 320                          * single-threaded program. However, we
 321                          * luckily know we are not */
 322                         if (ignore_self && pid == my_pid)
 323                                 continue;
 324
 325                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 326                                 continue;
 327
 328                         /* Ignore kernel threads. Since they can only
 329                          * exist in the root cgroup, we only check for
 330                          * them there. */
 331                         if (cfrom &&
 332                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 333                             is_kernel_thread(pid) > 0)
 334                                 continue;
 335
 336                         r = cg_attach(cto, pto, pid);
 337                         if (r < 0) {
 338                                 if (ret >= 0 && r != -ESRCH)
 339                                         ret = r;
 340                         } else if (ret == 0)
 341                                 ret = 1;
 342
 343                         done = false;
 344
 345                         r = set_put(s, PID_TO_PTR(pid));
 346                         if (r < 0) {
 347                                 if (ret >= 0)
 348                                         return r;
 349
 350                                 return ret;
 351                         }
 352                 }
 353
 354                 if (r < 0) {
 355                         if (ret >= 0)
 356                                 return r;
 357
 358                         return ret;
 359                 }
 360         } while (!done);
 361
 362         return ret;
 363 }
 364
 365 int cg_migrate_recursive(
 366                 const char *cfrom,
 367                 const char *pfrom,
 368                 const char *cto,
 369                 const char *pto,
 370                 bool ignore_self,
 371                 bool rem) {
 372
 373         _cleanup_closedir_ DIR *d = NULL;
 374         int r, ret = 0;
 375         char *fn;
 376
 377         assert(cfrom);
 378         assert(pfrom);
 379         assert(cto);
 380         assert(pto);
 381
 382         ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
 383
 384         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 385         if (r < 0) {
 386                 if (ret >= 0 && r != -ENOENT)
 387                         return r;
 388
 389                 return ret;
 390         }
 391
 392         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 393                 _cleanup_free_ char *p = NULL;
 394
 395                 p = strjoin(pfrom, "/", fn, NULL);
 396                 free(fn);
 397                 if (!p)
 398                                 return -ENOMEM;
 399
 400                 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
 401                 if (r != 0 && ret >= 0)
 402                         ret = r;
 403         }
 404
 405         if (r < 0 && ret >= 0)
 406                 ret = r;
 407
 408         if (rem) {
 409                 r = cg_rmdir(cfrom, pfrom);
 410                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 411                         return r;
 412         }
 413
 414         return ret;
 415 }
 416
 417 int cg_migrate_recursive_fallback(
 418                 const char *cfrom,
 419                 const char *pfrom,
 420                 const char *cto,
 421                 const char *pto,
 422                 bool ignore_self,
 423                 bool rem) {
 424
 425         int r;
 426
 427         assert(cfrom);
 428         assert(pfrom);
 429         assert(cto);
 430         assert(pto);
 431
 432         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
 433         if (r < 0) {
 434                 char prefix[strlen(pto) + 1];
 435
 436                 /* This didn't work? Then let's try all prefixes of the destination */
 437
 438                 PATH_FOREACH_PREFIX(prefix, pto) {
 439                         int q;
 440
 441                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
 442                         if (q >= 0)
 443                                 return q;
 444                 }
 445         }
 446
 447         return r;
 448 }
 449
 450 static const char *controller_to_dirname(const char *controller) {
 451         const char *e;
 452
 453         assert(controller);
 454
 455         /* Converts a controller name to the directory name below
 456          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 457          * just cuts off the name= prefixed used for named
 458          * hierarchies, if it is specified. */
 459
 460         e = startswith(controller, "name=");
 461         if (e)
 462                 return e;
 463
 464                 return controller;
 465 }
 466
 467 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 468         const char *dn;
 469         char *t = NULL;
 470
 471         assert(fs);
 472         assert(controller);
 473
 474         dn = controller_to_dirname(controller);
 475
 476         if (isempty(path) && isempty(suffix))
 477                 t = strappend("/sys/fs/cgroup/", dn);
 478         else if (isempty(path))
 479                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
 480         else if (isempty(suffix))
 481                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
 482                 else
 483                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
 484         if (!t)
 485                 return -ENOMEM;
 486
 487         *fs = t;
 488         return 0;
 489         }
 490
 491 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 492         char *t;
 493
 494         assert(fs);
 495
 496         if (isempty(path) && isempty(suffix))
 497                 t = strdup("/sys/fs/cgroup");
 498         else if (isempty(path))
 499                 t = strappend("/sys/fs/cgroup/", suffix);
 500         else if (isempty(suffix))
 501                 t = strappend("/sys/fs/cgroup/", path);
 502         else
 503                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
 504         if (!t)
 505                 return -ENOMEM;
 506
 507         *fs = t;
 508         return 0;
 509 }
 510
 511 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 512         int unified, r;
 513
 514         assert(fs);
 515
 516         if (!controller) {
 517                 char *t;
 518
 519                 /* If no controller is specified, we return the path
 520                  * *below* the controllers, without any prefix. */
 521
 522                 if (!path && !suffix)
 523                 return -EINVAL;
 524
 525                 if (!suffix)
 526                         t = strdup(path);
 527                 else if (!path)
 528                         t = strdup(suffix);
 529                 else
 530                         t = strjoin(path, "/", suffix, NULL);
 531                 if (!t)
 532                         return -ENOMEM;
 533
 534                 *fs = path_kill_slashes(t);
 535                 return 0;
 536         }
 537
 538         if (!cg_controller_is_valid(controller))
 539                 return -EINVAL;
 540
 541         unified = cg_unified();
 542         if (unified < 0)
 543                 return unified;
 544
 545         if (unified > 0)
 546                 r = join_path_unified(path, suffix, fs);
 547         else
 548                 r = join_path_legacy(controller, path, suffix, fs);
 549                 if (r < 0)
 550                         return r;
 551
 552         path_kill_slashes(*fs);
 553         return 0;
 554         }
 555
 556 static int controller_is_accessible(const char *controller) {
 557         int unified;
 558
 559         assert(controller);
 560
 561         /* Checks whether a specific controller is accessible,
 562          * i.e. its hierarchy mounted. In the unified hierarchy all
 563          * controllers are considered accessible, except for the named
 564          * hierarchies */
 565
 566         if (!cg_controller_is_valid(controller))
 567                 return -EINVAL;
 568
 569         unified = cg_unified();
 570         if (unified < 0)
 571                 return unified;
 572         if (unified > 0) {
 573                 /* We don't support named hierarchies if we are using
 574                  * the unified hierarchy. */
 575
 576                 if (streq(controller, ELOGIND_CGROUP_CONTROLLER))
 577                         return 0;
 578
 579                 if (startswith(controller, "name="))
 580                         return -EOPNOTSUPP;
 581
 582         } else {
 583                 const char *cc, *dn;
 584
 585                 dn = controller_to_dirname(controller);
 586                 cc = strjoina("/sys/fs/cgroup/", dn);
 587
 588                 if (laccess(cc, F_OK) < 0)
 589                         return -errno;
 590         }
 591
 592         return 0;
 593 }
 594
 595 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 596         int r;
 597
 598         assert(controller);
 599         assert(fs);
 600
 601         /* Check if the specified controller is actually accessible */
 602         r = controller_is_accessible(controller);
 603         if (r < 0)
 604                 return r;
 605
 606         return cg_get_path(controller, path, suffix, fs);
 607 }
 608
 609 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 610         assert(path);
 611         assert(sb);
 612         assert(ftwbuf);
 613
 614         if (typeflag != FTW_DP)
 615                 return 0;
 616
 617         if (ftwbuf->level < 1)
 618                 return 0;
 619
 620         (void) rmdir(path);
 621         return 0;
 622 }
 623
 624 int cg_trim(const char *controller, const char *path, bool delete_root) {
 625         _cleanup_free_ char *fs = NULL;
 626         int r = 0;
 627
 628         assert(path);
 629
 630         r = cg_get_path(controller, path, NULL, &fs);
 631         if (r < 0)
 632                 return r;
 633
 634         errno = 0;
 635         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 636                 if (errno == ENOENT)
 637                         r = 0;
 638                 else if (errno != 0)
 639                         r = -errno;
 640                 else
 641                         r = -EIO;
 642         }
 643
 644         if (delete_root) {
 645                 if (rmdir(fs) < 0 && errno != ENOENT)
 646                         return -errno;
 647         }
 648
 649         return r;
 650 }
 651
 652 int cg_create(const char *controller, const char *path) {
 653         _cleanup_free_ char *fs = NULL;
 654         int r;
 655
 656         r = cg_get_path_and_check(controller, path, NULL, &fs);
 657         if (r < 0)
 658                 return r;
 659
 660         r = mkdir_parents(fs, 0755);
 661         if (r < 0)
 662                 return r;
 663
 664         if (mkdir(fs, 0755) < 0) {
 665
 666                 if (errno == EEXIST)
 667                         return 0;
 668
 669                 return -errno;
 670         }
 671
 672         return 1;
 673 }
 674
 675 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 676         int r, q;
 677
 678         assert(pid >= 0);
 679
 680         r = cg_create(controller, path);
 681         if (r < 0)
 682                 return r;
 683
 684         q = cg_attach(controller, path, pid);
 685         if (q < 0)
 686                 return q;
 687
 688         /* This does not remove the cgroup on failure */
 689         return r;
 690 }
 691
 692 int cg_attach(const char *controller, const char *path, pid_t pid) {
 693         _cleanup_free_ char *fs = NULL;
 694         char c[DECIMAL_STR_MAX(pid_t) + 2];
 695         int r;
 696
 697         assert(path);
 698         assert(pid >= 0);
 699
 700         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 701         if (r < 0)
 702                 return r;
 703
 704         if (pid == 0)
 705                 pid = getpid();
 706
 707         snprintf(c, sizeof(c), PID_FMT"\n", pid);
 708
 709         return write_string_file_no_create(fs, c);
 710 }
 711
 712 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 713         int r;
 714
 715         assert(controller);
 716         assert(path);
 717         assert(pid >= 0);
 718
 719         r = cg_attach(controller, path, pid);
 720         if (r < 0) {
 721                 char prefix[strlen(path) + 1];
 722
 723                 /* This didn't work? Then let's try all prefixes of
 724                  * the destination */
 725
 726                 PATH_FOREACH_PREFIX(prefix, path) {
 727                         int q;
 728
 729                         q = cg_attach(controller, prefix, pid);
 730                         if (q >= 0)
 731                                 return q;
 732                 }
 733         }
 734
 735         return r;
 736 }
 737
 738 /// UNNEEDED by elogind
 739 #if 0
 740 int cg_set_group_access(
 741                 const char *controller,
 742                 const char *path,
 743                 mode_t mode,
 744                 uid_t uid,
 745                 gid_t gid) {
 746
 747         _cleanup_free_ char *fs = NULL;
 748         int r;
 749
 750         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 751                 return 0;
 752
 753         if (mode != MODE_INVALID)
 754                 mode &= 0777;
 755
 756         r = cg_get_path(controller, path, NULL, &fs);
 757         if (r < 0)
 758                 return r;
 759
 760         return chmod_and_chown(fs, mode, uid, gid);
 761 }
 762
 763 int cg_set_task_access(
 764                 const char *controller,
 765                 const char *path,
 766                 mode_t mode,
 767                 uid_t uid,
 768                 gid_t gid) {
 769
 770         _cleanup_free_ char *fs = NULL, *procs = NULL;
 771         int r, unified;
 772
 773         assert(path);
 774
 775         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 776                 return 0;
 777
 778         if (mode != MODE_INVALID)
 779                 mode &= 0666;
 780
 781         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 782         if (r < 0)
 783                 return r;
 784
 785         r = chmod_and_chown(fs, mode, uid, gid);
 786         if (r < 0)
 787                 return r;
 788
 789         unified = cg_unified();
 790         if (unified < 0)
 791                 return unified;
 792         if (unified)
 793                 return 0;
 794
 795         /* Compatibility, Always keep values for "tasks" in sync with
 796          * "cgroup.procs" */
 797         if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 798                 (void) chmod_and_chown(procs, mode, uid, gid);
 799
 800         return 0;
 801 }
 802 #endif // 0
 803
 804 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
 805         _cleanup_fclose_ FILE *f = NULL;
 806         char line[LINE_MAX];
 807         const char *fs;
 808         size_t cs = 0;
 809         int unified;
 810
 811         assert(path);
 812         assert(pid >= 0);
 813
 814         unified = cg_unified();
 815         if (unified < 0)
 816                 return unified;
 817         if (unified == 0) {
 818                 if (controller) {
 819                         if (!cg_controller_is_valid(controller))
 820                                 return -EINVAL;
 821                 } else
 822                         controller = ELOGIND_CGROUP_CONTROLLER;
 823
 824                 cs = strlen(controller);
 825         }
 826
 827         fs = procfs_file_alloca(pid, "cgroup");
 828         f = fopen(fs, "re");
 829         if (!f)
 830                 return errno == ENOENT ? -ESRCH : -errno;
 831
 832         FOREACH_LINE(line, f, return -errno) {
 833                 char *e, *p;
 834
 835                 truncate_nl(line);
 836
 837                 if (unified) {
 838                         e = startswith(line, "0:");
 839                         if (!e)
 840                                 continue;
 841
 842                         e = strchr(e, ':');
 843                         if (!e)
 844                                 continue;
 845                 } else {
 846                         char *l;
 847                         size_t k;
 848                         const char *word, *state;
 849                         bool found = false;
 850
 851                         l = strchr(line, ':');
 852                         if (!l)
 853                                 continue;
 854
 855                         l++;
 856                         e = strchr(l, ':');
 857                         if (!e)
 858                                 continue;
 859
 860                         *e = 0;
 861                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
 862                                 if (k == cs && memcmp(word, controller, cs) == 0) {
 863                                         found = true;
 864                                         break;
 865                                 }
 866                         }
 867
 868                         if (!found)
 869                                 continue;
 870                 }
 871
 872                 p = strdup(e + 1);
 873                 if (!p)
 874                         return -ENOMEM;
 875
 876                 *path = p;
 877                 return 0;
 878         }
 879
 880         return -ENODATA;
 881 }
 882
 883 int cg_install_release_agent(const char *controller, const char *agent) {
 884         _cleanup_free_ char *fs = NULL, *contents = NULL;
 885         const char *sc;
 886         int r, unified;
 887
 888         assert(agent);
 889
 890         unified = cg_unified();
 891         if (unified < 0)
 892                 return unified;
 893         if (unified) /* doesn't apply to unified hierarchy */
 894                 return -EOPNOTSUPP;
 895
 896         r = cg_get_path(controller, NULL, "release_agent", &fs);
 897         if (r < 0)
 898                 return r;
 899
 900         r = read_one_line_file(fs, &contents);
 901         if (r < 0)
 902                 return r;
 903
 904         sc = strstrip(contents);
 905         if (isempty(sc)) {
 906                 r = write_string_file_no_create(fs, agent);
 907                 if (r < 0)
 908                         return r;
 909         } else if (!path_equal(sc, agent))
 910                 return -EEXIST;
 911
 912         fs = mfree(fs);
 913         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 914         if (r < 0)
 915                 return r;
 916
 917         contents = mfree(contents);
 918         r = read_one_line_file(fs, &contents);
 919         if (r < 0)
 920                 return r;
 921
 922         sc = strstrip(contents);
 923         if (streq(sc, "0")) {
 924                 r = write_string_file_no_create(fs, "1");
 925                 if (r < 0)
 926                         return r;
 927
 928                 return 1;
 929         }
 930
 931         if (!streq(sc, "1"))
 932                 return -EIO;
 933
 934         return 0;
 935 }
 936
 937 int cg_uninstall_release_agent(const char *controller) {
 938         _cleanup_free_ char *fs = NULL;
 939         int r, unified;
 940
 941         unified = cg_unified();
 942         if (unified < 0)
 943                 return unified;
 944         if (unified) /* Doesn't apply to unified hierarchy */
 945                 return -EOPNOTSUPP;
 946
 947         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 948         if (r < 0)
 949                 return r;
 950
 951         r = write_string_file_no_create(fs, "0");
 952         if (r < 0)
 953                 return r;
 954
 955         fs = mfree(fs);
 956
 957         r = cg_get_path(controller, NULL, "release_agent", &fs);
 958         if (r < 0)
 959                 return r;
 960
 961         r = write_string_file_no_create(fs, "");
 962         if (r < 0)
 963                 return r;
 964
 965         return 0;
 966 }
 967
 968 int cg_is_empty(const char *controller, const char *path) {
 969         _cleanup_fclose_ FILE *f = NULL;
 970         pid_t pid;
 971         int r;
 972
 973         assert(path);
 974
 975         r = cg_enumerate_processes(controller, path, &f);
 976         if (r == -ENOENT)
 977                 return 1;
 978         if (r < 0)
 979                 return r;
 980
 981         r = cg_read_pid(f, &pid);
 982         if (r < 0)
 983                 return r;
 984
 985         return r == 0;
 986 }
 987
 988 int cg_is_empty_recursive(const char *controller, const char *path) {
 989         int unified, r;
 990
 991         assert(path);
 992
 993         /* The root cgroup is always populated */
 994         if (controller && (isempty(path) || path_equal(path, "/")))
 995                 return false;
 996
 997         unified = cg_unified();
 998         if (unified < 0)
 999                 return unified;
1000
1001         if (unified > 0) {
1002                 _cleanup_free_ char *populated = NULL, *t = NULL;
1003
1004                 /* On the unified hierarchy we can check empty state
1005                  * via the "cgroup.populated" attribute. */
1006
1007                 r = cg_get_path(controller, path, "cgroup.populated", &populated);
1008         if (r < 0)
1009                 return r;
1010
1011                 r = read_one_line_file(populated, &t);
1012                 if (r == -ENOENT)
1013                         return 1;
1014                 if (r < 0)
1015                         return r;
1016
1017                 return streq(t, "0");
1018         } else {
1019         _cleanup_closedir_ DIR *d = NULL;
1020         char *fn;
1021
1022                 r = cg_is_empty(controller, path);
1023         if (r <= 0)
1024                 return r;
1025
1026         r = cg_enumerate_subgroups(controller, path, &d);
1027                 if (r == -ENOENT)
1028                         return 1;
1029         if (r < 0)
1030                         return r;
1031
1032         while ((r = cg_read_subgroup(d, &fn)) > 0) {
1033                 _cleanup_free_ char *p = NULL;
1034
1035                 p = strjoin(path, "/", fn, NULL);
1036                 free(fn);
1037                 if (!p)
1038                         return -ENOMEM;
1039
1040                         r = cg_is_empty_recursive(controller, p);
1041                 if (r <= 0)
1042                         return r;
1043         }
1044         if (r < 0)
1045                 return r;
1046
1047                 return true;
1048         }
1049 }
1050
1051 int cg_split_spec(const char *spec, char **controller, char **path) {
1052         char *t = NULL, *u = NULL;
1053         const char *e;
1054
1055         assert(spec);
1056
1057         if (*spec == '/') {
1058                 if (!path_is_safe(spec))
1059                         return -EINVAL;
1060
1061                 if (path) {
1062                         t = strdup(spec);
1063                         if (!t)
1064                                 return -ENOMEM;
1065
1066                         *path = path_kill_slashes(t);
1067                 }
1068
1069                 if (controller)
1070                         *controller = NULL;
1071
1072                 return 0;
1073         }
1074
1075         e = strchr(spec, ':');
1076         if (!e) {
1077                 if (!cg_controller_is_valid(spec))
1078                         return -EINVAL;
1079
1080                 if (controller) {
1081                         t = strdup(spec);
1082                         if (!t)
1083                                 return -ENOMEM;
1084
1085                         *controller = t;
1086                 }
1087
1088                 if (path)
1089                         *path = NULL;
1090
1091                 return 0;
1092         }
1093
1094         t = strndup(spec, e-spec);
1095         if (!t)
1096                 return -ENOMEM;
1097         if (!cg_controller_is_valid(t)) {
1098                 free(t);
1099                 return -EINVAL;
1100         }
1101
1102         if (isempty(e+1))
1103                 u = NULL;
1104         else {
1105                 u = strdup(e+1);
1106                 if (!u) {
1107                         free(t);
1108                         return -ENOMEM;
1109                 }
1110
1111                 if (!path_is_safe(u) ||
1112                     !path_is_absolute(u)) {
1113                         free(t);
1114                         free(u);
1115                         return -EINVAL;
1116                 }
1117
1118                 path_kill_slashes(u);
1119         }
1120
1121         if (controller)
1122                 *controller = t;
1123         else
1124                 free(t);
1125
1126         if (path)
1127                 *path = u;
1128         else
1129                 free(u);
1130
1131         return 0;
1132 }
1133
1134 int cg_mangle_path(const char *path, char **result) {
1135         _cleanup_free_ char *c = NULL, *p = NULL;
1136         char *t;
1137         int r;
1138
1139         assert(path);
1140         assert(result);
1141
1142         /* First, check if it already is a filesystem path */
1143         if (path_startswith(path, "/sys/fs/cgroup")) {
1144
1145                 t = strdup(path);
1146                 if (!t)
1147                         return -ENOMEM;
1148
1149                 *result = path_kill_slashes(t);
1150                 return 0;
1151         }
1152
1153         /* Otherwise, treat it as cg spec */
1154         r = cg_split_spec(path, &c, &p);
1155         if (r < 0)
1156                 return r;
1157
1158         return cg_get_path(c ? c : ELOGIND_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
1159 }
1160
1161 int cg_get_root_path(char **path) {
1162 /// elogind does not support systemd scopes and slices
1163 #if 0
1164         char *p, *e;
1165         int r;
1166
1167         assert(path);
1168
1169         r = cg_pid_get_path(ELOGIND_CGROUP_CONTROLLER, 1, &p);
1170         if (r < 0)
1171                 return r;
1172
1173         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1174         if (!e)
1175                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1176         if (!e)
1177                 e = endswith(p, "/system"); /* even more legacy */
1178         if (e)
1179                 *e = 0;
1180
1181         *path = p;
1182         return 0;
1183 #else
1184         assert(path);
1185         return cg_pid_get_path(ELOGIND_CGROUP_CONTROLLER, 1, path);
1186 #endif // 0
1187 }
1188
1189 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1190         _cleanup_free_ char *rt = NULL;
1191         char *p;
1192         int r;
1193
1194         assert(cgroup);
1195         assert(shifted);
1196
1197         if (!root) {
1198                 /* If the root was specified let's use that, otherwise
1199                  * let's determine it from PID 1 */
1200
1201                 r = cg_get_root_path(&rt);
1202                 if (r < 0)
1203                         return r;
1204
1205                 root = rt;
1206                 log_debug_elogind("Determined root path: \"%s\"", root);
1207         }
1208
1209         p = path_startswith(cgroup, root);
1210         if (p && p[0] && (p > cgroup))
1211                 *shifted = p - 1;
1212         else
1213                 *shifted = cgroup;
1214
1215         return 0;
1216 }
1217
1218 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1219         _cleanup_free_ char *raw = NULL;
1220         const char *c;
1221         int r;
1222
1223         assert(pid >= 0);
1224         assert(cgroup);
1225
1226         r = cg_pid_get_path(ELOGIND_CGROUP_CONTROLLER, pid, &raw);
1227         if (r < 0)
1228                 return r;
1229
1230         log_debug_elogind("Shifting path: \"%s\" (PID %u, root: \"%s\")",
1231                           raw, pid, root ? root : "NULL");
1232         r = cg_shift_path(raw, root, &c);
1233         if (r < 0)
1234                 return r;
1235
1236         if (c == raw) {
1237                 *cgroup = raw;
1238                 raw = NULL;
1239         } else {
1240                 char *n;
1241
1242                 n = strdup(c);
1243                 if (!n)
1244                         return -ENOMEM;
1245
1246                 *cgroup = n;
1247         }
1248         log_debug_elogind("Resulting cgroup:\"%s\"", *cgroup);
1249
1250         return 0;
1251 }
1252
1253 /// UNNEEDED by elogind
1254 #if 0
1255 int cg_path_decode_unit(const char *cgroup, char **unit){
1256         char *c, *s;
1257         size_t n;
1258
1259         assert(cgroup);
1260         assert(unit);
1261
1262         n = strcspn(cgroup, "/");
1263         if (n < 3)
1264                 return -ENXIO;
1265
1266         c = strndupa(cgroup, n);
1267         c = cg_unescape(c);
1268
1269         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1270                 return -ENXIO;
1271
1272         s = strdup(c);
1273         if (!s)
1274                 return -ENOMEM;
1275
1276         *unit = s;
1277         return 0;
1278 }
1279
1280 static bool valid_slice_name(const char *p, size_t n) {
1281
1282         if (!p)
1283                 return false;
1284
1285         if (n < strlen("x.slice"))
1286                 return false;
1287
1288         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1289                 char buf[n+1], *c;
1290
1291                 memcpy(buf, p, n);
1292                 buf[n] = 0;
1293
1294                 c = cg_unescape(buf);
1295
1296                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1297         }
1298
1299         return false;
1300 }
1301
/* Skips over all leading slice components of p. Returns a pointer to the
 * beginning of the first path component (leading slashes already skipped)
 * that valid_slice_name() does not accept. */
static const char *skip_slices(const char *p) {
        assert(p);

        for (;;) {
                const char *component;
                size_t len;

                component = p + strspn(p, "/");
                len = strcspn(component, "/");

                if (!valid_slice_name(component, len))
                        return component;

                p = component + len;
        }
}
1319
/* Determines the unit a cgroup path belongs to: skips all leading slices and
 * decodes the component that follows with cg_path_decode_unit().
 *
 * Returns 0 and a newly allocated unit name in *ret on success, -ENXIO if the
 * decoded name is itself a slice, or the negative value from
 * cg_path_decode_unit(). */
int cg_path_get_unit(const char *path, char **ret) {
        char *decoded;
        int r;

        assert(path);
        assert(ret);

        r = cg_path_decode_unit(skip_slices(path), &decoded);
        if (r < 0)
                return r;

        /* We skipped over the slices, don't accept any now */
        if (!endswith(decoded, ".slice")) {
                *ret = decoded;
                return 0;
        }

        free(decoded);
        return -ENXIO;
}
1343
1344 int cg_pid_get_unit(pid_t pid, char **unit) {
1345         _cleanup_free_ char *cgroup = NULL;
1346         int r;
1347
1348         assert(unit);
1349
1350         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1351         if (r < 0)
1352                 return r;
1353
1354         return cg_path_get_unit(cgroup, unit);
1355 }
1356
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer just past the leading "session-<id>.scope" component
 * (and any slashes following it), or NULL if p is empty, if its first
 * component does not have that form, or if the embedded ID is rejected by
 * session_id_valid().
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the ID between prefix and suffix, plus NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal failure
                 * with NULL like all other error paths do, not with a
                 * boolean. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1393
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer just past the leading "user@<uid>.service" component
 * (and any slashes following it), or NULL if p is empty, if its first
 * component does not have that form, or if parse_uid() fails on the part
 * between "user@" and ".service".
 */
static const char *skip_user_manager(const char *p) {
        size_t n, idlen;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + n - 8, ".service", 8) != 0)
                return NULL;

        idlen = n - 5 - 8;
        {
                char id[idlen + 1];

                memcpy(id, p + 5, idlen);
                id[idlen] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(id, NULL) < 0)
                        return NULL;
        }

        p += n;
        return p + strspn(p, "/");
}
1431
/* Skips everything that precedes a user unit in a cgroup path: first all
 * slices, then either the user manager service or, failing that, the session
 * scope. Returns NULL if neither of the latter two is found. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Skip the user manager, if it's in the path now; alternatively
         * skip the user session if it is in the path. */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1448
/* Determines the user unit a cgroup path belongs to. Returns -ENXIO if
 * skip_user_prefix() finds no user prefix in the path, otherwise whatever
 * cg_path_get_unit() returns for the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Past the prefix it looks pretty much the same as for a system
         * unit, hence the same parser is used from there on. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1464
1465 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1466         _cleanup_free_ char *cgroup = NULL;
1467         int r;
1468
1469         assert(unit);
1470
1471         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1472         if (r < 0)
1473                 return r;
1474
1475         return cg_path_get_user_unit(cgroup, unit);
1476 }
1477
1478 int cg_path_get_machine_name(const char *path, char **machine) {
1479         _cleanup_free_ char *u = NULL;
1480         const char *sl;
1481         int r;
1482
1483         r = cg_path_get_unit(path, &u);
1484         if (r < 0)
1485                 return r;
1486
1487         sl = strjoina("/run/systemd/machines/unit:", u);
1488         return readlink_malloc(sl, machine);
1489 }
1490
1491 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1492         _cleanup_free_ char *cgroup = NULL;
1493         int r;
1494
1495         assert(machine);
1496
1497         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1498         if (r < 0)
1499                 return r;
1500
1501         return cg_path_get_machine_name(cgroup, machine);
1502 }
1503 #endif // 0
1504
/* Determines the session a cgroup path belongs to.
 *
 * In the variant compiled for elogind the path must start with '/' (this is
 * asserted), and the session is the unescaped first component after it.
 * Returns -ENOENT if that component is empty, -ENOMEM on allocation failure
 * and 0 on success. "session" may be NULL, in which case the path is only
 * checked; otherwise *session receives a newly allocated string. */
int cg_path_get_session(const char *path, char **session) {
        /* Elogind uses a flat hierarchy, just "/SESSION".  The only
           wrinkle is that SESSION might be escaped.  */
#if 0
        _cleanup_free_ char *unit = NULL;
        char *start, *suffix;
        int r;

        assert(path);

        r = cg_path_get_unit(path, &unit);
        if (r < 0)
                return r;

        start = startswith(unit, "session-");
        if (!start)
                return -ENXIO;
        suffix = endswith(start, ".scope");
        if (!suffix)
                return -ENXIO;

        *suffix = 0;
        if (!session_id_valid(start))
                return -ENXIO;
#else
        const char *first, *stop, *start;

        assert(path);
        log_debug_elogind("path is \"%s\"", path);
        assert(path[0] == '/');

        /* The first component spans from right after the leading slash up
         * to the next slash, or to the end of the string. */
        first = path + 1;
        stop = strchrnul(first, '/');
        if (stop == first)
                return -ENOENT;

        start = strndupa(first, stop - first);
        start = cg_unescape(start);

        if (start[0] == 0)
                return -ENOENT;
#endif // 0

        if (session) {
                char *copy;

                log_debug_elogind("found session: \"%s\"", start);
                copy = strdup(start);
                if (!copy)
                        return -ENOMEM;

                *session = copy;
        }

        return 0;
}
1561
1562 int cg_pid_get_session(pid_t pid, char **session) {
1563         _cleanup_free_ char *cgroup = NULL;
1564         int r;
1565
1566         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1567         if (r < 0)
1568                 return r;
1569
1570         return cg_path_get_session(cgroup, session);
1571 }
1572
1573 /// UNNEEDED by elogind
1574 #if 0
1575 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1576         _cleanup_free_ char *slice = NULL;
1577         char *start, *end;
1578         int r;
1579
1580         assert(path);
1581
1582         r = cg_path_get_slice(path, &slice);
1583         if (r < 0)
1584                 return r;
1585
1586         start = startswith(slice, "user-");
1587         if (!start)
1588                 return -ENXIO;
1589         end = endswith(start, ".slice");
1590         if (!end)
1591                 return -ENXIO;
1592
1593         *end = 0;
1594         if (parse_uid(start, uid) < 0)
1595                 return -ENXIO;
1596
1597         return 0;
1598 }
1599
1600 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1601         _cleanup_free_ char *cgroup = NULL;
1602         int r;
1603
1604         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1605         if (r < 0)
1606                 return r;
1607
1608         return cg_path_get_owner_uid(cgroup, uid);
1609 }
1610
/* Finds the right-most slice unit from the beginning of p, stopping before
 * the first component that is not a slice.
 *
 * If p does not start with any slice, *slice is set to a newly allocated
 * "-.slice" (or -ENOMEM is returned). Otherwise the last slice seen is
 * decoded with cg_path_decode_unit() and its result is returned. */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last = NULL;
        char *s;

        assert(p);
        assert(slice);

        /* Walk the path component by component for as long as the
         * components are slices, remembering the most recent one. */
        for (;;) {
                size_t n;

                p += strspn(p, "/");

                n = strcspn(p, "/");
                if (!valid_slice_name(p, n))
                        break;

                last = p;
                p += n;
        }

        if (last)
                return cg_path_decode_unit(last, slice);

        s = strdup("-.slice");
        if (!s)
                return -ENOMEM;

        *slice = s;
        return 0;
}
1646
1647 int cg_pid_get_slice(pid_t pid, char **slice) {
1648         _cleanup_free_ char *cgroup = NULL;
1649         int r;
1650
1651         assert(slice);
1652
1653         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1654         if (r < 0)
1655                 return r;
1656
1657         return cg_path_get_slice(cgroup, slice);
1658 }
1659
/* Determines the user slice a cgroup path belongs to. Returns -ENXIO if
 * skip_user_prefix() finds no user prefix in the path, otherwise whatever
 * cg_path_get_slice() returns for the remainder. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Past the prefix it looks pretty much the same as for a system
         * slice, so the same parser is used from there on. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1673
1674 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1675         _cleanup_free_ char *cgroup = NULL;
1676         int r;
1677
1678         assert(slice);
1679
1680         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1681         if (r < 0)
1682                 return r;
1683
1684         return cg_path_get_user_slice(cgroup, slice);
1685 }
1686 #endif // 0
1687
1688 char *cg_escape(const char *p) {
1689         bool need_prefix = false;
1690
1691         /* This implements very minimal escaping for names to be used
1692          * as file names in the cgroup tree: any name which might
1693          * conflict with a kernel name or is prefixed with '_' is
1694          * prefixed with a '_'. That way, when reading cgroup names it
1695          * is sufficient to remove a single prefixing underscore if
1696          * there is one. */
1697
1698         /* The return value of this function (unlike cg_unescape())
1699          * needs free()! */
1700
1701         if (p[0] == 0 ||
1702             p[0] == '_' ||
1703             p[0] == '.' ||
1704             streq(p, "notify_on_release") ||
1705             streq(p, "release_agent") ||
1706             streq(p, "tasks") ||
1707             startswith(p, "cgroup."))
1708                 need_prefix = true;
1709         else {
1710                 const char *dot;
1711
1712                 dot = strrchr(p, '.');
1713                 if (dot) {
1714                         CGroupController c;
1715                         size_t l = dot - p;
1716
1717                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1718                                 const char *n;
1719
1720                                 n = cgroup_controller_to_string(c);
1721
1722                                 if (l != strlen(n))
1723                                         continue;
1724
1725                                 if (memcmp(p, n, l) != 0)
1726                                         continue;
1727
1728                                         need_prefix = true;
1729                                 break;
1730                         }
1731                 }
1732         }
1733
1734         if (need_prefix)
1735                 return strappend("_", p);
1736
1737                 return strdup(p);
1738 }
1739
/* Reverses cg_escape(): skips a single leading '_', if there is one.
 *
 * The return value of this function (unlike cg_escape()) doesn't need
 * free()! It points into p itself. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (*p == '_' ? p + 1 : p);
}
1751
1752 #define CONTROLLER_VALID                        \
1753         DIGITS LETTERS                          \
1754         "_"
1755
1756 bool cg_controller_is_valid(const char *p) {
1757         const char *t, *s;
1758
1759         if (!p)
1760                 return false;
1761
1762         s = startswith(p, "name=");
1763         if (s)
1764                 p = s;
1765
1766         if (*p == 0 || *p == '_')
1767                 return false;
1768
1769         for (t = p; *t; t++)
1770                 if (!strchr(CONTROLLER_VALID, *t))
1771                         return false;
1772
1773         if (t - p > FILENAME_MAX)
1774                 return false;
1775
1776         return true;
1777 }
1778
1779 /// UNNEEDED by elogind
1780 #if 0
1781 int cg_slice_to_path(const char *unit, char **ret) {
1782         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1783         const char *dash;
1784         int r;
1785
1786         assert(unit);
1787         assert(ret);
1788
1789         if (streq(unit, "-.slice")) {
1790                 char *x;
1791
1792                 x = strdup("");
1793                 if (!x)
1794                         return -ENOMEM;
1795                 *ret = x;
1796                 return 0;
1797         }
1798
1799         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1800                 return -EINVAL;
1801
1802         if (!endswith(unit, ".slice"))
1803                 return -EINVAL;
1804
1805         r = unit_name_to_prefix(unit, &p);
1806         if (r < 0)
1807                 return r;
1808
1809         dash = strchr(p, '-');
1810
1811         /* Don't allow initial dashes */
1812         if (dash == p)
1813                 return -EINVAL;
1814
1815         while (dash) {
1816                 _cleanup_free_ char *escaped = NULL;
1817                 char n[dash - p + sizeof(".slice")];
1818
1819                 /* Don't allow trailing or double dashes */
1820                 if (dash[1] == 0 || dash[1] == '-')
1821                         return -EINVAL;
1822
1823                 strcpy(stpncpy(n, p, dash - p), ".slice");
1824                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1825                         return -EINVAL;
1826
1827                 escaped = cg_escape(n);
1828                 if (!escaped)
1829                         return -ENOMEM;
1830
1831                 if (!strextend(&s, escaped, "/", NULL))
1832                         return -ENOMEM;
1833
1834                 dash = strchr(dash+1, '-');
1835         }
1836
1837         e = cg_escape(unit);
1838         if (!e)
1839                 return -ENOMEM;
1840
1841         if (!strextend(&s, e, NULL))
1842                 return -ENOMEM;
1843
1844         *ret = s;
1845         s = NULL;
1846
1847         return 0;
1848 }
1849 #endif // 0
1850
1851 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1852         _cleanup_free_ char *p = NULL;
1853         int r;
1854
1855         r = cg_get_path(controller, path, attribute, &p);
1856         if (r < 0)
1857                 return r;
1858
1859         return write_string_file_no_create(p, value);
1860 }
1861
1862 /// UNNEEDED by elogind
1863 #if 0
1864 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1865         _cleanup_free_ char *p = NULL;
1866         int r;
1867
1868         r = cg_get_path(controller, path, attribute, &p);
1869         if (r < 0)
1870                 return r;
1871
1872         return read_one_line_file(p, ret);
1873 }
1874
1875 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1876         CGroupController c;
1877         int r, unified;
1878
1879         /* This one will create a cgroup in our private tree, but also
1880          * duplicate it in the trees specified in mask, and remove it
1881          * in all others */
1882
1883         /* First create the cgroup in our own hierarchy. */
1884         r = cg_create(ELOGIND_CGROUP_CONTROLLER, path);
1885         if (r < 0)
1886                 return r;
1887
1888         /* If we are in the unified hierarchy, we are done now */
1889         unified = cg_unified();
1890         if (unified < 0)
1891                 return unified;
1892         if (unified > 0)
1893                 return 0;
1894
1895         /* Otherwise, do the same in the other hierarchies */
1896         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1897                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1898                 const char *n;
1899
1900                 n = cgroup_controller_to_string(c);
1901
1902                 if (mask & bit)
1903                         (void) cg_create(n, path);
1904                 else if (supported & bit)
1905                         (void) cg_trim(n, path, true);
1906         }
1907
1908         return 0;
1909 }
1910
1911 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1912         CGroupController c;
1913         int r, unified;
1914
1915         r = cg_attach(ELOGIND_CGROUP_CONTROLLER, path, pid);
1916         if (r < 0)
1917                 return r;
1918
1919         unified = cg_unified();
1920         if (unified < 0)
1921                 return unified;
1922         if (unified > 0)
1923                 return 0;
1924
1925         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1926                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1927                         const char *p = NULL;
1928
1929                 if (!(supported & bit))
1930                         continue;
1931
1932                         if (path_callback)
1933                                 p = path_callback(bit, userdata);
1934
1935                         if (!p)
1936                                 p = path;
1937
1938                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1939         }
1940
1941         return 0;
1942 }
1943
1944 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1945         Iterator i;
1946         void *pidp;
1947         int r = 0;
1948
1949         SET_FOREACH(pidp, pids, i) {
1950                 pid_t pid = PTR_TO_PID(pidp);
1951                 int q;
1952
1953                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1954                 if (q < 0 && r >= 0)
1955                         r = q;
1956         }
1957
1958         return r;
1959 }
1960
1961 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1962         CGroupController c;
1963         int r = 0, unified;
1964
1965         if (!path_equal(from, to))  {
1966                 r = cg_migrate_recursive(ELOGIND_CGROUP_CONTROLLER, from, ELOGIND_CGROUP_CONTROLLER, to, false, true);
1967                 if (r < 0)
1968                         return r;
1969         }
1970
1971         unified = cg_unified();
1972         if (unified < 0)
1973                 return unified;
1974         if (unified > 0)
1975                 return r;
1976
1977         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1978                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1979                         const char *p = NULL;
1980
1981                 if (!(supported & bit))
1982                         continue;
1983
1984                         if (to_callback)
1985                                 p = to_callback(bit, userdata);
1986
1987                         if (!p)
1988                                 p = to;
1989
1990                 (void) cg_migrate_recursive_fallback(ELOGIND_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
1991         }
1992
1993         return 0;
1994 }
1995
1996 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
1997         CGroupController c;
1998         int r, unified;
1999
2000         r = cg_trim(ELOGIND_CGROUP_CONTROLLER, path, delete_root);
2001         if (r < 0)
2002                 return r;
2003
2004         unified = cg_unified();
2005         if (unified < 0)
2006                 return unified;
2007         if (unified > 0)
2008                 return r;
2009
2010         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2011                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2012
2013                 if (!(supported & bit))
2014                         continue;
2015
2016                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2017         }
2018
2019         return 0;
2020 }
2021 #endif // 0
2022
2023 int cg_mask_supported(CGroupMask *ret) {
2024         CGroupMask mask = 0;
2025         int r, unified;
2026
2027         /* Determines the mask of supported cgroup controllers. Only
2028          * includes controllers we can make sense of and that are
2029          * actually accessible. */
2030
2031         unified = cg_unified();
2032         if (unified < 0)
2033                 return unified;
2034         if (unified > 0) {
2035                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2036                 const char *c;
2037
2038                 /* In the unified hierarchy we can read the supported
2039                  * and accessible controllers from a the top-level
2040                  * cgroup attribute */
2041
2042                 r = cg_get_root_path(&root);
2043                 if (r < 0)
2044                         return r;
2045
2046                 r = cg_get_path(ELOGIND_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2047                 if (r < 0)
2048                         return r;
2049
2050                 r = read_one_line_file(path, &controllers);
2051                 if (r < 0)
2052                         return r;
2053
2054                 c = controllers;
2055                 for (;;) {
2056                         _cleanup_free_ char *n = NULL;
2057                         CGroupController v;
2058
2059                         r = extract_first_word(&c, &n, NULL, 0);
2060                         if (r < 0)
2061                                 return r;
2062                         if (r == 0)
2063                                 break;
2064
2065                         v = cgroup_controller_from_string(n);
2066                         if (v < 0)
2067                                 continue;
2068
2069                         mask |= CGROUP_CONTROLLER_TO_MASK(v);
2070         }
2071
2072                 /* Currently, we only support the memory controller in
2073                  * the unified hierarchy, mask everything else off. */
2074                 mask &= CGROUP_MASK_MEMORY;
2075
2076         } else {
2077                 CGroupController c;
2078
2079                 /* In the legacy hierarchy, we check whether which
2080                  * hierarchies are mounted. */
2081
2082                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2083                         const char *n;
2084
2085                         n = cgroup_controller_to_string(c);
2086                         if (controller_is_accessible(n) >= 0)
2087                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2088                 }
2089         }
2090
2091         *ret = mask;
2092         return 0;
2093 }
2094
2095 /// UNNEEDED by elogind
2096 #if 0
2097 int cg_kernel_controllers(Set *controllers) {
2098         _cleanup_fclose_ FILE *f = NULL;
2099         char buf[LINE_MAX];
2100         int r;
2101
2102         assert(controllers);
2103
2104         /* Determines the full list of kernel-known controllers. Might
2105          * include controllers we don't actually support, arbitrary
2106          * named hierarchies and controllers that aren't currently
2107          * accessible (because not mounted). */
2108
2109         f = fopen("/proc/cgroups", "re");
2110         if (!f) {
2111                 if (errno == ENOENT)
2112                         return 0;
2113                 return -errno;
2114         }
2115
2116         /* Ignore the header line */
2117         (void) fgets(buf, sizeof(buf), f);
2118
2119         for (;;) {
2120                 char *controller;
2121                 int enabled = 0;
2122
2123                 errno = 0;
2124                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2125
2126                         if (feof(f))
2127                                 break;
2128
2129                         if (ferror(f) && errno != 0)
2130                                 return -errno;
2131
2132                         return -EBADMSG;
2133                 }
2134
2135                 if (!enabled) {
2136                         free(controller);
2137                         continue;
2138                 }
2139
2140                 if (!cg_controller_is_valid(controller)) {
2141                         free(controller);
2142                         return -EBADMSG;
2143                 }
2144
2145                 r = set_consume(controllers, controller);
2146                 if (r < 0)
2147                         return r;
2148         }
2149
2150         return 0;
2151 }
2152 #endif // 0
2153
2154 static thread_local int unified_cache = -1;
2155
2156 int cg_unified(void) {
2157         struct statfs fs;
2158
2159         /* Checks if we support the unified hierarchy. Returns an
2160          * error when the cgroup hierarchies aren't mounted yet or we
2161          * have any other trouble determining if the unified hierarchy
2162          * is supported. */
2163
2164         if (unified_cache >= 0)
2165                 return unified_cache;
2166
2167         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2168                 return -errno;
2169
2170 /// elogind can not support the unified hierarchy as a controller,
2171 /// so always assume a classical hierarchy.
2172 /// If, ond only *if*, someone really wants to substitute systemd-login
2173 /// in an environment managed by systemd with elogin, we might have to
2174 /// add such a support.
2175 #if 0
2176         if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2177                 unified_cache = true;
2178         else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2179 #else
2180         if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2181 #endif // elogind
2182                 unified_cache = false;
2183         else
2184                 return -ENOEXEC;
2185
2186         return unified_cache;
2187 }
2188
2189 /// UNNEEDED by elogind
2190 #if 0
2191 void cg_unified_flush(void) {
2192         unified_cache = -1;
2193 }
2194
2195 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2196         _cleanup_free_ char *fs = NULL;
2197         CGroupController c;
2198         int r, unified;
2199
2200         assert(p);
2201
2202         if (supported == 0)
2203                 return 0;
2204
2205         unified = cg_unified();
2206         if (unified < 0)
2207                 return unified;
2208         if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2209                 return 0;
2210
2211         r = cg_get_path(ELOGIND_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2212         if (r < 0)
2213                 return r;
2214
2215         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2216                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2217                 const char *n;
2218
2219                 if (!(supported & bit))
2220                         continue;
2221
2222                 n = cgroup_controller_to_string(c);
2223                 {
2224                         char s[1 + strlen(n) + 1];
2225
2226                         s[0] = mask & bit ? '+' : '-';
2227                         strcpy(s + 1, n);
2228
2229                         r = write_string_file(fs, s, 0);
2230                         if (r < 0)
2231                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2232                 }
2233         }
2234
2235         return 0;
2236 }
2237
2238 bool cg_is_unified_wanted(void) {
2239         static thread_local int wanted = -1;
2240         int r, unified;
2241
2242         /* If the hierarchy is already mounted, then follow whatever
2243          * was chosen for it. */
2244         unified = cg_unified();
2245         if (unified >= 0)
2246                 return unified;
2247
2248         /* Otherwise, let's see what the kernel command line has to
2249          * say. Since checking that is expensive, let's cache the
2250          * result. */
2251         if (wanted >= 0)
2252                 return wanted;
2253
2254         r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2255         if (r > 0)
2256                 return (wanted = true);
2257         else {
2258                 _cleanup_free_ char *value = NULL;
2259
2260                 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2261                 if (r < 0)
2262                         return false;
2263                 if (r == 0)
2264                         return (wanted = false);
2265
2266                 return (wanted = parse_boolean(value) > 0);
2267         }
2268 }
2269
bool cg_is_legacy_wanted(void) {
        /* The legacy hierarchy is wanted exactly when the unified one
         * is not. */
        bool unified_wanted = cg_is_unified_wanted();

        return !unified_wanted;
}
2273 #endif // 0
2274
2275 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2276         [CGROUP_CONTROLLER_CPU] = "cpu",
2277         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2278         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2279         [CGROUP_CONTROLLER_MEMORY] = "memory",
2280         [CGROUP_CONTROLLER_DEVICE] = "devices",
2281 };
2282
2283 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);