src/basic/cgroup-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <errno.h>
  23 #include <unistd.h>
  24 #include <signal.h>
  25 #include <string.h>
  26 #include <stdlib.h>
  27 #include <dirent.h>
  28 #include <sys/stat.h>
  29 #include <sys/types.h>
  30 #include <ftw.h>
  31
  32 #include "set.h"
  33 #include "macro.h"
  34 #include "util.h"
  35 #include "formats-util.h"
  36 #include "process-util.h"
  37 #include "path-util.h"
  38 // #include "unit-name.h"
  39 #include "fileio.h"
  40 // #include "special.h"
  41 #include "mkdir.h"
  42 #include "login-util.h"
  43 #include "cgroup-util.h"
  44
  45 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  46         _cleanup_free_ char *fs = NULL;
  47         FILE *f;
  48         int r;
  49
  50         assert(_f);
  51
  52         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  53         if (r < 0)
  54                 return r;
  55
  56         f = fopen(fs, "re");
  57         if (!f)
  58                 return -errno;
  59
  60         *_f = f;
  61         return 0;
  62 }
  63
  64 int cg_read_pid(FILE *f, pid_t *_pid) {
  65         unsigned long ul;
  66
  67         /* Note that the cgroup.procs might contain duplicates! See
  68          * cgroups.txt for details. */
  69
  70         assert(f);
  71         assert(_pid);
  72
  73         errno = 0;
  74         if (fscanf(f, "%lu", &ul) != 1) {
  75
  76                 if (feof(f))
  77                         return 0;
  78
  79                 return errno ? -errno : -EIO;
  80         }
  81
  82         if (ul <= 0)
  83                 return -EIO;
  84
  85         *_pid = (pid_t) ul;
  86         return 1;
  87 }
  88
  89 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
  90         _cleanup_free_ char *fs = NULL;
  91         int r;
  92         DIR *d;
  93
  94         assert(_d);
  95
  96         /* This is not recursive! */
  97
  98         r = cg_get_path(controller, path, NULL, &fs);
  99         if (r < 0)
 100                 return r;
 101
 102         d = opendir(fs);
 103         if (!d)
 104                 return -errno;
 105
 106         *_d = d;
 107         return 0;
 108 }
 109
 110 int cg_read_subgroup(DIR *d, char **fn) {
 111         struct dirent *de;
 112
 113         assert(d);
 114         assert(fn);
 115
 116         FOREACH_DIRENT_ALL(de, d, return -errno) {
 117                 char *b;
 118
 119                 if (de->d_type != DT_DIR)
 120                         continue;
 121
 122                 if (streq(de->d_name, ".") ||
 123                     streq(de->d_name, ".."))
 124                         continue;
 125
 126                 b = strdup(de->d_name);
 127                 if (!b)
 128                         return -ENOMEM;
 129
 130                 *fn = b;
 131                 return 1;
 132         }
 133
 134         return 0;
 135 }
 136
 137 int cg_rmdir(const char *controller, const char *path) {
 138         _cleanup_free_ char *p = NULL;
 139         int r;
 140
 141         r = cg_get_path(controller, path, NULL, &p);
 142         if (r < 0)
 143                 return r;
 144
 145         r = rmdir(p);
 146         if (r < 0 && errno != ENOENT)
 147                 return -errno;
 148
 149         return 0;
 150 }
 151
 152 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
 153         _cleanup_set_free_ Set *allocated_set = NULL;
 154         bool done = false;
 155         int r, ret = 0;
 156         pid_t my_pid;
 157
 158         assert(sig >= 0);
 159
 160         /* This goes through the tasks list and kills them all. This
 161          * is repeated until no further processes are added to the
 162          * tasks list, to properly handle forking processes */
 163
 164         if (!s) {
 165                 s = allocated_set = set_new(NULL);
 166                 if (!s)
 167                         return -ENOMEM;
 168         }
 169
 170         my_pid = getpid();
 171
 172         do {
 173                 _cleanup_fclose_ FILE *f = NULL;
 174                 pid_t pid = 0;
 175                 done = true;
 176
 177                 r = cg_enumerate_processes(controller, path, &f);
 178                 if (r < 0) {
 179                         if (ret >= 0 && r != -ENOENT)
 180                                 return r;
 181
 182                         return ret;
 183                 }
 184
 185                 while ((r = cg_read_pid(f, &pid)) > 0) {
 186
 187                         if (ignore_self && pid == my_pid)
 188                                 continue;
 189
 190                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 191                                 continue;
 192
 193                         /* If we haven't killed this process yet, kill
 194                          * it */
 195                         if (kill(pid, sig) < 0) {
 196                                 if (ret >= 0 && errno != ESRCH)
 197                                         ret = -errno;
 198                         } else {
 199                                 if (sigcont && sig != SIGKILL)
 200                                         (void) kill(pid, SIGCONT);
 201
 202                                 if (ret == 0)
 203                                         ret = 1;
 204                         }
 205
 206                         done = false;
 207
 208                         r = set_put(s, PID_TO_PTR(pid));
 209                         if (r < 0) {
 210                                 if (ret >= 0)
 211                                         return r;
 212
 213                                 return ret;
 214                         }
 215                 }
 216
 217                 if (r < 0) {
 218                         if (ret >= 0)
 219                                 return r;
 220
 221                         return ret;
 222                 }
 223
 224                 /* To avoid racing against processes which fork
 225                  * quicker than we can kill them we repeat this until
 226                  * no new pids need to be killed. */
 227
 228         } while (!done);
 229
 230         return ret;
 231 }
 232
 233 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
 234         _cleanup_set_free_ Set *allocated_set = NULL;
 235         _cleanup_closedir_ DIR *d = NULL;
 236         int r, ret;
 237         char *fn;
 238
 239         assert(path);
 240         assert(sig >= 0);
 241
 242         if (!s) {
 243                 s = allocated_set = set_new(NULL);
 244                 if (!s)
 245                         return -ENOMEM;
 246         }
 247
 248         ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
 249
 250         r = cg_enumerate_subgroups(controller, path, &d);
 251         if (r < 0) {
 252                 if (ret >= 0 && r != -ENOENT)
 253                         return r;
 254
 255                 return ret;
 256         }
 257
 258         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 259                 _cleanup_free_ char *p = NULL;
 260
 261                 p = strjoin(path, "/", fn, NULL);
 262                 free(fn);
 263                 if (!p)
 264                         return -ENOMEM;
 265
 266                 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
 267                 if (r != 0 && ret >= 0)
 268                         ret = r;
 269         }
 270
 271         if (ret >= 0 && r < 0)
 272                 ret = r;
 273
 274         if (rem) {
 275                 r = cg_rmdir(controller, path);
 276                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 277                         return r;
 278         }
 279
 280         return ret;
 281 }
 282
 283 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
 284         bool done = false;
 285         _cleanup_set_free_ Set *s = NULL;
 286         int r, ret = 0;
 287         pid_t my_pid;
 288
 289         assert(cfrom);
 290         assert(pfrom);
 291         assert(cto);
 292         assert(pto);
 293
 294         s = set_new(NULL);
 295         if (!s)
 296                 return -ENOMEM;
 297
 298         my_pid = getpid();
 299
 300         log_debug_elogind("Migrating \"%s\"/\"%s\" to \"%s\"/\"%s\" (%s)",
 301                           cfrom, pfrom, cto, pto,
 302                           ignore_self ? "ignoring self" : "watching self");
 303
 304         do {
 305                 _cleanup_fclose_ FILE *f = NULL;
 306                 pid_t pid = 0;
 307                 done = true;
 308
 309                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 310                 if (r < 0) {
 311                         if (ret >= 0 && r != -ENOENT)
 312                                 return r;
 313
 314                         return ret;
 315                 }
 316
 317                 while ((r = cg_read_pid(f, &pid)) > 0) {
 318
 319                         /* This might do weird stuff if we aren't a
 320                          * single-threaded program. However, we
 321                          * luckily know we are not */
 322                         if (ignore_self && pid == my_pid)
 323                                 continue;
 324
 325                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 326                                 continue;
 327
 328                         /* Ignore kernel threads. Since they can only
 329                          * exist in the root cgroup, we only check for
 330                          * them there. */
 331                         if (cfrom &&
 332                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 333                             is_kernel_thread(pid) > 0)
 334                                 continue;
 335
 336                         r = cg_attach(cto, pto, pid);
 337                         if (r < 0) {
 338                                 if (ret >= 0 && r != -ESRCH)
 339                                         ret = r;
 340                         } else if (ret == 0)
 341                                 ret = 1;
 342
 343                         done = false;
 344
 345                         r = set_put(s, PID_TO_PTR(pid));
 346                         if (r < 0) {
 347                                 if (ret >= 0)
 348                                         return r;
 349
 350                                 return ret;
 351                         }
 352                 }
 353
 354                 if (r < 0) {
 355                         if (ret >= 0)
 356                                 return r;
 357
 358                         return ret;
 359                 }
 360         } while (!done);
 361
 362         return ret;
 363 }
 364
 365 int cg_migrate_recursive(
 366                 const char *cfrom,
 367                 const char *pfrom,
 368                 const char *cto,
 369                 const char *pto,
 370                 bool ignore_self,
 371                 bool rem) {
 372
 373         _cleanup_closedir_ DIR *d = NULL;
 374         int r, ret = 0;
 375         char *fn;
 376
 377         assert(cfrom);
 378         assert(pfrom);
 379         assert(cto);
 380         assert(pto);
 381
 382         ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
 383
 384         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 385         if (r < 0) {
 386                 if (ret >= 0 && r != -ENOENT)
 387                         return r;
 388
 389                 return ret;
 390         }
 391
 392         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 393                 _cleanup_free_ char *p = NULL;
 394
 395                 p = strjoin(pfrom, "/", fn, NULL);
 396                 free(fn);
 397                 if (!p)
 398                                 return -ENOMEM;
 399
 400                 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
 401                 if (r != 0 && ret >= 0)
 402                         ret = r;
 403         }
 404
 405         if (r < 0 && ret >= 0)
 406                 ret = r;
 407
 408         if (rem) {
 409                 r = cg_rmdir(cfrom, pfrom);
 410                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 411                         return r;
 412         }
 413
 414         return ret;
 415 }
 416
 417 int cg_migrate_recursive_fallback(
 418                 const char *cfrom,
 419                 const char *pfrom,
 420                 const char *cto,
 421                 const char *pto,
 422                 bool ignore_self,
 423                 bool rem) {
 424
 425         int r;
 426
 427         assert(cfrom);
 428         assert(pfrom);
 429         assert(cto);
 430         assert(pto);
 431
 432         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
 433         if (r < 0) {
 434                 char prefix[strlen(pto) + 1];
 435
 436                 /* This didn't work? Then let's try all prefixes of the destination */
 437
 438                 PATH_FOREACH_PREFIX(prefix, pto) {
 439                         int q;
 440
 441                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
 442                         if (q >= 0)
 443                                 return q;
 444                 }
 445         }
 446
 447         return r;
 448 }
 449
 450 static const char *controller_to_dirname(const char *controller) {
 451         const char *e;
 452
 453         assert(controller);
 454
 455         /* Converts a controller name to the directory name below
 456          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 457          * just cuts off the name= prefixed used for named
 458          * hierarchies, if it is specified. */
 459
 460         e = startswith(controller, "name=");
 461         if (e)
 462                 return e;
 463
 464                 return controller;
 465 }
 466
 467 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 468         const char *dn;
 469         char *t = NULL;
 470
 471         assert(fs);
 472         assert(controller);
 473
 474         dn = controller_to_dirname(controller);
 475
 476         if (isempty(path) && isempty(suffix))
 477                 t = strappend("/sys/fs/cgroup/", dn);
 478         else if (isempty(path))
 479                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
 480         else if (isempty(suffix))
 481                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
 482                 else
 483                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
 484         if (!t)
 485                 return -ENOMEM;
 486
 487         *fs = t;
 488         return 0;
 489         }
 490
 491 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 492         char *t;
 493
 494         assert(fs);
 495
 496         if (isempty(path) && isempty(suffix))
 497                 t = strdup("/sys/fs/cgroup");
 498         else if (isempty(path))
 499                 t = strappend("/sys/fs/cgroup/", suffix);
 500         else if (isempty(suffix))
 501                 t = strappend("/sys/fs/cgroup/", path);
 502         else
 503                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
 504         if (!t)
 505                 return -ENOMEM;
 506
 507         *fs = t;
 508         return 0;
 509 }
 510
 511 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 512         int unified, r;
 513
 514         assert(fs);
 515
 516         if (!controller) {
 517                 char *t;
 518
 519                 /* If no controller is specified, we return the path
 520                  * *below* the controllers, without any prefix. */
 521
 522                 if (!path && !suffix)
 523                 return -EINVAL;
 524
 525                 if (!suffix)
 526                         t = strdup(path);
 527                 else if (!path)
 528                         t = strdup(suffix);
 529                 else
 530                         t = strjoin(path, "/", suffix, NULL);
 531                 if (!t)
 532                         return -ENOMEM;
 533
 534                 *fs = path_kill_slashes(t);
 535                 return 0;
 536         }
 537
 538         if (!cg_controller_is_valid(controller))
 539                 return -EINVAL;
 540
 541         unified = cg_unified();
 542         if (unified < 0)
 543                 return unified;
 544
 545         if (unified > 0)
 546                 r = join_path_unified(path, suffix, fs);
 547         else
 548                 r = join_path_legacy(controller, path, suffix, fs);
 549                 if (r < 0)
 550                         return r;
 551
 552         path_kill_slashes(*fs);
 553         return 0;
 554         }
 555
 556 static int controller_is_accessible(const char *controller) {
 557         int unified;
 558
 559         assert(controller);
 560
 561         /* Checks whether a specific controller is accessible,
 562          * i.e. its hierarchy mounted. In the unified hierarchy all
 563          * controllers are considered accessible, except for the named
 564          * hierarchies */
 565
 566         if (!cg_controller_is_valid(controller))
 567                 return -EINVAL;
 568
 569         unified = cg_unified();
 570         if (unified < 0)
 571                 return unified;
 572         if (unified > 0) {
 573                 /* We don't support named hierarchies if we are using
 574                  * the unified hierarchy. */
 575
 576                 if (streq(controller, ELOGIND_CGROUP_CONTROLLER))
 577                         return 0;
 578
 579                 if (startswith(controller, "name="))
 580                         return -EOPNOTSUPP;
 581
 582         } else {
 583                 const char *cc, *dn;
 584
 585                 dn = controller_to_dirname(controller);
 586                 cc = strjoina("/sys/fs/cgroup/", dn);
 587
 588                 if (laccess(cc, F_OK) < 0)
 589                         return -errno;
 590         }
 591
 592         return 0;
 593 }
 594
 595 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 596         int r;
 597
 598         assert(controller);
 599         assert(fs);
 600
 601         /* Check if the specified controller is actually accessible */
 602         r = controller_is_accessible(controller);
 603         if (r < 0)
 604                 return r;
 605
 606         return cg_get_path(controller, path, suffix, fs);
 607 }
 608
 609 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 610         assert(path);
 611         assert(sb);
 612         assert(ftwbuf);
 613
 614         if (typeflag != FTW_DP)
 615                 return 0;
 616
 617         if (ftwbuf->level < 1)
 618                 return 0;
 619
 620         (void) rmdir(path);
 621         return 0;
 622 }
 623
 624 int cg_trim(const char *controller, const char *path, bool delete_root) {
 625         _cleanup_free_ char *fs = NULL;
 626         int r = 0;
 627
 628         assert(path);
 629
 630         r = cg_get_path(controller, path, NULL, &fs);
 631         if (r < 0)
 632                 return r;
 633
 634         errno = 0;
 635         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 636                 if (errno == ENOENT)
 637                         r = 0;
 638                 else if (errno != 0)
 639                         r = -errno;
 640                 else
 641                         r = -EIO;
 642         }
 643
 644         if (delete_root) {
 645                 if (rmdir(fs) < 0 && errno != ENOENT)
 646                         return -errno;
 647         }
 648
 649         return r;
 650 }
 651
 652 int cg_create(const char *controller, const char *path) {
 653         _cleanup_free_ char *fs = NULL;
 654         int r;
 655
 656         r = cg_get_path_and_check(controller, path, NULL, &fs);
 657         if (r < 0)
 658                 return r;
 659
 660         r = mkdir_parents(fs, 0755);
 661         if (r < 0)
 662                 return r;
 663
 664         if (mkdir(fs, 0755) < 0) {
 665
 666                 if (errno == EEXIST)
 667                         return 0;
 668
 669                 return -errno;
 670         }
 671
 672         return 1;
 673 }
 674
 675 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 676         int r, q;
 677
 678         assert(pid >= 0);
 679
 680         r = cg_create(controller, path);
 681         if (r < 0)
 682                 return r;
 683
 684         q = cg_attach(controller, path, pid);
 685         if (q < 0)
 686                 return q;
 687
 688         /* This does not remove the cgroup on failure */
 689         return r;
 690 }
 691
 692 int cg_attach(const char *controller, const char *path, pid_t pid) {
 693         _cleanup_free_ char *fs = NULL;
 694         char c[DECIMAL_STR_MAX(pid_t) + 2];
 695         int r;
 696
 697         assert(path);
 698         assert(pid >= 0);
 699
 700         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 701         if (r < 0)
 702                 return r;
 703
 704         if (pid == 0)
 705                 pid = getpid();
 706
 707         snprintf(c, sizeof(c), PID_FMT"\n", pid);
 708
 709         return write_string_file_no_create(fs, c);
 710 }
 711
 712 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 713         int r;
 714
 715         assert(controller);
 716         assert(path);
 717         assert(pid >= 0);
 718
 719         r = cg_attach(controller, path, pid);
 720         if (r < 0) {
 721                 char prefix[strlen(path) + 1];
 722
 723                 /* This didn't work? Then let's try all prefixes of
 724                  * the destination */
 725
 726                 PATH_FOREACH_PREFIX(prefix, path) {
 727                         int q;
 728
 729                         q = cg_attach(controller, prefix, pid);
 730                         if (q >= 0)
 731                                 return q;
 732                 }
 733         }
 734
 735         return r;
 736 }
 737
 738 /// UNNEEDED by elogind
 739 #if 0
 740 int cg_set_group_access(
 741                 const char *controller,
 742                 const char *path,
 743                 mode_t mode,
 744                 uid_t uid,
 745                 gid_t gid) {
 746
 747         _cleanup_free_ char *fs = NULL;
 748         int r;
 749
 750         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 751                 return 0;
 752
 753         if (mode != MODE_INVALID)
 754                 mode &= 0777;
 755
 756         r = cg_get_path(controller, path, NULL, &fs);
 757         if (r < 0)
 758                 return r;
 759
 760         return chmod_and_chown(fs, mode, uid, gid);
 761 }
 762
 763 int cg_set_task_access(
 764                 const char *controller,
 765                 const char *path,
 766                 mode_t mode,
 767                 uid_t uid,
 768                 gid_t gid) {
 769
 770         _cleanup_free_ char *fs = NULL, *procs = NULL;
 771         int r, unified;
 772
 773         assert(path);
 774
 775         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 776                 return 0;
 777
 778         if (mode != MODE_INVALID)
 779                 mode &= 0666;
 780
 781         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 782         if (r < 0)
 783                 return r;
 784
 785         r = chmod_and_chown(fs, mode, uid, gid);
 786         if (r < 0)
 787                 return r;
 788
 789         unified = cg_unified();
 790         if (unified < 0)
 791                 return unified;
 792         if (unified)
 793                 return 0;
 794
 795         /* Compatibility, Always keep values for "tasks" in sync with
 796          * "cgroup.procs" */
 797         if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 798                 (void) chmod_and_chown(procs, mode, uid, gid);
 799
 800         return 0;
 801 }
 802 #endif // 0
 803
 804 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
 805         _cleanup_fclose_ FILE *f = NULL;
 806         char line[LINE_MAX];
 807         const char *fs;
 808         size_t cs = 0;
 809         int unified;
 810
 811         assert(path);
 812         assert(pid >= 0);
 813
 814         unified = cg_unified();
 815         if (unified < 0)
 816                 return unified;
 817         if (unified == 0) {
 818                 if (controller) {
 819                         if (!cg_controller_is_valid(controller))
 820                                 return -EINVAL;
 821                 } else
 822                         controller = ELOGIND_CGROUP_CONTROLLER;
 823
 824                 cs = strlen(controller);
 825         }
 826
 827         fs = procfs_file_alloca(pid, "cgroup");
 828         f = fopen(fs, "re");
 829         if (!f)
 830                 return errno == ENOENT ? -ESRCH : -errno;
 831
 832         FOREACH_LINE(line, f, return -errno) {
 833                 char *e, *p;
 834
 835                 truncate_nl(line);
 836
 837                 if (unified) {
 838                         e = startswith(line, "0:");
 839                         if (!e)
 840                                 continue;
 841
 842                         e = strchr(e, ':');
 843                         if (!e)
 844                                 continue;
 845                 } else {
 846                         char *l;
 847                         size_t k;
 848                         const char *word, *state;
 849                         bool found = false;
 850
 851                         l = strchr(line, ':');
 852                         if (!l)
 853                                 continue;
 854
 855                         l++;
 856                         e = strchr(l, ':');
 857                         if (!e)
 858                                 continue;
 859
 860                         *e = 0;
 861                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
 862                                 if (k == cs && memcmp(word, controller, cs) == 0) {
 863                                         found = true;
 864                                         break;
 865                                 }
 866                         }
 867
 868                         if (!found)
 869                                 continue;
 870                 }
 871
 872                 p = strdup(e + 1);
 873                 if (!p)
 874                         return -ENOMEM;
 875
 876                 *path = p;
 877                 return 0;
 878         }
 879
 880         return -ENODATA;
 881 }
 882
 883 int cg_install_release_agent(const char *controller, const char *agent) {
 884         _cleanup_free_ char *fs = NULL, *contents = NULL;
 885         const char *sc;
 886         int r, unified;
 887
 888         assert(agent);
 889
 890         unified = cg_unified();
 891         if (unified < 0)
 892                 return unified;
 893         if (unified) /* doesn't apply to unified hierarchy */
 894                 return -EOPNOTSUPP;
 895
 896         r = cg_get_path(controller, NULL, "release_agent", &fs);
 897         if (r < 0)
 898                 return r;
 899
 900         r = read_one_line_file(fs, &contents);
 901         if (r < 0)
 902                 return r;
 903
 904         sc = strstrip(contents);
 905         if (isempty(sc)) {
 906                 r = write_string_file_no_create(fs, agent);
 907                 if (r < 0)
 908                         return r;
 909         } else if (!path_equal(sc, agent))
 910                 return -EEXIST;
 911
 912         fs = mfree(fs);
 913         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 914         if (r < 0)
 915                 return r;
 916
 917         contents = mfree(contents);
 918         r = read_one_line_file(fs, &contents);
 919         if (r < 0)
 920                 return r;
 921
 922         sc = strstrip(contents);
 923         if (streq(sc, "0")) {
 924                 r = write_string_file_no_create(fs, "1");
 925                 if (r < 0)
 926                         return r;
 927
 928                 return 1;
 929         }
 930
 931         if (!streq(sc, "1"))
 932                 return -EIO;
 933
 934         return 0;
 935 }
 936
 937 int cg_uninstall_release_agent(const char *controller) {
 938         _cleanup_free_ char *fs = NULL;
 939         int r, unified;
 940
 941         unified = cg_unified();
 942         if (unified < 0)
 943                 return unified;
 944         if (unified) /* Doesn't apply to unified hierarchy */
 945                 return -EOPNOTSUPP;
 946
 947         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 948         if (r < 0)
 949                 return r;
 950
 951         r = write_string_file_no_create(fs, "0");
 952         if (r < 0)
 953                 return r;
 954
 955         fs = mfree(fs);
 956
 957         r = cg_get_path(controller, NULL, "release_agent", &fs);
 958         if (r < 0)
 959                 return r;
 960
 961         r = write_string_file_no_create(fs, "");
 962         if (r < 0)
 963                 return r;
 964
 965         return 0;
 966 }
 967
 968 int cg_is_empty(const char *controller, const char *path) {
 969         _cleanup_fclose_ FILE *f = NULL;
 970         pid_t pid;
 971         int r;
 972
 973         assert(path);
 974
 975         r = cg_enumerate_processes(controller, path, &f);
 976         if (r == -ENOENT)
 977                 return 1;
 978         if (r < 0)
 979                 return r;
 980
 981         r = cg_read_pid(f, &pid);
 982         if (r < 0)
 983                 return r;
 984
 985         return r == 0;
 986 }
 987
 988 int cg_is_empty_recursive(const char *controller, const char *path) {
 989         int unified, r;
 990
 991         assert(path);
 992
 993         /* The root cgroup is always populated */
 994         if (controller && (isempty(path) || path_equal(path, "/")))
 995                 return false;
 996
 997         unified = cg_unified();
 998         if (unified < 0)
 999                 return unified;
1000
1001         if (unified > 0) {
1002                 _cleanup_free_ char *populated = NULL, *t = NULL;
1003
1004                 /* On the unified hierarchy we can check empty state
1005                  * via the "cgroup.populated" attribute. */
1006
1007                 r = cg_get_path(controller, path, "cgroup.populated", &populated);
1008         if (r < 0)
1009                 return r;
1010
1011                 r = read_one_line_file(populated, &t);
1012                 if (r == -ENOENT)
1013                         return 1;
1014                 if (r < 0)
1015                         return r;
1016
1017                 return streq(t, "0");
1018         } else {
1019         _cleanup_closedir_ DIR *d = NULL;
1020         char *fn;
1021
1022                 r = cg_is_empty(controller, path);
1023         if (r <= 0)
1024                 return r;
1025
1026         r = cg_enumerate_subgroups(controller, path, &d);
1027                 if (r == -ENOENT)
1028                         return 1;
1029         if (r < 0)
1030                         return r;
1031
1032         while ((r = cg_read_subgroup(d, &fn)) > 0) {
1033                 _cleanup_free_ char *p = NULL;
1034
1035                 p = strjoin(path, "/", fn, NULL);
1036                 free(fn);
1037                 if (!p)
1038                         return -ENOMEM;
1039
1040                         r = cg_is_empty_recursive(controller, p);
1041                 if (r <= 0)
1042                         return r;
1043         }
1044         if (r < 0)
1045                 return r;
1046
1047                 return true;
1048         }
1049 }
1050
1051 int cg_split_spec(const char *spec, char **controller, char **path) {
1052         char *t = NULL, *u = NULL;
1053         const char *e;
1054
1055         assert(spec);
1056
1057         if (*spec == '/') {
1058                 if (!path_is_safe(spec))
1059                         return -EINVAL;
1060
1061                 if (path) {
1062                         t = strdup(spec);
1063                         if (!t)
1064                                 return -ENOMEM;
1065
1066                         *path = path_kill_slashes(t);
1067                 }
1068
1069                 if (controller)
1070                         *controller = NULL;
1071
1072                 return 0;
1073         }
1074
1075         e = strchr(spec, ':');
1076         if (!e) {
1077                 if (!cg_controller_is_valid(spec))
1078                         return -EINVAL;
1079
1080                 if (controller) {
1081                         t = strdup(spec);
1082                         if (!t)
1083                                 return -ENOMEM;
1084
1085                         *controller = t;
1086                 }
1087
1088                 if (path)
1089                         *path = NULL;
1090
1091                 return 0;
1092         }
1093
1094         t = strndup(spec, e-spec);
1095         if (!t)
1096                 return -ENOMEM;
1097         if (!cg_controller_is_valid(t)) {
1098                 free(t);
1099                 return -EINVAL;
1100         }
1101
1102         if (isempty(e+1))
1103                 u = NULL;
1104         else {
1105                 u = strdup(e+1);
1106                 if (!u) {
1107                         free(t);
1108                         return -ENOMEM;
1109                 }
1110
1111                 if (!path_is_safe(u) ||
1112                     !path_is_absolute(u)) {
1113                         free(t);
1114                         free(u);
1115                         return -EINVAL;
1116                 }
1117
1118                 path_kill_slashes(u);
1119         }
1120
1121         if (controller)
1122                 *controller = t;
1123         else
1124                 free(t);
1125
1126         if (path)
1127                 *path = u;
1128         else
1129                 free(u);
1130
1131         return 0;
1132 }
1133
1134 int cg_mangle_path(const char *path, char **result) {
1135         _cleanup_free_ char *c = NULL, *p = NULL;
1136         char *t;
1137         int r;
1138
1139         assert(path);
1140         assert(result);
1141
1142         /* First, check if it already is a filesystem path */
1143         if (path_startswith(path, "/sys/fs/cgroup")) {
1144
1145                 t = strdup(path);
1146                 if (!t)
1147                         return -ENOMEM;
1148
1149                 *result = path_kill_slashes(t);
1150                 return 0;
1151         }
1152
1153         /* Otherwise, treat it as cg spec */
1154         r = cg_split_spec(path, &c, &p);
1155         if (r < 0)
1156                 return r;
1157
1158         return cg_get_path(c ? c : ELOGIND_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
1159 }
1160
/* Determines the cgroup path that is used as root of our cgroup tree and
 * stores it in *path.
 *
 * In the elogind build (the #else branch) this is simply the path that
 * cg_pid_get_path() reports for PID 1 in ELOGIND_CGROUP_CONTROLLER; the return
 * value of that call is passed through unchanged. */
int cg_get_root_path(char **path) {
/// elogind does not support systemd scopes and slices
#if 0
        /* Disabled upstream variant: additionally cuts a trailing init scope,
         * system slice or "/system" component off PID 1's path. */
        char *p, *e;
        int r;

        assert(path);

        r = cg_pid_get_path(ELOGIND_CGROUP_CONTROLLER, 1, &p);
        if (r < 0)
                return r;

        e = endswith(p, "/" SPECIAL_INIT_SCOPE);
        if (!e)
                e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
        if (!e)
                e = endswith(p, "/system"); /* even more legacy */
        if (e)
                *e = 0;

        *path = p;
        return 0;
#else
        assert(path);
        return cg_pid_get_path(ELOGIND_CGROUP_CONTROLLER, 1, path);
#endif // 0
}
1188
1189 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1190         _cleanup_free_ char *rt = NULL;
1191         char *p;
1192         int r;
1193
1194         assert(cgroup);
1195         assert(shifted);
1196
1197         if (!root) {
1198                 /* If the root was specified let's use that, otherwise
1199                  * let's determine it from PID 1 */
1200
1201                 r = cg_get_root_path(&rt);
1202                 if (r < 0)
1203                         return r;
1204
1205                 root = rt;
1206         }
1207
1208         p = path_startswith(cgroup, root);
1209         if (p && p > cgroup)
1210                 *shifted = p - 1;
1211         else
1212                 *shifted = cgroup;
1213
1214         return 0;
1215 }
1216
1217 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1218         _cleanup_free_ char *raw = NULL;
1219         const char *c;
1220         int r;
1221
1222         assert(pid >= 0);
1223         assert(cgroup);
1224
1225         r = cg_pid_get_path(ELOGIND_CGROUP_CONTROLLER, pid, &raw);
1226         if (r < 0)
1227                 return r;
1228
1229         log_debug_elogind("Shifting path: \"%s\" (PID %u, root: \"%s\")",
1230                           raw, pid, root ? root : "NULL");
1231         r = cg_shift_path(raw, root, &c);
1232         if (r < 0)
1233                 return r;
1234
1235         if (c == raw) {
1236                 *cgroup = raw;
1237                 raw = NULL;
1238         } else {
1239                 char *n;
1240
1241                 n = strdup(c);
1242                 if (!n)
1243                         return -ENOMEM;
1244
1245                 *cgroup = n;
1246         }
1247         log_debug_elogind("Resulting cgroup:\"%s\"", *cgroup);
1248
1249         return 0;
1250 }
1251
1252 /// UNNEEDED by elogind
1253 #if 0
/* Takes the first component of 'cgroup' (everything up to the first '/'),
 * unescapes it with cg_unescape() and, if unit_name_is_valid() accepts it as
 * plain or instance unit name, returns a newly allocated copy in *unit.
 *
 * Returns -ENXIO if the component is shorter than three characters or is not
 * a valid unit name, and -ENOMEM if the copy cannot be allocated. */
int cg_path_decode_unit(const char *cgroup, char **unit){
        char *c, *s;
        size_t n;

        assert(cgroup);
        assert(unit);

        n = strcspn(cgroup, "/");
        if (n < 3)
                return -ENXIO;

        /* Temporary copy of the component; cg_unescape() only returns a
         * pointer into it. */
        c = strndupa(cgroup, n);
        c = cg_unescape(c);

        if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
                return -ENXIO;

        s = strdup(c);
        if (!s)
                return -ENOMEM;

        *unit = s;
        return 0;
}
1278
/* Returns true if the n bytes starting at p end in ".slice" and, after
 * cg_unescape(), are accepted by unit_name_is_valid() as plain unit name.
 * Returns false if p is NULL or n is smaller than strlen("x.slice"). */
static bool valid_slice_name(const char *p, size_t n) {

        if (!p)
                return false;

        if (n < strlen("x.slice"))
                return false;

        if (memcmp(p + n - 6, ".slice", 6) == 0) {
                char buf[n+1], *c;

                /* p is not NUL-terminated after n bytes, hence work on a
                 * terminated copy */
                memcpy(buf, p, n);
                buf[n] = 0;

                c = cg_unescape(buf);

                return unit_name_is_valid(c, UNIT_NAME_PLAIN);
        }

        return false;
}
1300
/* Walks over the leading components of p for as long as valid_slice_name()
 * accepts them and returns a pointer to the first component that is not a
 * slice (any slashes in front of that component are already skipped). */
static const char *skip_slices(const char *p) {
        assert(p);

        /* Skips over all slice assignments */

        for (;;) {
                size_t n;

                p += strspn(p, "/");

                n = strcspn(p, "/");
                if (!valid_slice_name(p, n))
                        return p;

                p += n;
        }
}
1318
/* Extracts the unit name from a cgroup path: skips all leading slice
 * components, then decodes the next component with cg_path_decode_unit().
 *
 * On success returns 0 and a newly allocated unit name in *ret. Returns -ENXIO
 * if the decoded name ends in ".slice", or the error reported by
 * cg_path_decode_unit(). */
int cg_path_get_unit(const char *path, char **ret) {
        const char *e;
        char *unit;
        int r;

        assert(path);
        assert(ret);

        e = skip_slices(path);

        r = cg_path_decode_unit(e, &unit);
        if (r < 0)
                return r;

        /* We skipped over the slices, don't accept any now */
        if (endswith(unit, ".slice")) {
                free(unit);
                return -ENXIO;
        }

        *ret = unit;
        return 0;
}
1342
/* Determines the unit of process 'pid': fetches its shifted cgroup path with
 * cg_pid_get_path_shifted() (root NULL) and passes it to cg_path_get_unit().
 * Returns the error of whichever step fails. */
int cg_pid_get_unit(pid_t pid, char **unit) {
        _cleanup_free_ char *cgroup = NULL;
        int r;

        assert(unit);

        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
        if (r < 0)
                return r;

        return cg_path_get_unit(cgroup, unit);
}
1355
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to what follows the leading "session-<id>.scope"
 * component of p (slashes after the component are skipped as well), or NULL if
 * p is empty, the first component does not have that form, or the embedded
 * session id is rejected by session_id_valid().
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                char buf[n - 8 - 6 + 1];

                /* Copy out just the id between prefix and suffix */
                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, so report failure as NULL
                 * like every other error path here does (this used to be
                 * "return false", which only worked because false is 0). */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1392
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to what follows the leading "user@<uid>.service"
 * component of p (slashes after the component are skipped as well), or NULL if
 * p is empty, the first component does not have that form, or parse_uid()
 * rejects the part between "user@" and ".service".
 */
static const char *skip_user_manager(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) == 0 && memcmp(p + n - 8, ".service", 8) == 0) {
                char buf[n - 5 - 8 + 1];

                /* Copy out just the part between prefix and suffix */
                memcpy(buf, p + 5, n - 5 - 8);
                buf[n - 5 - 8] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(buf, NULL) < 0)
                        return NULL;

                p += n;
                p += strspn(p, "/");

                return p;
        }

        return NULL;
}
1430
/* Skips everything in 'path' that precedes a user unit: first all slice
 * components, then either a user manager service or, failing that, a session
 * scope. Returns NULL if neither of the two follows the slices. */
static const char *skip_user_prefix(const char *path) {
        const char *e, *t;

        assert(path);

        /* Skip slices, if there are any */
        e = skip_slices(path);

        /* Skip the user manager, if it's in the path now... */
        t = skip_user_manager(e);
        if (t)
                return t;

        /* Alternatively skip the user session if it is in the path... */
        return skip_session(e);
}
1447
/* Extracts the user unit name from a cgroup path: strips the user prefix with
 * skip_user_prefix() and parses the remainder with cg_path_get_unit().
 * Returns -ENXIO if the path has no user prefix, otherwise whatever
 * cg_path_get_unit() returns. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *t;

        assert(path);
        assert(ret);

        t = skip_user_prefix(path);
        if (!t)
                return -ENXIO;

        /* And from here on it looks pretty much the same as for a
         * system unit, hence let's use the same parser from here
         * on. */
        return cg_path_get_unit(t, ret);
}
1463
/* Determines the user unit of process 'pid': fetches its shifted cgroup path
 * with cg_pid_get_path_shifted() (root NULL) and passes it to
 * cg_path_get_user_unit(). Returns the error of whichever step fails. */
int cg_pid_get_user_unit(pid_t pid, char **unit) {
        _cleanup_free_ char *cgroup = NULL;
        int r;

        assert(unit);

        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
        if (r < 0)
                return r;

        return cg_path_get_user_unit(cgroup, unit);
}
1476
/* Resolves the machine name for a cgroup path: determines the unit with
 * cg_path_get_unit() and reads the target of the symlink
 * "/run/systemd/machines/unit:<unit>" into *machine via readlink_malloc().
 * Returns the error of whichever step fails. */
int cg_path_get_machine_name(const char *path, char **machine) {
        _cleanup_free_ char *u = NULL;
        const char *sl;
        int r;

        r = cg_path_get_unit(path, &u);
        if (r < 0)
                return r;

        sl = strjoina("/run/systemd/machines/unit:", u);
        return readlink_malloc(sl, machine);
}
1489
/* Determines the machine name of process 'pid': fetches its shifted cgroup
 * path with cg_pid_get_path_shifted() (root NULL) and passes it to
 * cg_path_get_machine_name(). Returns the error of whichever step fails. */
int cg_pid_get_machine_name(pid_t pid, char **machine) {
        _cleanup_free_ char *cgroup = NULL;
        int r;

        assert(machine);

        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
        if (r < 0)
                return r;

        return cg_path_get_machine_name(cgroup, machine);
}
1502 #endif // 0
1503
/* Extracts the session name from a cgroup path.
 *
 * elogind build (#else branch): 'path' must start with '/' (asserted). The
 * first component after that slash is taken, unescaped with cg_unescape() and
 * treated as session name. Returns -ENOENT if that component is empty, either
 * before or after unescaping.
 *
 * If 'session' is non-NULL, a newly allocated copy of the name is stored in
 * *session (-ENOMEM if the allocation fails); if it is NULL the function only
 * validates the path. Returns 0 on success. */
int cg_path_get_session(const char *path, char **session) {
        /* Elogind uses a flat hierarchy, just "/SESSION".  The only
           wrinkle is that SESSION might be escaped.  */
#if 0
        /* Disabled upstream variant: derives the session id from a
         * "session-<id>.scope" unit name. */
        _cleanup_free_ char *unit = NULL;
        char *start, *end;
        int r;

        assert(path);

        r = cg_path_get_unit(path, &unit);
        if (r < 0)
                return r;

        start = startswith(unit, "session-");
        if (!start)
                return -ENXIO;
        end = endswith(start, ".scope");
        if (!end)
                return -ENXIO;

        *end = 0;
        if (!session_id_valid(start))
                return -ENXIO;
#else
        const char *e, *n, *start;

        assert(path);
        log_debug_elogind("path is \"%s\"", path);
        assert(path[0] == '/');

        /* e..n delimits the first path component after the leading slash */
        e = path + 1;
        n = strchrnul(e, '/');
        if (e == n)
                return -ENOENT;

        /* strndupa() places the copy on this function's stack, so 'start'
         * is only valid until we return; it is duplicated below before it
         * is handed out. */
        start = strndupa(e, n - e);
        start = cg_unescape(start);

        if (!start[0])
                return -ENOENT;
#endif // 0

        if (session) {
                char *rr;

                log_debug_elogind("found session: \"%s\"", start);
                rr = strdup(start);
                if (!rr)
                        return -ENOMEM;

                *session = rr;
        }

        return 0;
}
1560
1561 int cg_pid_get_session(pid_t pid, char **session) {
1562         _cleanup_free_ char *cgroup = NULL;
1563         int r;
1564
1565         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1566         if (r < 0)
1567                 return r;
1568
1569         return cg_path_get_session(cgroup, session);
1570 }
1571
1572 /// UNNEEDED by elogind
1573 #if 0
/* Derives the owning user of a cgroup path from its slice: the slice returned
 * by cg_path_get_slice() must have the form "user-<uid>.slice", and <uid> is
 * parsed into *uid with parse_uid().
 *
 * Returns 0 on success, -ENXIO if the slice does not have that form or the
 * uid does not parse, or the error of cg_path_get_slice(). */
int cg_path_get_owner_uid(const char *path, uid_t *uid) {
        _cleanup_free_ char *slice = NULL;
        char *start, *end;
        int r;

        assert(path);

        r = cg_path_get_slice(path, &slice);
        if (r < 0)
                return r;

        start = startswith(slice, "user-");
        if (!start)
                return -ENXIO;
        end = endswith(start, ".slice");
        if (!end)
                return -ENXIO;

        /* Cut off the suffix in place so that only the uid remains */
        *end = 0;
        if (parse_uid(start, uid) < 0)
                return -ENXIO;

        return 0;
}
1598
/* Determines the owning user of process 'pid': fetches its shifted cgroup
 * path with cg_pid_get_path_shifted() (root NULL) and passes it to
 * cg_path_get_owner_uid(). Returns the error of whichever step fails. */
int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
        _cleanup_free_ char *cgroup = NULL;
        int r;

        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
        if (r < 0)
                return r;

        return cg_path_get_owner_uid(cgroup, uid);
}
1609
/* Determines the slice a cgroup path is located in.
 *
 * Walks the leading components of p as long as valid_slice_name() accepts
 * them and decodes the last accepted one with cg_path_decode_unit(). If not
 * even the first component is a slice, a newly allocated "-.slice" is
 * returned instead. Returns 0 on success, -ENOMEM on allocation failure, or
 * the error of cg_path_decode_unit(). */
int cg_path_get_slice(const char *p, char **slice) {
        const char *e = NULL;

        assert(p);
        assert(slice);

        /* Finds the right-most slice unit from the beginning, but
         * stops before we come to the first non-slice unit. */

        for (;;) {
                size_t n;

                p += strspn(p, "/");

                n = strcspn(p, "/");
                if (!valid_slice_name(p, n)) {

                        /* e is still NULL if no slice component was seen at all */
                        if (!e) {
                                char *s;

                                s = strdup("-.slice");
                                if (!s)
                                        return -ENOMEM;

                                *slice = s;
                                return 0;
                        }

                        return cg_path_decode_unit(e, slice);
                }

                /* Remember the most recent slice component and move on */
                e = p;
                p += n;
        }
}
1645
/* Determines the slice of process 'pid': fetches its shifted cgroup path with
 * cg_pid_get_path_shifted() (root NULL) and passes it to cg_path_get_slice().
 * Returns the error of whichever step fails. */
int cg_pid_get_slice(pid_t pid, char **slice) {
        _cleanup_free_ char *cgroup = NULL;
        int r;

        assert(slice);

        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
        if (r < 0)
                return r;

        return cg_path_get_slice(cgroup, slice);
}
1658
/* Determines the user slice of a cgroup path: strips the user prefix with
 * skip_user_prefix() and parses the remainder with cg_path_get_slice().
 * Returns -ENXIO if the path has no user prefix, otherwise whatever
 * cg_path_get_slice() returns. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *t;
        assert(p);
        assert(slice);

        t = skip_user_prefix(p);
        if (!t)
                return -ENXIO;

        /* And now it looks pretty much the same as for a system
         * slice, so let's just use the same parser from here on. */
        return cg_path_get_slice(t, slice);
}
1672
/* Determines the user slice of process 'pid': fetches its shifted cgroup path
 * with cg_pid_get_path_shifted() (root NULL) and passes it to
 * cg_path_get_user_slice(). Returns the error of whichever step fails. */
int cg_pid_get_user_slice(pid_t pid, char **slice) {
        _cleanup_free_ char *cgroup = NULL;
        int r;

        assert(slice);

        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
        if (r < 0)
                return r;

        return cg_path_get_user_slice(cgroup, slice);
}
1685 #endif // 0
1686
1687 char *cg_escape(const char *p) {
1688         bool need_prefix = false;
1689
1690         /* This implements very minimal escaping for names to be used
1691          * as file names in the cgroup tree: any name which might
1692          * conflict with a kernel name or is prefixed with '_' is
1693          * prefixed with a '_'. That way, when reading cgroup names it
1694          * is sufficient to remove a single prefixing underscore if
1695          * there is one. */
1696
1697         /* The return value of this function (unlike cg_unescape())
1698          * needs free()! */
1699
1700         if (p[0] == 0 ||
1701             p[0] == '_' ||
1702             p[0] == '.' ||
1703             streq(p, "notify_on_release") ||
1704             streq(p, "release_agent") ||
1705             streq(p, "tasks") ||
1706             startswith(p, "cgroup."))
1707                 need_prefix = true;
1708         else {
1709                 const char *dot;
1710
1711                 dot = strrchr(p, '.');
1712                 if (dot) {
1713                         CGroupController c;
1714                         size_t l = dot - p;
1715
1716                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1717                                 const char *n;
1718
1719                                 n = cgroup_controller_to_string(c);
1720
1721                                 if (l != strlen(n))
1722                                         continue;
1723
1724                                 if (memcmp(p, n, l) != 0)
1725                                         continue;
1726
1727                                         need_prefix = true;
1728                                 break;
1729                         }
1730                 }
1731         }
1732
1733         if (need_prefix)
1734                 return strappend("_", p);
1735
1736                 return strdup(p);
1737 }
1738
/* Reverses cg_escape(): skips one leading underscore, if there is one.
 *
 * Nothing is allocated; the result points into 'p' and (unlike the result of
 * cg_escape()) must not be freed. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (*p == '_' ? p + 1 : p);
}
1750
/* Characters permitted in a controller name: everything in DIGITS and
 * LETTERS, plus the underscore. Used by cg_controller_is_valid(). */
#define CONTROLLER_VALID                        \
        DIGITS LETTERS                          \
        "_"
1754
1755 bool cg_controller_is_valid(const char *p) {
1756         const char *t, *s;
1757
1758         if (!p)
1759                 return false;
1760
1761         s = startswith(p, "name=");
1762         if (s)
1763                 p = s;
1764
1765         if (*p == 0 || *p == '_')
1766                 return false;
1767
1768         for (t = p; *t; t++)
1769                 if (!strchr(CONTROLLER_VALID, *t))
1770                         return false;
1771
1772         if (t - p > FILENAME_MAX)
1773                 return false;
1774
1775         return true;
1776 }
1777
1778 /// UNNEEDED by elogind
1779 #if 0
/* Converts a slice unit name into the corresponding cgroup path.
 *
 * "-.slice" maps to the empty string. For any other name every dash in the
 * unit's prefix marks a parent slice: for each dash the prefix up to that dash
 * plus ".slice" is escaped with cg_escape() and appended, followed by "/", and
 * finally the escaped unit name itself is appended.
 *
 * On success returns 0 and a newly allocated path in *ret. Returns -EINVAL if
 * 'unit' is not a valid plain unit name, does not end in ".slice", or has a
 * leading, trailing or doubled dash in its prefix; -ENOMEM on allocation
 * failure; or the error of unit_name_to_prefix(). */
int cg_slice_to_path(const char *unit, char **ret) {
        _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
        const char *dash;
        int r;

        assert(unit);
        assert(ret);

        if (streq(unit, "-.slice")) {
                char *x;

                x = strdup("");
                if (!x)
                        return -ENOMEM;
                *ret = x;
                return 0;
        }

        if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
                return -EINVAL;

        if (!endswith(unit, ".slice"))
                return -EINVAL;

        r = unit_name_to_prefix(unit, &p);
        if (r < 0)
                return r;

        dash = strchr(p, '-');

        /* Don't allow initial dashes */
        if (dash == p)
                return -EINVAL;

        while (dash) {
                _cleanup_free_ char *escaped = NULL;
                /* Room for the prefix up to the dash, ".slice" and the NUL byte */
                char n[dash - p + sizeof(".slice")];

                /* Don't allow trailing or double dashes */
                if (dash[1] == 0 || dash[1] == '-')
                        return -EINVAL;

                strcpy(stpncpy(n, p, dash - p), ".slice");
                if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
                        return -EINVAL;

                escaped = cg_escape(n);
                if (!escaped)
                        return -ENOMEM;

                if (!strextend(&s, escaped, "/", NULL))
                        return -ENOMEM;

                dash = strchr(dash+1, '-');
        }

        e = cg_escape(unit);
        if (!e)
                return -ENOMEM;

        if (!strextend(&s, e, NULL))
                return -ENOMEM;

        /* Pass ownership to the caller and keep the cleanup handler off it */
        *ret = s;
        s = NULL;

        return 0;
}
1848 #endif // 0
1849
1850 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1851         _cleanup_free_ char *p = NULL;
1852         int r;
1853
1854         r = cg_get_path(controller, path, attribute, &p);
1855         if (r < 0)
1856                 return r;
1857
1858         return write_string_file_no_create(p, value);
1859 }
1860
1861 /// UNNEEDED by elogind
1862 #if 0
/* Reads the first line of the attribute file 'attribute' of the cgroup 'path'
 * in 'controller' into *ret. The file name is resolved with cg_get_path() and
 * read with read_one_line_file(). Returns the error of whichever step
 * fails. */
int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
        _cleanup_free_ char *p = NULL;
        int r;

        r = cg_get_path(controller, path, attribute, &p);
        if (r < 0)
                return r;

        return read_one_line_file(p, ret);
}
1873
/* Creates the cgroup 'path' in our own hierarchy and mirrors it into the
 * controller hierarchies.
 *
 * Failure to create the group in ELOGIND_CGROUP_CONTROLLER is returned to the
 * caller. On the unified hierarchy nothing else is done. Otherwise, for every
 * known controller: if its bit is set in 'mask' the group is created there, and
 * if it is not set in 'mask' but set in 'supported' the group is trimmed
 * there. Errors of these per-controller operations are deliberately ignored. */
int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
        CGroupController c;
        int r, unified;

        /* This one will create a cgroup in our private tree, but also
         * duplicate it in the trees specified in mask, and remove it
         * in all others */

        /* First create the cgroup in our own hierarchy. */
        r = cg_create(ELOGIND_CGROUP_CONTROLLER, path);
        if (r < 0)
                return r;

        /* If we are in the unified hierarchy, we are done now */
        unified = cg_unified();
        if (unified < 0)
                return unified;
        if (unified > 0)
                return 0;

        /* Otherwise, do the same in the other hierarchies */
        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *n;

                n = cgroup_controller_to_string(c);

                if (mask & bit)
                        (void) cg_create(n, path);
                else if (supported & bit)
                        (void) cg_trim(n, path, true);
        }

        return 0;
}
1909 #endif // 0
1910
1911 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1912         CGroupController c;
1913         int r, unified;
1914
1915         r = cg_attach(ELOGIND_CGROUP_CONTROLLER, path, pid);
1916         if (r < 0)
1917                 return r;
1918
1919         unified = cg_unified();
1920         if (unified < 0)
1921                 return unified;
1922         if (unified > 0)
1923                 return 0;
1924
1925         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1926                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1927                         const char *p = NULL;
1928
1929                 if (!(supported & bit))
1930                         continue;
1931
1932                         if (path_callback)
1933                                 p = path_callback(bit, userdata);
1934
1935                         if (!p)
1936                                 p = path;
1937
1938                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1939         }
1940
1941         return 0;
1942 }
1943
1944 /// UNNEEDED by elogind
1945 #if 0
/* Calls cg_attach_everywhere() for every PID stored in the set 'pids'. All
 * PIDs are processed even if some fail; the return value is the first negative
 * result encountered, or 0 if there was none. */
int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
        Iterator i;
        void *pidp;
        int r = 0;

        SET_FOREACH(pidp, pids, i) {
                pid_t pid = PTR_TO_PID(pidp);
                int q;

                q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
                /* Only remember the first failure, but keep going */
                if (q < 0 && r >= 0)
                        r = q;
        }

        return r;
}
1962
/* Migrates the cgroup 'from' to 'to' in our own hierarchy and mirrors the
 * result into all supported controller hierarchies.
 *
 * Unless 'from' and 'to' are equal paths, cg_migrate_recursive() is run within
 * ELOGIND_CGROUP_CONTROLLER first; its failure is returned to the caller. On
 * the unified hierarchy the function stops there and returns that result.
 * Otherwise, for every controller whose bit is set in 'supported',
 * cg_migrate_recursive_fallback() is called with 'to' in our hierarchy as
 * source and, as destination, the path 'to_callback' returns for that
 * controller (or 'to' if there is no callback or it returns NULL). Errors of
 * these per-controller migrations are deliberately ignored. */
int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
        CGroupController c;
        int r = 0, unified;

        if (!path_equal(from, to))  {
                r = cg_migrate_recursive(ELOGIND_CGROUP_CONTROLLER, from, ELOGIND_CGROUP_CONTROLLER, to, false, true);
                if (r < 0)
                        return r;
        }

        unified = cg_unified();
        if (unified < 0)
                return unified;
        if (unified > 0)
                return r;

        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                        const char *p = NULL;

                if (!(supported & bit))
                        continue;

                        if (to_callback)
                                p = to_callback(bit, userdata);

                        if (!p)
                                p = to;

                (void) cg_migrate_recursive_fallback(ELOGIND_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
        }

        return 0;
}
1997
/* Trims the cgroup 'path' in our own hierarchy and in all supported controller
 * hierarchies.
 *
 * Failure of cg_trim() in ELOGIND_CGROUP_CONTROLLER is returned to the caller.
 * On the unified hierarchy the function stops there and returns that result.
 * Otherwise cg_trim() is repeated for every controller whose bit is set in
 * 'supported'; errors of these calls are deliberately ignored. 'delete_root'
 * is passed through to cg_trim() unchanged. */
int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
        CGroupController c;
        int r, unified;

        r = cg_trim(ELOGIND_CGROUP_CONTROLLER, path, delete_root);
        if (r < 0)
                return r;

        unified = cg_unified();
        if (unified < 0)
                return unified;
        if (unified > 0)
                return r;

        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);

                if (!(supported & bit))
                        continue;

                (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
        }

        return 0;
}
2023 #endif // 0
2024
2025 int cg_mask_supported(CGroupMask *ret) {
2026         CGroupMask mask = 0;
2027         int r, unified;
2028
2029         /* Determines the mask of supported cgroup controllers. Only
2030          * includes controllers we can make sense of and that are
2031          * actually accessible. */
2032
2033         unified = cg_unified();
2034         if (unified < 0)
2035                 return unified;
2036         if (unified > 0) {
2037                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2038                 const char *c;
2039
2040                 /* In the unified hierarchy we can read the supported
2041                  * and accessible controllers from a the top-level
2042                  * cgroup attribute */
2043
2044                 r = cg_get_root_path(&root);
2045                 if (r < 0)
2046                         return r;
2047
2048                 r = cg_get_path(ELOGIND_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2049                 if (r < 0)
2050                         return r;
2051
2052                 r = read_one_line_file(path, &controllers);
2053                 if (r < 0)
2054                         return r;
2055
2056                 c = controllers;
2057                 for (;;) {
2058                         _cleanup_free_ char *n = NULL;
2059                         CGroupController v;
2060
2061                         r = extract_first_word(&c, &n, NULL, 0);
2062                         if (r < 0)
2063                                 return r;
2064                         if (r == 0)
2065                                 break;
2066
2067                         v = cgroup_controller_from_string(n);
2068                         if (v < 0)
2069                                 continue;
2070
2071                         mask |= CGROUP_CONTROLLER_TO_MASK(v);
2072         }
2073
2074                 /* Currently, we only support the memory controller in
2075                  * the unified hierarchy, mask everything else off. */
2076                 mask &= CGROUP_MASK_MEMORY;
2077
2078         } else {
2079                 CGroupController c;
2080
2081                 /* In the legacy hierarchy, we check whether which
2082                  * hierarchies are mounted. */
2083
2084                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2085                         const char *n;
2086
2087                         n = cgroup_controller_to_string(c);
2088                         if (controller_is_accessible(n) >= 0)
2089                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2090                 }
2091         }
2092
2093         *ret = mask;
2094         return 0;
2095 }
2096
2097 /// UNNEEDED by elogind
2098 #if 0
2099 int cg_kernel_controllers(Set *controllers) {
2100         _cleanup_fclose_ FILE *f = NULL;
2101         char buf[LINE_MAX];
2102         int r;
2103
2104         assert(controllers);
2105
2106         /* Determines the full list of kernel-known controllers. Might
2107          * include controllers we don't actually support, arbitrary
2108          * named hierarchies and controllers that aren't currently
2109          * accessible (because not mounted). */
2110
2111         f = fopen("/proc/cgroups", "re");
2112         if (!f) {
2113                 if (errno == ENOENT)
2114                         return 0;
2115                 return -errno;
2116         }
2117
2118         /* Ignore the header line */
2119         (void) fgets(buf, sizeof(buf), f);
2120
2121         for (;;) {
2122                 char *controller;
2123                 int enabled = 0;
2124
2125                 errno = 0;
2126                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2127
2128                         if (feof(f))
2129                                 break;
2130
2131                         if (ferror(f) && errno != 0)
2132                                 return -errno;
2133
2134                         return -EBADMSG;
2135                 }
2136
2137                 if (!enabled) {
2138                         free(controller);
2139                         continue;
2140                 }
2141
2142                 if (!cg_controller_is_valid(controller)) {
2143                         free(controller);
2144                         return -EBADMSG;
2145                 }
2146
2147                 r = set_consume(controllers, controller);
2148                 if (r < 0)
2149                         return r;
2150         }
2151
2152         return 0;
2153 }
2154 #endif // 0
2155
2156 static thread_local int unified_cache = -1;
2157
2158 int cg_unified(void) {
2159         struct statfs fs;
2160
2161         /* Checks if we support the unified hierarchy. Returns an
2162          * error when the cgroup hierarchies aren't mounted yet or we
2163          * have any other trouble determining if the unified hierarchy
2164          * is supported. */
2165
2166         if (unified_cache >= 0)
2167                 return unified_cache;
2168
2169         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2170                 return -errno;
2171
2172 /// elogind can not support the unified hierarchy as a controller,
2173 /// so always assume a classical hierarchy.
2174 /// If, ond only *if*, someone really wants to substitute systemd-login
2175 /// in an environment managed by systemd with elogin, we might have to
2176 /// add such a support.
2177 #if 0
2178         if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2179                 unified_cache = true;
2180         else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2181 #else
2182         if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2183 #endif // elogind
2184                 unified_cache = false;
2185         else
2186                 return -ENOEXEC;
2187
2188         return unified_cache;
2189 }
2190
2191 /// UNNEEDED by elogind
2192 #if 0
2193 void cg_unified_flush(void) {
2194         unified_cache = -1;
2195 }
2196
2197 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2198         _cleanup_free_ char *fs = NULL;
2199         CGroupController c;
2200         int r, unified;
2201
2202         assert(p);
2203
2204         if (supported == 0)
2205                 return 0;
2206
2207         unified = cg_unified();
2208         if (unified < 0)
2209                 return unified;
2210         if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2211                 return 0;
2212
2213         r = cg_get_path(ELOGIND_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2214         if (r < 0)
2215                 return r;
2216
2217         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2218                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2219                 const char *n;
2220
2221                 if (!(supported & bit))
2222                         continue;
2223
2224                 n = cgroup_controller_to_string(c);
2225                 {
2226                         char s[1 + strlen(n) + 1];
2227
2228                         s[0] = mask & bit ? '+' : '-';
2229                         strcpy(s + 1, n);
2230
2231                         r = write_string_file(fs, s, 0);
2232                         if (r < 0)
2233                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2234                 }
2235         }
2236
2237         return 0;
2238 }
2239 #endif // 0
2240
2241 bool cg_is_unified_wanted(void) {
2242         static thread_local int wanted = -1;
2243         int r, unified;
2244
2245         /* If the hierarchy is already mounted, then follow whatever
2246          * was chosen for it. */
2247         unified = cg_unified();
2248         if (unified >= 0)
2249                 return unified;
2250
2251         /* Otherwise, let's see what the kernel command line has to
2252          * say. Since checking that is expensive, let's cache the
2253          * result. */
2254         if (wanted >= 0)
2255                 return wanted;
2256
2257         r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2258         if (r > 0)
2259                 return (wanted = true);
2260         else {
2261                 _cleanup_free_ char *value = NULL;
2262
2263                 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2264                 if (r < 0)
2265                         return false;
2266                 if (r == 0)
2267                         return (wanted = false);
2268
2269                 return (wanted = parse_boolean(value) > 0);
2270         }
2271 }
2272
2273 /// UNNEEDED by elogind
2274 #if 0
/* Returns the logical negation of cg_is_unified_wanted(). */
bool cg_is_legacy_wanted(void) {
        bool unified_wanted = cg_is_unified_wanted();

        return !unified_wanted;
}
2278 #endif // 0
2279
2280 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2281         [CGROUP_CONTROLLER_CPU] = "cpu",
2282         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2283         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2284         [CGROUP_CONTROLLER_MEMORY] = "memory",
2285         [CGROUP_CONTROLLER_DEVICE] = "devices",
2286 };
2287
2288 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);