src/basic/cgroup-util.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2010 Lennart Poettering
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 #include <dirent.h>
  21 #include <errno.h>
  22 #include <ftw.h>
  23 //#include <limits.h>
  24 #include <signal.h>
  25 //#include <stddef.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <sys/stat.h>
  29 //#include <sys/statfs.h>
  30 #include <sys/types.h>
  31 #include <unistd.h>
  32
  33 #include "alloc-util.h"
  34 #include "cgroup-util.h"
  35 //#include "def.h"
  36 #include "dirent-util.h"
  37 #include "extract-word.h"
  38 #include "fd-util.h"
  39 #include "fileio.h"
  40 #include "formats-util.h"
  41 #include "fs-util.h"
  42 //#include "log.h"
  43 #include "login-util.h"
  44 #include "macro.h"
  45 //#include "missing.h"
  46 #include "mkdir.h"
  47 #include "parse-util.h"
  48 #include "path-util.h"
  49 #include "proc-cmdline.h"
  50 #include "process-util.h"
  51 #include "set.h"
  52 //#include "special.h"
  53 #include "stat-util.h"
  54 #include "stdio-util.h"
  55 #include "string-table.h"
  56 #include "string-util.h"
  57 #include "unit-name.h"
  58 #include "user-util.h"
  59
  60 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  61         _cleanup_free_ char *fs = NULL;
  62         FILE *f;
  63         int r;
  64
  65         assert(_f);
  66
  67         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  68         if (r < 0)
  69                 return r;
  70
  71         f = fopen(fs, "re");
  72         if (!f)
  73                 return -errno;
  74
  75         *_f = f;
  76         return 0;
  77 }
  78
  79 int cg_read_pid(FILE *f, pid_t *_pid) {
  80         unsigned long ul;
  81
  82         /* Note that the cgroup.procs might contain duplicates! See
  83          * cgroups.txt for details. */
  84
  85         assert(f);
  86         assert(_pid);
  87
  88         errno = 0;
  89         if (fscanf(f, "%lu", &ul) != 1) {
  90
  91                 if (feof(f))
  92                         return 0;
  93
  94                 return errno > 0 ? -errno : -EIO;
  95         }
  96
  97         if (ul <= 0)
  98                 return -EIO;
  99
 100         *_pid = (pid_t) ul;
 101         return 1;
 102 }
 103
 104 int cg_read_event(const char *controller, const char *path, const char *event,
 105                   char **val)
 106 {
 107         _cleanup_free_ char *events = NULL, *content = NULL;
 108         char *p, *line;
 109         int r;
 110
 111         r = cg_get_path(controller, path, "cgroup.events", &events);
 112         if (r < 0)
 113                 return r;
 114
 115         r = read_full_file(events, &content, NULL);
 116         if (r < 0)
 117                 return r;
 118
 119         p = content;
 120         while ((line = strsep(&p, "\n"))) {
 121                 char *key;
 122
 123                 key = strsep(&line, " ");
 124                 if (!key || !line)
 125                         return -EINVAL;
 126
 127                 if (strcmp(key, event))
 128                         continue;
 129
 130                 *val = strdup(line);
 131                 return 0;
 132         }
 133
 134         return -ENOENT;
 135 }
 136
 137 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
 138         _cleanup_free_ char *fs = NULL;
 139         int r;
 140         DIR *d;
 141
 142         assert(_d);
 143
 144         /* This is not recursive! */
 145
 146         r = cg_get_path(controller, path, NULL, &fs);
 147         if (r < 0)
 148                 return r;
 149
 150         d = opendir(fs);
 151         if (!d)
 152                 return -errno;
 153
 154         *_d = d;
 155         return 0;
 156 }
 157
 158 int cg_read_subgroup(DIR *d, char **fn) {
 159         struct dirent *de;
 160
 161         assert(d);
 162         assert(fn);
 163
 164         FOREACH_DIRENT_ALL(de, d, return -errno) {
 165                 char *b;
 166
 167                 if (de->d_type != DT_DIR)
 168                         continue;
 169
 170                 if (streq(de->d_name, ".") ||
 171                     streq(de->d_name, ".."))
 172                         continue;
 173
 174                 b = strdup(de->d_name);
 175                 if (!b)
 176                         return -ENOMEM;
 177
 178                 *fn = b;
 179                 return 1;
 180         }
 181
 182         return 0;
 183 }
 184
 185 int cg_rmdir(const char *controller, const char *path) {
 186         _cleanup_free_ char *p = NULL;
 187         int r;
 188
 189         r = cg_get_path(controller, path, NULL, &p);
 190         if (r < 0)
 191                 return r;
 192
 193         r = rmdir(p);
 194         if (r < 0 && errno != ENOENT)
 195                 return -errno;
 196
 197         return 0;
 198 }
 199
 200 int cg_kill(
 201                 const char *controller,
 202                 const char *path,
 203                 int sig,
 204                 CGroupFlags flags,
 205                 Set *s,
 206                 cg_kill_log_func_t log_kill,
 207                 void *userdata) {
 208
 209         _cleanup_set_free_ Set *allocated_set = NULL;
 210         bool done = false;
 211         int r, ret = 0;
 212         pid_t my_pid;
 213
 214         assert(sig >= 0);
 215
 216          /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
 217           * SIGCONT on SIGKILL. */
 218         if (IN_SET(sig, SIGCONT, SIGKILL))
 219                 flags &= ~CGROUP_SIGCONT;
 220
 221         /* This goes through the tasks list and kills them all. This
 222          * is repeated until no further processes are added to the
 223          * tasks list, to properly handle forking processes */
 224
 225         if (!s) {
 226                 s = allocated_set = set_new(NULL);
 227                 if (!s)
 228                         return -ENOMEM;
 229         }
 230
 231         my_pid = getpid();
 232
 233         do {
 234                 _cleanup_fclose_ FILE *f = NULL;
 235                 pid_t pid = 0;
 236                 done = true;
 237
 238                 r = cg_enumerate_processes(controller, path, &f);
 239                 if (r < 0) {
 240                         if (ret >= 0 && r != -ENOENT)
 241                                 return r;
 242
 243                         return ret;
 244                 }
 245
 246                 while ((r = cg_read_pid(f, &pid)) > 0) {
 247
 248                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
 249                                 continue;
 250
 251                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 252                                 continue;
 253
 254                         if (log_kill)
 255                                 log_kill(pid, sig, userdata);
 256
 257                         /* If we haven't killed this process yet, kill
 258                          * it */
 259                         if (kill(pid, sig) < 0) {
 260                                 if (ret >= 0 && errno != ESRCH)
 261                                         ret = -errno;
 262                         } else {
 263                                 if (flags & CGROUP_SIGCONT)
 264                                         (void) kill(pid, SIGCONT);
 265
 266                                 if (ret == 0)
 267                                         ret = 1;
 268                         }
 269
 270                         done = false;
 271
 272                         r = set_put(s, PID_TO_PTR(pid));
 273                         if (r < 0) {
 274                                 if (ret >= 0)
 275                                         return r;
 276
 277                                 return ret;
 278                         }
 279                 }
 280
 281                 if (r < 0) {
 282                         if (ret >= 0)
 283                                 return r;
 284
 285                         return ret;
 286                 }
 287
 288                 /* To avoid racing against processes which fork
 289                  * quicker than we can kill them we repeat this until
 290                  * no new pids need to be killed. */
 291
 292         } while (!done);
 293
 294         return ret;
 295 }
 296
 297 int cg_kill_recursive(
 298                 const char *controller,
 299                 const char *path,
 300                 int sig,
 301                 CGroupFlags flags,
 302                 Set *s,
 303                 cg_kill_log_func_t log_kill,
 304                 void *userdata) {
 305
 306         _cleanup_set_free_ Set *allocated_set = NULL;
 307         _cleanup_closedir_ DIR *d = NULL;
 308         int r, ret;
 309         char *fn;
 310
 311         assert(path);
 312         assert(sig >= 0);
 313
 314         if (!s) {
 315                 s = allocated_set = set_new(NULL);
 316                 if (!s)
 317                         return -ENOMEM;
 318         }
 319
 320         ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
 321
 322         r = cg_enumerate_subgroups(controller, path, &d);
 323         if (r < 0) {
 324                 if (ret >= 0 && r != -ENOENT)
 325                         return r;
 326
 327                 return ret;
 328         }
 329
 330         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 331                 _cleanup_free_ char *p = NULL;
 332
 333                 p = strjoin(path, "/", fn, NULL);
 334                 free(fn);
 335                 if (!p)
 336                         return -ENOMEM;
 337
 338                 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
 339                 if (r != 0 && ret >= 0)
 340                         ret = r;
 341         }
 342         if (ret >= 0 && r < 0)
 343                 ret = r;
 344
 345         if (flags & CGROUP_REMOVE) {
 346                 r = cg_rmdir(controller, path);
 347                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 348                         return r;
 349         }
 350
 351         return ret;
 352 }
 353
 354 int cg_migrate(
 355                 const char *cfrom,
 356                 const char *pfrom,
 357                 const char *cto,
 358                 const char *pto,
 359                 CGroupFlags flags) {
 360
 361         bool done = false;
 362         _cleanup_set_free_ Set *s = NULL;
 363         int r, ret = 0;
 364         pid_t my_pid;
 365
 366         assert(cfrom);
 367         assert(pfrom);
 368         assert(cto);
 369         assert(pto);
 370
 371         s = set_new(NULL);
 372         if (!s)
 373                 return -ENOMEM;
 374
 375         my_pid = getpid();
 376
 377         log_debug_elogind("Migrating \"%s\"/\"%s\" to \"%s\"/\"%s\" (%s)",
 378                           cfrom, pfrom, cto, pto,
 379                           ignore_self ? "ignoring self" : "watching self");
 380         do {
 381                 _cleanup_fclose_ FILE *f = NULL;
 382                 pid_t pid = 0;
 383                 done = true;
 384
 385                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 386                 if (r < 0) {
 387                         if (ret >= 0 && r != -ENOENT)
 388                                 return r;
 389
 390                         return ret;
 391                 }
 392
 393                 while ((r = cg_read_pid(f, &pid)) > 0) {
 394
 395                         /* This might do weird stuff if we aren't a
 396                          * single-threaded program. However, we
 397                          * luckily know we are not */
 398                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
 399                                 continue;
 400
 401                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 402                                 continue;
 403
 404                         /* Ignore kernel threads. Since they can only
 405                          * exist in the root cgroup, we only check for
 406                          * them there. */
 407                         if (cfrom &&
 408                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 409                             is_kernel_thread(pid) > 0)
 410                                 continue;
 411
 412                         r = cg_attach(cto, pto, pid);
 413                         if (r < 0) {
 414                                 if (ret >= 0 && r != -ESRCH)
 415                                         ret = r;
 416                         } else if (ret == 0)
 417                                 ret = 1;
 418
 419                         done = false;
 420
 421                         r = set_put(s, PID_TO_PTR(pid));
 422                         if (r < 0) {
 423                                 if (ret >= 0)
 424                                         return r;
 425
 426                                 return ret;
 427                         }
 428                 }
 429
 430                 if (r < 0) {
 431                         if (ret >= 0)
 432                                 return r;
 433
 434                         return ret;
 435                 }
 436         } while (!done);
 437
 438         return ret;
 439 }
 440
 441 int cg_migrate_recursive(
 442                 const char *cfrom,
 443                 const char *pfrom,
 444                 const char *cto,
 445                 const char *pto,
 446                 CGroupFlags flags) {
 447
 448         _cleanup_closedir_ DIR *d = NULL;
 449         int r, ret = 0;
 450         char *fn;
 451
 452         assert(cfrom);
 453         assert(pfrom);
 454         assert(cto);
 455         assert(pto);
 456
 457         ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
 458
 459         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 460         if (r < 0) {
 461                 if (ret >= 0 && r != -ENOENT)
 462                         return r;
 463
 464                 return ret;
 465         }
 466
 467         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 468                 _cleanup_free_ char *p = NULL;
 469
 470                 p = strjoin(pfrom, "/", fn, NULL);
 471                 free(fn);
 472                 if (!p)
 473                         return -ENOMEM;
 474
 475                 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
 476                 if (r != 0 && ret >= 0)
 477                         ret = r;
 478         }
 479
 480         if (r < 0 && ret >= 0)
 481                 ret = r;
 482
 483         if (flags & CGROUP_REMOVE) {
 484                 r = cg_rmdir(cfrom, pfrom);
 485                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 486                         return r;
 487         }
 488
 489         return ret;
 490 }
 491
 492 int cg_migrate_recursive_fallback(
 493                 const char *cfrom,
 494                 const char *pfrom,
 495                 const char *cto,
 496                 const char *pto,
 497                 CGroupFlags flags) {
 498
 499         int r;
 500
 501         assert(cfrom);
 502         assert(pfrom);
 503         assert(cto);
 504         assert(pto);
 505
 506         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
 507         if (r < 0) {
 508                 char prefix[strlen(pto) + 1];
 509
 510                 /* This didn't work? Then let's try all prefixes of the destination */
 511
 512                 PATH_FOREACH_PREFIX(prefix, pto) {
 513                         int q;
 514
 515                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
 516                         if (q >= 0)
 517                                 return q;
 518                 }
 519         }
 520
 521         return r;
 522 }
 523
 524 static const char *controller_to_dirname(const char *controller) {
 525         const char *e;
 526
 527         assert(controller);
 528
 529         /* Converts a controller name to the directory name below
 530          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 531          * just cuts off the name= prefixed used for named
 532          * hierarchies, if it is specified. */
 533
 534         e = startswith(controller, "name=");
 535         if (e)
 536                 return e;
 537
 538         return controller;
 539 }
 540
 541 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 542         const char *dn;
 543         char *t = NULL;
 544
 545         assert(fs);
 546         assert(controller);
 547
 548         dn = controller_to_dirname(controller);
 549
 550         if (isempty(path) && isempty(suffix))
 551                 t = strappend("/sys/fs/cgroup/", dn);
 552         else if (isempty(path))
 553                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
 554         else if (isempty(suffix))
 555                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
 556         else
 557                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
 558         if (!t)
 559                 return -ENOMEM;
 560
 561         *fs = t;
 562         return 0;
 563 }
 564
 565 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 566         char *t;
 567
 568         assert(fs);
 569
 570         if (isempty(path) && isempty(suffix))
 571                 t = strdup("/sys/fs/cgroup");
 572         else if (isempty(path))
 573                 t = strappend("/sys/fs/cgroup/", suffix);
 574         else if (isempty(suffix))
 575                 t = strappend("/sys/fs/cgroup/", path);
 576         else
 577                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
 578         if (!t)
 579                 return -ENOMEM;
 580
 581         *fs = t;
 582         return 0;
 583 }
 584
 585 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 586         int unified, r;
 587
 588         assert(fs);
 589
 590         if (!controller) {
 591                 char *t;
 592
 593                 /* If no controller is specified, we return the path
 594                  * *below* the controllers, without any prefix. */
 595
 596                 if (!path && !suffix)
 597                         return -EINVAL;
 598
 599                 if (!suffix)
 600                         t = strdup(path);
 601                 else if (!path)
 602                         t = strdup(suffix);
 603                 else
 604                         t = strjoin(path, "/", suffix, NULL);
 605                 if (!t)
 606                         return -ENOMEM;
 607
 608                 *fs = path_kill_slashes(t);
 609                 return 0;
 610         }
 611
 612         if (!cg_controller_is_valid(controller))
 613                 return -EINVAL;
 614
 615         unified = cg_unified();
 616         if (unified < 0)
 617                 return unified;
 618
 619         if (unified > 0)
 620                 r = join_path_unified(path, suffix, fs);
 621         else
 622                 r = join_path_legacy(controller, path, suffix, fs);
 623         if (r < 0)
 624                 return r;
 625
 626         path_kill_slashes(*fs);
 627         return 0;
 628 }
 629
 630 static int controller_is_accessible(const char *controller) {
 631         int unified;
 632
 633         assert(controller);
 634
 635         /* Checks whether a specific controller is accessible,
 636          * i.e. its hierarchy mounted. In the unified hierarchy all
 637          * controllers are considered accessible, except for the named
 638          * hierarchies */
 639
 640         if (!cg_controller_is_valid(controller))
 641                 return -EINVAL;
 642
 643         unified = cg_unified();
 644         if (unified < 0)
 645                 return unified;
 646         if (unified > 0) {
 647                 /* We don't support named hierarchies if we are using
 648                  * the unified hierarchy. */
 649
 650                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 651                         return 0;
 652
 653                 if (startswith(controller, "name="))
 654                         return -EOPNOTSUPP;
 655
 656         } else {
 657                 const char *cc, *dn;
 658
 659                 dn = controller_to_dirname(controller);
 660                 cc = strjoina("/sys/fs/cgroup/", dn);
 661
 662                 if (laccess(cc, F_OK) < 0)
 663                         return -errno;
 664         }
 665
 666         return 0;
 667 }
 668
 669 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 670         int r;
 671
 672         assert(controller);
 673         assert(fs);
 674
 675         /* Check if the specified controller is actually accessible */
 676         r = controller_is_accessible(controller);
 677         if (r < 0)
 678                 return r;
 679
 680         return cg_get_path(controller, path, suffix, fs);
 681 }
 682
 683 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 684         assert(path);
 685         assert(sb);
 686         assert(ftwbuf);
 687
 688         if (typeflag != FTW_DP)
 689                 return 0;
 690
 691         if (ftwbuf->level < 1)
 692                 return 0;
 693
 694         (void) rmdir(path);
 695         return 0;
 696 }
 697
 698 int cg_trim(const char *controller, const char *path, bool delete_root) {
 699         _cleanup_free_ char *fs = NULL;
 700         int r = 0;
 701
 702         assert(path);
 703
 704         r = cg_get_path(controller, path, NULL, &fs);
 705         if (r < 0)
 706                 return r;
 707
 708         errno = 0;
 709         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 710                 if (errno == ENOENT)
 711                         r = 0;
 712                 else if (errno > 0)
 713                         r = -errno;
 714                 else
 715                         r = -EIO;
 716         }
 717
 718         if (delete_root) {
 719                 if (rmdir(fs) < 0 && errno != ENOENT)
 720                         return -errno;
 721         }
 722
 723         return r;
 724 }
 725
 726 int cg_create(const char *controller, const char *path) {
 727         _cleanup_free_ char *fs = NULL;
 728         int r;
 729
 730         r = cg_get_path_and_check(controller, path, NULL, &fs);
 731         if (r < 0)
 732                 return r;
 733
 734         r = mkdir_parents(fs, 0755);
 735         if (r < 0)
 736                 return r;
 737
 738         if (mkdir(fs, 0755) < 0) {
 739
 740                 if (errno == EEXIST)
 741                         return 0;
 742
 743                 return -errno;
 744         }
 745
 746         return 1;
 747 }
 748
 749 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 750         int r, q;
 751
 752         assert(pid >= 0);
 753
 754         r = cg_create(controller, path);
 755         if (r < 0)
 756                 return r;
 757
 758         q = cg_attach(controller, path, pid);
 759         if (q < 0)
 760                 return q;
 761
 762         /* This does not remove the cgroup on failure */
 763         return r;
 764 }
 765
 766 int cg_attach(const char *controller, const char *path, pid_t pid) {
 767         _cleanup_free_ char *fs = NULL;
 768         char c[DECIMAL_STR_MAX(pid_t) + 2];
 769         int r;
 770
 771         assert(path);
 772         assert(pid >= 0);
 773
 774         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 775         if (r < 0)
 776                 return r;
 777
 778         if (pid == 0)
 779                 pid = getpid();
 780
 781         xsprintf(c, PID_FMT "\n", pid);
 782
 783         return write_string_file(fs, c, 0);
 784 }
 785
 786 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 787         int r;
 788
 789         assert(controller);
 790         assert(path);
 791         assert(pid >= 0);
 792
 793         r = cg_attach(controller, path, pid);
 794         if (r < 0) {
 795                 char prefix[strlen(path) + 1];
 796
 797                 /* This didn't work? Then let's try all prefixes of
 798                  * the destination */
 799
 800                 PATH_FOREACH_PREFIX(prefix, path) {
 801                         int q;
 802
 803                         q = cg_attach(controller, prefix, pid);
 804                         if (q >= 0)
 805                                 return q;
 806                 }
 807         }
 808
 809         return r;
 810 }
 811
 812 #if 0 /// UNNEEDED by elogind
 813 int cg_set_group_access(
 814                 const char *controller,
 815                 const char *path,
 816                 mode_t mode,
 817                 uid_t uid,
 818                 gid_t gid) {
 819
 820         _cleanup_free_ char *fs = NULL;
 821         int r;
 822
 823         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 824                 return 0;
 825
 826         if (mode != MODE_INVALID)
 827                 mode &= 0777;
 828
 829         r = cg_get_path(controller, path, NULL, &fs);
 830         if (r < 0)
 831                 return r;
 832
 833         return chmod_and_chown(fs, mode, uid, gid);
 834 }
 835
 836 int cg_set_task_access(
 837                 const char *controller,
 838                 const char *path,
 839                 mode_t mode,
 840                 uid_t uid,
 841                 gid_t gid) {
 842
 843         _cleanup_free_ char *fs = NULL, *procs = NULL;
 844         int r, unified;
 845
 846         assert(path);
 847
 848         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 849                 return 0;
 850
 851         if (mode != MODE_INVALID)
 852                 mode &= 0666;
 853
 854         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 855         if (r < 0)
 856                 return r;
 857
 858         r = chmod_and_chown(fs, mode, uid, gid);
 859         if (r < 0)
 860                 return r;
 861
 862         unified = cg_unified();
 863         if (unified < 0)
 864                 return unified;
 865         if (unified)
 866                 return 0;
 867
 868         /* Compatibility, Always keep values for "tasks" in sync with
 869          * "cgroup.procs" */
 870         if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 871                 (void) chmod_and_chown(procs, mode, uid, gid);
 872
 873         return 0;
 874 }
 875 #endif // 0
 876
 877 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
 878         _cleanup_fclose_ FILE *f = NULL;
 879         char line[LINE_MAX];
 880         const char *fs;
 881         size_t cs = 0;
 882         int unified;
 883
 884         assert(path);
 885         assert(pid >= 0);
 886
 887         unified = cg_unified();
 888         if (unified < 0)
 889                 return unified;
 890         if (unified == 0) {
 891                 if (controller) {
 892                         if (!cg_controller_is_valid(controller))
 893                                 return -EINVAL;
 894                 } else
 895                         controller = SYSTEMD_CGROUP_CONTROLLER;
 896
 897                 cs = strlen(controller);
 898         }
 899
 900         fs = procfs_file_alloca(pid, "cgroup");
 901         log_debug_elogind("Searching for PID %u in \"%s\" (controller \"%s\")",
 902                           pid, fs, controller);
 903         f = fopen(fs, "re");
 904         if (!f)
 905                 return errno == ENOENT ? -ESRCH : -errno;
 906
 907         FOREACH_LINE(line, f, return -errno) {
 908                 char *e, *p;
 909
 910                 truncate_nl(line);
 911
 912                 if (unified) {
 913                         e = startswith(line, "0:");
 914                         if (!e)
 915                                 continue;
 916
 917                         e = strchr(e, ':');
 918                         if (!e)
 919                                 continue;
 920                 } else {
 921                         char *l;
 922                         size_t k;
 923                         const char *word, *state;
 924                         bool found = false;
 925
 926                         l = strchr(line, ':');
 927                         if (!l)
 928                                 continue;
 929
 930                         l++;
 931                         e = strchr(l, ':');
 932                         if (!e)
 933                                 continue;
 934
 935                         *e = 0;
 936                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
 937                                 if (k == cs && memcmp(word, controller, cs) == 0) {
 938                                         found = true;
 939                                         break;
 940                                 }
 941                         }
 942
 943                         if (!found)
 944                                 continue;
 945                 }
 946
 947                 log_debug_elogind("Found %s:%s", line, e+1);
 948                 p = strdup(e + 1);
 949                 if (!p)
 950                         return -ENOMEM;
 951
 952                 *path = p;
 953                 return 0;
 954         }
 955
 956         return -ENODATA;
 957 }
 958
 959 int cg_install_release_agent(const char *controller, const char *agent) {
 960         _cleanup_free_ char *fs = NULL, *contents = NULL;
 961         const char *sc;
 962         int r, unified;
 963
 964         assert(agent);
 965
 966         unified = cg_unified();
 967         if (unified < 0)
 968                 return unified;
 969         if (unified) /* doesn't apply to unified hierarchy */
 970                 return -EOPNOTSUPP;
 971
 972         r = cg_get_path(controller, NULL, "release_agent", &fs);
 973         if (r < 0)
 974                 return r;
 975
 976         r = read_one_line_file(fs, &contents);
 977         if (r < 0)
 978                 return r;
 979
 980         sc = strstrip(contents);
 981         if (isempty(sc)) {
 982                 r = write_string_file(fs, agent, 0);
 983                 if (r < 0)
 984                         return r;
 985         } else if (!path_equal(sc, agent))
 986                 return -EEXIST;
 987
 988         fs = mfree(fs);
 989         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 990         if (r < 0)
 991                 return r;
 992
 993         contents = mfree(contents);
 994         r = read_one_line_file(fs, &contents);
 995         if (r < 0)
 996                 return r;
 997
 998         sc = strstrip(contents);
 999         if (streq(sc, "0")) {
1000                 r = write_string_file(fs, "1", 0);
1001                 if (r < 0)
1002                         return r;
1003
1004                 return 1;
1005         }
1006
1007         if (!streq(sc, "1"))
1008                 return -EIO;
1009
1010         return 0;
1011 }
1012
1013 int cg_uninstall_release_agent(const char *controller) {
1014         _cleanup_free_ char *fs = NULL;
1015         int r, unified;
1016
1017         unified = cg_unified();
1018         if (unified < 0)
1019                 return unified;
1020         if (unified) /* Doesn't apply to unified hierarchy */
1021                 return -EOPNOTSUPP;
1022
1023         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1024         if (r < 0)
1025                 return r;
1026
1027         r = write_string_file(fs, "0", 0);
1028         if (r < 0)
1029                 return r;
1030
1031         fs = mfree(fs);
1032
1033         r = cg_get_path(controller, NULL, "release_agent", &fs);
1034         if (r < 0)
1035                 return r;
1036
1037         r = write_string_file(fs, "", 0);
1038         if (r < 0)
1039                 return r;
1040
1041         return 0;
1042 }
1043
1044 int cg_is_empty(const char *controller, const char *path) {
1045         _cleanup_fclose_ FILE *f = NULL;
1046         pid_t pid;
1047         int r;
1048
1049         assert(path);
1050
1051         r = cg_enumerate_processes(controller, path, &f);
1052         if (r == -ENOENT)
1053                 return 1;
1054         if (r < 0)
1055                 return r;
1056
1057         r = cg_read_pid(f, &pid);
1058         if (r < 0)
1059                 return r;
1060
1061         return r == 0;
1062 }
1063
1064 int cg_is_empty_recursive(const char *controller, const char *path) {
1065         int unified, r;
1066
1067         assert(path);
1068
1069         /* The root cgroup is always populated */
1070         if (controller && (isempty(path) || path_equal(path, "/")))
1071                 return false;
1072
1073         unified = cg_unified();
1074         if (unified < 0)
1075                 return unified;
1076
1077         if (unified > 0) {
1078                 _cleanup_free_ char *t = NULL;
1079
1080                 /* On the unified hierarchy we can check empty state
1081                  * via the "populated" attribute of "cgroup.events". */
1082
1083                 r = cg_read_event(controller, path, "populated", &t);
1084                 if (r < 0)
1085                         return r;
1086
1087                 return streq(t, "0");
1088         } else {
1089                 _cleanup_closedir_ DIR *d = NULL;
1090                 char *fn;
1091
1092                 r = cg_is_empty(controller, path);
1093                 if (r <= 0)
1094                         return r;
1095
1096                 r = cg_enumerate_subgroups(controller, path, &d);
1097                 if (r == -ENOENT)
1098                         return 1;
1099                 if (r < 0)
1100                         return r;
1101
1102                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1103                         _cleanup_free_ char *p = NULL;
1104
1105                         p = strjoin(path, "/", fn, NULL);
1106                         free(fn);
1107                         if (!p)
1108                                 return -ENOMEM;
1109
1110                         r = cg_is_empty_recursive(controller, p);
1111                         if (r <= 0)
1112                                 return r;
1113                 }
1114                 if (r < 0)
1115                         return r;
1116
1117                 return true;
1118         }
1119 }
1120
1121 int cg_split_spec(const char *spec, char **controller, char **path) {
1122         char *t = NULL, *u = NULL;
1123         const char *e;
1124
1125         assert(spec);
1126
1127         if (*spec == '/') {
1128                 if (!path_is_safe(spec))
1129                         return -EINVAL;
1130
1131                 if (path) {
1132                         t = strdup(spec);
1133                         if (!t)
1134                                 return -ENOMEM;
1135
1136                         *path = path_kill_slashes(t);
1137                 }
1138
1139                 if (controller)
1140                         *controller = NULL;
1141
1142                 return 0;
1143         }
1144
1145         e = strchr(spec, ':');
1146         if (!e) {
1147                 if (!cg_controller_is_valid(spec))
1148                         return -EINVAL;
1149
1150                 if (controller) {
1151                         t = strdup(spec);
1152                         if (!t)
1153                                 return -ENOMEM;
1154
1155                         *controller = t;
1156                 }
1157
1158                 if (path)
1159                         *path = NULL;
1160
1161                 return 0;
1162         }
1163
1164         t = strndup(spec, e-spec);
1165         if (!t)
1166                 return -ENOMEM;
1167         if (!cg_controller_is_valid(t)) {
1168                 free(t);
1169                 return -EINVAL;
1170         }
1171
1172         if (isempty(e+1))
1173                 u = NULL;
1174         else {
1175                 u = strdup(e+1);
1176                 if (!u) {
1177                         free(t);
1178                         return -ENOMEM;
1179                 }
1180
1181                 if (!path_is_safe(u) ||
1182                     !path_is_absolute(u)) {
1183                         free(t);
1184                         free(u);
1185                         return -EINVAL;
1186                 }
1187
1188                 path_kill_slashes(u);
1189         }
1190
1191         if (controller)
1192                 *controller = t;
1193         else
1194                 free(t);
1195
1196         if (path)
1197                 *path = u;
1198         else
1199                 free(u);
1200
1201         return 0;
1202 }
1203
1204 int cg_mangle_path(const char *path, char **result) {
1205         _cleanup_free_ char *c = NULL, *p = NULL;
1206         char *t;
1207         int r;
1208
1209         assert(path);
1210         assert(result);
1211
1212         /* First, check if it already is a filesystem path */
1213         if (path_startswith(path, "/sys/fs/cgroup")) {
1214
1215                 t = strdup(path);
1216                 if (!t)
1217                         return -ENOMEM;
1218
1219                 *result = path_kill_slashes(t);
1220                 return 0;
1221         }
1222
1223         /* Otherwise, treat it as cg spec */
1224         r = cg_split_spec(path, &c, &p);
1225         if (r < 0)
1226                 return r;
1227
1228         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1229 }
1230
1231 int cg_get_root_path(char **path) {
1232 #if 0 /// elogind does not support systemd scopes and slices
1233         char *p, *e;
1234         int r;
1235
1236         assert(path);
1237
1238         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1239         if (r < 0)
1240                 return r;
1241
1242         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1243         if (!e)
1244                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1245         if (!e)
1246                 e = endswith(p, "/system"); /* even more legacy */
1247         if (e)
1248                 *e = 0;
1249
1250         *path = p;
1251         return 0;
1252 #else
1253         assert(path);
1254         return cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, path);
1255 #endif // 0
1256 }
1257
1258 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1259         _cleanup_free_ char *rt = NULL;
1260         char *p;
1261         int r;
1262
1263         assert(cgroup);
1264         assert(shifted);
1265
1266         if (!root) {
1267                 /* If the root was specified let's use that, otherwise
1268                  * let's determine it from PID 1 */
1269
1270                 r = cg_get_root_path(&rt);
1271                 if (r < 0)
1272                         return r;
1273
1274                 root = rt;
1275                 log_debug_elogind("Determined root path: \"%s\"", root);
1276         }
1277
1278         p = path_startswith(cgroup, root);
1279         if (p && p[0] && (p > cgroup))
1280                 *shifted = p - 1;
1281         else
1282                 *shifted = cgroup;
1283
1284         return 0;
1285 }
1286
1287 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1288         _cleanup_free_ char *raw = NULL;
1289         const char *c;
1290         int r;
1291
1292         assert(pid >= 0);
1293         assert(cgroup);
1294
1295         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1296         if (r < 0)
1297                 return r;
1298
1299         log_debug_elogind("Shifting path: \"%s\" (PID %u, root: \"%s\")",
1300                           raw, pid, root ? root : "NULL");
1301         r = cg_shift_path(raw, root, &c);
1302         if (r < 0)
1303                 return r;
1304
1305         if (c == raw) {
1306                 *cgroup = raw;
1307                 raw = NULL;
1308         } else {
1309                 char *n;
1310
1311                 n = strdup(c);
1312                 if (!n)
1313                         return -ENOMEM;
1314
1315                 *cgroup = n;
1316         }
1317         log_debug_elogind("Resulting cgroup:\"%s\"", *cgroup);
1318
1319         return 0;
1320 }
1321
1322 #if 0 /// UNNEEDED by elogind
1323 int cg_path_decode_unit(const char *cgroup, char **unit) {
1324         char *c, *s;
1325         size_t n;
1326
1327         assert(cgroup);
1328         assert(unit);
1329
1330         n = strcspn(cgroup, "/");
1331         if (n < 3)
1332                 return -ENXIO;
1333
1334         c = strndupa(cgroup, n);
1335         c = cg_unescape(c);
1336
1337         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1338                 return -ENXIO;
1339
1340         s = strdup(c);
1341         if (!s)
1342                 return -ENOMEM;
1343
1344         *unit = s;
1345         return 0;
1346 }
1347
1348 static bool valid_slice_name(const char *p, size_t n) {
1349
1350         if (!p)
1351                 return false;
1352
1353         if (n < strlen("x.slice"))
1354                 return false;
1355
1356         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1357                 char buf[n+1], *c;
1358
1359                 memcpy(buf, p, n);
1360                 buf[n] = 0;
1361
1362                 c = cg_unescape(buf);
1363
1364                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1365         }
1366
1367         return false;
1368 }
1369
/* Skips over all leading slice components of "p". Returns a pointer
 * to the first path component (leading slashes already skipped) that
 * is not a valid slice name; this may be the terminating NUL. */
static const char *skip_slices(const char *p) {
        assert(p);

        for (;;) {
                const char *component;
                size_t len;

                component = p + strspn(p, "/");
                len = strcspn(component, "/");

                if (!valid_slice_name(component, len))
                        return component;

                /* It is a slice, continue right behind it */
                p = component + len;
        }
}
1387
1388 int cg_path_get_unit(const char *path, char **ret) {
1389         const char *e;
1390         char *unit;
1391         int r;
1392
1393         assert(path);
1394         assert(ret);
1395
1396         e = skip_slices(path);
1397
1398         r = cg_path_decode_unit(e, &unit);
1399         if (r < 0)
1400                 return r;
1401
1402         /* We skipped over the slices, don't accept any now */
1403         if (endswith(unit, ".slice")) {
1404                 free(unit);
1405                 return -ENXIO;
1406         }
1407
1408         *ret = unit;
1409         return 0;
1410 }
1411
1412 int cg_pid_get_unit(pid_t pid, char **unit) {
1413         _cleanup_free_ char *cgroup = NULL;
1414         int r;
1415
1416         assert(unit);
1417
1418         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1419         if (r < 0)
1420                 return r;
1421
1422         return cg_path_get_unit(cgroup, unit);
1423 }
1424
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer behind the "session-<id>.scope" component (and
 * any slashes following it), or NULL if "p" is empty, its first
 * component does not have that form, or <id> is not accepted by
 * session_id_valid().
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        /* 8 == strlen("session-"), 6 == strlen(".scope") */
        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                char buf[n - 8 - 6 + 1];

                /* Cut the session id out from between prefix and suffix */
                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1461
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer behind the "user@<uid>.service" component (and
 * any slashes following it), or NULL if "p" is empty, its first
 * component does not have that form, or <uid> is rejected by
 * parse_uid().
 */
static const char *skip_user_manager(const char *p) {
        size_t len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");
        len = strcspn(p, "/");

        /* Anything shorter cannot carry prefix, uid and suffix */
        if (len < strlen("user@x.service"))
                return NULL;

        /* 5 == strlen("user@"), 8 == strlen(".service") */
        if (memcmp(p, "user@", 5) == 0 && memcmp(p + len - 8, ".service", 8) == 0) {
                size_t uid_len = len - 5 - 8;
                char uid_text[uid_len + 1];

                memcpy(uid_text, p + 5, uid_len);
                uid_text[uid_len] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(uid_text, NULL) < 0)
                        return NULL;

                p += len;
                return p + strspn(p, "/");
        }

        return NULL;
}
1499
/* Skips everything in "path" that precedes a user unit: first any
 * slices, then either the user manager service or, failing that, a
 * session scope. Returns NULL if neither of the latter is present. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Prefer the user manager if it's in the path now, otherwise
         * fall back to the user session. */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1516
/* Extracts the user unit name from a cgroup path. Returns -ENXIO if
 * the path carries no user manager / session prefix, otherwise the
 * result of cg_path_get_unit() on the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *remainder;

        assert(path);
        assert(ret);

        remainder = skip_user_prefix(path);

        /* Behind the prefix things look just like for a system unit,
         * so the same parser can be reused. */
        return remainder ? cg_path_get_unit(remainder, ret) : -ENXIO;
}
1532
1533 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1534         _cleanup_free_ char *cgroup = NULL;
1535         int r;
1536
1537         assert(unit);
1538
1539         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1540         if (r < 0)
1541                 return r;
1542
1543         return cg_path_get_user_unit(cgroup, unit);
1544 }
1545
1546 int cg_path_get_machine_name(const char *path, char **machine) {
1547         _cleanup_free_ char *u = NULL;
1548         const char *sl;
1549         int r;
1550
1551         r = cg_path_get_unit(path, &u);
1552         if (r < 0)
1553                 return r;
1554
1555         sl = strjoina("/run/systemd/machines/unit:", u);
1556         return readlink_malloc(sl, machine);
1557 }
1558
1559 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1560         _cleanup_free_ char *cgroup = NULL;
1561         int r;
1562
1563         assert(machine);
1564
1565         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1566         if (r < 0)
1567                 return r;
1568
1569         return cg_path_get_machine_name(cgroup, machine);
1570 }
1571 #endif // 0
1572
/* Extracts the session id from a cgroup path. In the elogind build
 * the id is the (unescaped) first component of the absolute path.
 *
 * If "session" is non-NULL a newly allocated copy of the id is
 * stored there. Returns 0 on success, -ENOENT if the first component
 * is empty (before or after unescaping), -ENOMEM on allocation
 * failure. */
int cg_path_get_session(const char *path, char **session) {
#if 0 /// UNNEEDED by elogind
        _cleanup_free_ char *unit = NULL;
        char *start, *end;
        int r;

        assert(path);

        r = cg_path_get_unit(path, &unit);
        if (r < 0)
                return r;

        start = startswith(unit, "session-");
        if (!start)
                return -ENXIO;
        end = endswith(start, ".scope");
        if (!end)
                return -ENXIO;

        *end = 0;
        if (!session_id_valid(start))
                return -ENXIO;
#else
        /* Elogind uses a flat hierarchy, just "/SESSION".  The only
           wrinkle is that SESSION might be escaped.  */
        const char *first, *start;
        size_t len;

        assert(path);
        log_debug_elogind("path is \"%s\"", path);
        assert(path[0] == '/');

        /* Length of the first component behind the leading slash */
        first = path + 1;
        len = strcspn(first, "/");
        if (len == 0)
                return -ENOENT;

        start = strndupa(first, len);
        start = cg_unescape(start);

        if (start[0] == 0)
                return -ENOENT;
#endif // 0

        /* Callers may pass NULL if they only want validation */
        if (!session)
                return 0;

        log_debug_elogind("found session: \"%s\"", start);

        {
                char *copy;

                copy = strdup(start);
                if (!copy)
                        return -ENOMEM;

                *session = copy;
        }

        return 0;
}
1629
1630 int cg_pid_get_session(pid_t pid, char **session) {
1631         _cleanup_free_ char *cgroup = NULL;
1632         int r;
1633
1634         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1635         if (r < 0)
1636                 return r;
1637
1638         return cg_path_get_session(cgroup, session);
1639 }
1640
1641 #if 0 /// UNNEEDED by elogind
1642 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1643         _cleanup_free_ char *slice = NULL;
1644         char *start, *end;
1645         int r;
1646
1647         assert(path);
1648
1649         r = cg_path_get_slice(path, &slice);
1650         if (r < 0)
1651                 return r;
1652
1653         start = startswith(slice, "user-");
1654         if (!start)
1655                 return -ENXIO;
1656         end = endswith(start, ".slice");
1657         if (!end)
1658                 return -ENXIO;
1659
1660         *end = 0;
1661         if (parse_uid(start, uid) < 0)
1662                 return -ENXIO;
1663
1664         return 0;
1665 }
1666
1667 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1668         _cleanup_free_ char *cgroup = NULL;
1669         int r;
1670
1671         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1672         if (r < 0)
1673                 return r;
1674
1675         return cg_path_get_owner_uid(cgroup, uid);
1676 }
1677
/* Finds the right-most slice unit from the beginning of "p", but
 * stops before the first non-slice component. If the path does not
 * start with any slice, "-.slice" is returned instead.
 *
 * Stores a newly allocated string in *slice. Returns 0 on success,
 * -ENOMEM on allocation failure, or the result of
 * cg_path_decode_unit() for the slice found. */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last_slice = NULL;
        char *root_slice;

        assert(p);
        assert(slice);

        /* Walk the leading slice components, remembering the last one */
        for (;;) {
                size_t len;

                p += strspn(p, "/");
                len = strcspn(p, "/");

                if (!valid_slice_name(p, len))
                        break;

                last_slice = p;
                p += len;
        }

        if (last_slice)
                return cg_path_decode_unit(last_slice, slice);

        root_slice = strdup("-.slice");
        if (!root_slice)
                return -ENOMEM;

        *slice = root_slice;
        return 0;
}
1713
1714 int cg_pid_get_slice(pid_t pid, char **slice) {
1715         _cleanup_free_ char *cgroup = NULL;
1716         int r;
1717
1718         assert(slice);
1719
1720         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1721         if (r < 0)
1722                 return r;
1723
1724         return cg_path_get_slice(cgroup, slice);
1725 }
1726
/* Extracts the user slice from a cgroup path. Returns -ENXIO if the
 * path carries no user manager / session prefix, otherwise the
 * result of cg_path_get_slice() on the remainder. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *remainder;

        assert(p);
        assert(slice);

        remainder = skip_user_prefix(p);

        /* Behind the prefix things look just like for a system slice,
         * so the same parser can be reused. */
        return remainder ? cg_path_get_slice(remainder, slice) : -ENXIO;
}
1740
1741 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1742         _cleanup_free_ char *cgroup = NULL;
1743         int r;
1744
1745         assert(slice);
1746
1747         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1748         if (r < 0)
1749                 return r;
1750
1751         return cg_path_get_user_slice(cgroup, slice);
1752 }
1753 #endif // 0
1754
1755 char *cg_escape(const char *p) {
1756         bool need_prefix = false;
1757
1758         /* This implements very minimal escaping for names to be used
1759          * as file names in the cgroup tree: any name which might
1760          * conflict with a kernel name or is prefixed with '_' is
1761          * prefixed with a '_'. That way, when reading cgroup names it
1762          * is sufficient to remove a single prefixing underscore if
1763          * there is one. */
1764
1765         /* The return value of this function (unlike cg_unescape())
1766          * needs free()! */
1767
1768         if (p[0] == 0 ||
1769             p[0] == '_' ||
1770             p[0] == '.' ||
1771             streq(p, "notify_on_release") ||
1772             streq(p, "release_agent") ||
1773             streq(p, "tasks") ||
1774             startswith(p, "cgroup."))
1775                 need_prefix = true;
1776         else {
1777                 const char *dot;
1778
1779                 dot = strrchr(p, '.');
1780                 if (dot) {
1781                         CGroupController c;
1782                         size_t l = dot - p;
1783
1784                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1785                                 const char *n;
1786
1787                                 n = cgroup_controller_to_string(c);
1788
1789                                 if (l != strlen(n))
1790                                         continue;
1791
1792                                 if (memcmp(p, n, l) != 0)
1793                                         continue;
1794
1795                                 need_prefix = true;
1796                                 break;
1797                         }
1798                 }
1799         }
1800
1801         if (need_prefix)
1802                 return strappend("_", p);
1803
1804         return strdup(p);
1805 }
1806
/* Undoes cg_escape(): skips a single leading underscore, if there is
 * one.
 *
 * The return value of this function (unlike cg_escape()) doesn't
 * need free()! It points into the string passed in. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1818
1819 #define CONTROLLER_VALID                        \
1820         DIGITS LETTERS                          \
1821         "_"
1822
1823 bool cg_controller_is_valid(const char *p) {
1824         const char *t, *s;
1825
1826         if (!p)
1827                 return false;
1828
1829         s = startswith(p, "name=");
1830         if (s)
1831                 p = s;
1832
1833         if (*p == 0 || *p == '_')
1834                 return false;
1835
1836         for (t = p; *t; t++)
1837                 if (!strchr(CONTROLLER_VALID, *t))
1838                         return false;
1839
1840         if (t - p > FILENAME_MAX)
1841                 return false;
1842
1843         return true;
1844 }
1845
1846 #if 0 /// UNNEEDED by elogind
1847 int cg_slice_to_path(const char *unit, char **ret) {
1848         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1849         const char *dash;
1850         int r;
1851
1852         assert(unit);
1853         assert(ret);
1854
1855         if (streq(unit, "-.slice")) {
1856                 char *x;
1857
1858                 x = strdup("");
1859                 if (!x)
1860                         return -ENOMEM;
1861                 *ret = x;
1862                 return 0;
1863         }
1864
1865         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1866                 return -EINVAL;
1867
1868         if (!endswith(unit, ".slice"))
1869                 return -EINVAL;
1870
1871         r = unit_name_to_prefix(unit, &p);
1872         if (r < 0)
1873                 return r;
1874
1875         dash = strchr(p, '-');
1876
1877         /* Don't allow initial dashes */
1878         if (dash == p)
1879                 return -EINVAL;
1880
1881         while (dash) {
1882                 _cleanup_free_ char *escaped = NULL;
1883                 char n[dash - p + sizeof(".slice")];
1884
1885                 /* Don't allow trailing or double dashes */
1886                 if (dash[1] == 0 || dash[1] == '-')
1887                         return -EINVAL;
1888
1889                 strcpy(stpncpy(n, p, dash - p), ".slice");
1890                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1891                         return -EINVAL;
1892
1893                 escaped = cg_escape(n);
1894                 if (!escaped)
1895                         return -ENOMEM;
1896
1897                 if (!strextend(&s, escaped, "/", NULL))
1898                         return -ENOMEM;
1899
1900                 dash = strchr(dash+1, '-');
1901         }
1902
1903         e = cg_escape(unit);
1904         if (!e)
1905                 return -ENOMEM;
1906
1907         if (!strextend(&s, e, NULL))
1908                 return -ENOMEM;
1909
1910         *ret = s;
1911         s = NULL;
1912
1913         return 0;
1914 }
1915 #endif // 0
1916
1917 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1918         _cleanup_free_ char *p = NULL;
1919         int r;
1920
1921         r = cg_get_path(controller, path, attribute, &p);
1922         if (r < 0)
1923                 return r;
1924
1925         return write_string_file(p, value, 0);
1926 }
1927
1928 #if 0 /// UNNEEDED by elogind
1929 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1930         _cleanup_free_ char *p = NULL;
1931         int r;
1932
1933         r = cg_get_path(controller, path, attribute, &p);
1934         if (r < 0)
1935                 return r;
1936
1937         return read_one_line_file(p, ret);
1938 }
1939
1940 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1941         CGroupController c;
1942         int r, unified;
1943
1944         /* This one will create a cgroup in our private tree, but also
1945          * duplicate it in the trees specified in mask, and remove it
1946          * in all others */
1947
1948         /* First create the cgroup in our own hierarchy. */
1949         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1950         if (r < 0)
1951                 return r;
1952
1953         /* If we are in the unified hierarchy, we are done now */
1954         unified = cg_unified();
1955         if (unified < 0)
1956                 return unified;
1957         if (unified > 0)
1958                 return 0;
1959
1960         /* Otherwise, do the same in the other hierarchies */
1961         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1962                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1963                 const char *n;
1964
1965                 n = cgroup_controller_to_string(c);
1966
1967                 if (mask & bit)
1968                         (void) cg_create(n, path);
1969                 else if (supported & bit)
1970                         (void) cg_trim(n, path, true);
1971         }
1972
1973         return 0;
1974 }
1975
1976 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1977         CGroupController c;
1978         int r, unified;
1979
1980         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1981         if (r < 0)
1982                 return r;
1983
1984         unified = cg_unified();
1985         if (unified < 0)
1986                 return unified;
1987         if (unified > 0)
1988                 return 0;
1989
1990         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1991                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1992                 const char *p = NULL;
1993
1994                 if (!(supported & bit))
1995                         continue;
1996
1997                 if (path_callback)
1998                         p = path_callback(bit, userdata);
1999
2000                 if (!p)
2001                         p = path;
2002
2003                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
2004         }
2005
2006         return 0;
2007 }
2008
2009 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
2010         Iterator i;
2011         void *pidp;
2012         int r = 0;
2013
2014         SET_FOREACH(pidp, pids, i) {
2015                 pid_t pid = PTR_TO_PID(pidp);
2016                 int q;
2017
2018                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
2019                 if (q < 0 && r >= 0)
2020                         r = q;
2021         }
2022
2023         return r;
2024 }
2025
2026 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
2027         CGroupController c;
2028         int r = 0, unified;
2029
2030         if (!path_equal(from, to))  {
2031                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
2032                 if (r < 0)
2033                         return r;
2034         }
2035
2036         unified = cg_unified();
2037         if (unified < 0)
2038                 return unified;
2039         if (unified > 0)
2040                 return r;
2041
2042         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2043                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2044                 const char *p = NULL;
2045
2046                 if (!(supported & bit))
2047                         continue;
2048
2049                 if (to_callback)
2050                         p = to_callback(bit, userdata);
2051
2052                 if (!p)
2053                         p = to;
2054
2055                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2056         }
2057
2058         return 0;
2059 }
2060
2061 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2062         CGroupController c;
2063         int r, unified;
2064
2065         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2066         if (r < 0)
2067                 return r;
2068
2069         unified = cg_unified();
2070         if (unified < 0)
2071                 return unified;
2072         if (unified > 0)
2073                 return r;
2074
2075         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2076                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2077
2078                 if (!(supported & bit))
2079                         continue;
2080
2081                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2082         }
2083
2084         return 0;
2085 }
2086 #endif // 0
2087
2088 int cg_mask_supported(CGroupMask *ret) {
2089         CGroupMask mask = 0;
2090         int r, unified;
2091
2092         /* Determines the mask of supported cgroup controllers. Only
2093          * includes controllers we can make sense of and that are
2094          * actually accessible. */
2095
2096         unified = cg_unified();
2097         if (unified < 0)
2098                 return unified;
2099         if (unified > 0) {
2100                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2101                 const char *c;
2102
2103                 /* In the unified hierarchy we can read the supported
2104                  * and accessible controllers from a the top-level
2105                  * cgroup attribute */
2106
2107                 r = cg_get_root_path(&root);
2108                 if (r < 0)
2109                         return r;
2110
2111                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2112                 if (r < 0)
2113                         return r;
2114
2115                 r = read_one_line_file(path, &controllers);
2116                 if (r < 0)
2117                         return r;
2118
2119                 c = controllers;
2120                 for (;;) {
2121                         _cleanup_free_ char *n = NULL;
2122                         CGroupController v;
2123
2124                         r = extract_first_word(&c, &n, NULL, 0);
2125                         if (r < 0)
2126                                 return r;
2127                         if (r == 0)
2128                                 break;
2129
2130                         v = cgroup_controller_from_string(n);
2131                         if (v < 0)
2132                                 continue;
2133
2134                         mask |= CGROUP_CONTROLLER_TO_MASK(v);
2135                 }
2136
2137                 /* Currently, we only support the memory, io and pids
2138                  * controller in the unified hierarchy, mask
2139                  * everything else off. */
2140                 mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2141
2142         } else {
2143                 CGroupController c;
2144
2145                 /* In the legacy hierarchy, we check whether which
2146                  * hierarchies are mounted. */
2147
2148                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2149                         const char *n;
2150
2151                         n = cgroup_controller_to_string(c);
2152                         if (controller_is_accessible(n) >= 0)
2153                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2154                 }
2155         }
2156
2157         *ret = mask;
2158         return 0;
2159 }
2160
2161 #if 0 /// UNNEEDED by elogind
2162 int cg_kernel_controllers(Set *controllers) {
2163         _cleanup_fclose_ FILE *f = NULL;
2164         char buf[LINE_MAX];
2165         int r;
2166
2167         assert(controllers);
2168
2169         /* Determines the full list of kernel-known controllers. Might
2170          * include controllers we don't actually support, arbitrary
2171          * named hierarchies and controllers that aren't currently
2172          * accessible (because not mounted). */
2173
2174         f = fopen("/proc/cgroups", "re");
2175         if (!f) {
2176                 if (errno == ENOENT)
2177                         return 0;
2178                 return -errno;
2179         }
2180
2181         /* Ignore the header line */
2182         (void) fgets(buf, sizeof(buf), f);
2183
2184         for (;;) {
2185                 char *controller;
2186                 int enabled = 0;
2187
2188                 errno = 0;
2189                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2190
2191                         if (feof(f))
2192                                 break;
2193
2194                         if (ferror(f) && errno > 0)
2195                                 return -errno;
2196
2197                         return -EBADMSG;
2198                 }
2199
2200                 if (!enabled) {
2201                         free(controller);
2202                         continue;
2203                 }
2204
2205                 if (!cg_controller_is_valid(controller)) {
2206                         free(controller);
2207                         return -EBADMSG;
2208                 }
2209
2210                 r = set_consume(controllers, controller);
2211                 if (r < 0)
2212                         return r;
2213         }
2214
2215         return 0;
2216 }
2217 #endif // 0
2218
2219 static thread_local int unified_cache = -1;
2220
2221 int cg_unified(void) {
2222         struct statfs fs;
2223
2224         /* Checks if we support the unified hierarchy. Returns an
2225          * error when the cgroup hierarchies aren't mounted yet or we
2226          * have any other trouble determining if the unified hierarchy
2227          * is supported. */
2228
2229         if (unified_cache >= 0)
2230                 return unified_cache;
2231
2232         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2233                 return -errno;
2234
2235 #if 0 /// UNNEEDED by elogind
2236         if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC))
2237                 unified_cache = true;
2238         else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2239 #else
2240         /* elogind can not support the unified hierarchy as a controller,
2241          * so always assume a classical hierarchy.
2242          * If, ond only *if*, someone really wants to substitute systemd-login
2243          * in an environment managed by systemd with elogin, we might have to
2244          * add such a support. */
2245         if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2246 #endif // 0
2247                 unified_cache = false;
2248         else
2249                 return -ENOMEDIUM;
2250
2251         return unified_cache;
2252 }
2253
2254 #if 0 /// UNNEEDED by elogind
2255 void cg_unified_flush(void) {
2256         unified_cache = -1;
2257 }
2258
2259 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2260         _cleanup_free_ char *fs = NULL;
2261         CGroupController c;
2262         int r, unified;
2263
2264         assert(p);
2265
2266         if (supported == 0)
2267                 return 0;
2268
2269         unified = cg_unified();
2270         if (unified < 0)
2271                 return unified;
2272         if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2273                 return 0;
2274
2275         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2276         if (r < 0)
2277                 return r;
2278
2279         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2280                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2281                 const char *n;
2282
2283                 if (!(supported & bit))
2284                         continue;
2285
2286                 n = cgroup_controller_to_string(c);
2287                 {
2288                         char s[1 + strlen(n) + 1];
2289
2290                         s[0] = mask & bit ? '+' : '-';
2291                         strcpy(s + 1, n);
2292
2293                         r = write_string_file(fs, s, 0);
2294                         if (r < 0)
2295                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2296                 }
2297         }
2298
2299         return 0;
2300 }
2301
2302 bool cg_is_unified_wanted(void) {
2303         static thread_local int wanted = -1;
2304         int r, unified;
2305
2306         /* If the hierarchy is already mounted, then follow whatever
2307          * was chosen for it. */
2308         unified = cg_unified();
2309         if (unified >= 0)
2310                 return unified;
2311
2312         /* Otherwise, let's see what the kernel command line has to
2313          * say. Since checking that is expensive, let's cache the
2314          * result. */
2315         if (wanted >= 0)
2316                 return wanted;
2317
2318         r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2319         if (r > 0)
2320                 return (wanted = true);
2321         else {
2322                 _cleanup_free_ char *value = NULL;
2323
2324                 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2325                 if (r < 0)
2326                         return false;
2327                 if (r == 0)
2328                         return (wanted = false);
2329
2330                 return (wanted = parse_boolean(value) > 0);
2331         }
2332 }
2333
/* Tells whether the legacy cgroup hierarchy shall be used; this is the
 * exact negation of cg_is_unified_wanted(). */
bool cg_is_legacy_wanted(void) {
        bool unified_wanted = cg_is_unified_wanted();

        return !unified_wanted;
}
2337 #else
/* elogind variant: the legacy cgroup hierarchy is unconditionally
 * reported as wanted. */
bool cg_is_legacy_wanted(void) {
        return true;
}
2341 #endif // 0
2342
2343 #if 0 /// UNNEEDED by elogind
2344 int cg_weight_parse(const char *s, uint64_t *ret) {
2345         uint64_t u;
2346         int r;
2347
2348         if (isempty(s)) {
2349                 *ret = CGROUP_WEIGHT_INVALID;
2350                 return 0;
2351         }
2352
2353         r = safe_atou64(s, &u);
2354         if (r < 0)
2355                 return r;
2356
2357         if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2358                 return -ERANGE;
2359
2360         *ret = u;
2361         return 0;
2362 }
2363
2364 const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2365         [CGROUP_IO_RBPS_MAX]    = CGROUP_LIMIT_MAX,
2366         [CGROUP_IO_WBPS_MAX]    = CGROUP_LIMIT_MAX,
2367         [CGROUP_IO_RIOPS_MAX]   = CGROUP_LIMIT_MAX,
2368         [CGROUP_IO_WIOPS_MAX]   = CGROUP_LIMIT_MAX,
2369 };
2370
2371 static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2372         [CGROUP_IO_RBPS_MAX]    = "IOReadBandwidthMax",
2373         [CGROUP_IO_WBPS_MAX]    = "IOWriteBandwidthMax",
2374         [CGROUP_IO_RIOPS_MAX]   = "IOReadIOPSMax",
2375         [CGROUP_IO_WIOPS_MAX]   = "IOWriteIOPSMax",
2376 };
2377
2378 DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2379
2380 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2381         uint64_t u;
2382         int r;
2383
2384         if (isempty(s)) {
2385                 *ret = CGROUP_CPU_SHARES_INVALID;
2386                 return 0;
2387         }
2388
2389         r = safe_atou64(s, &u);
2390         if (r < 0)
2391                 return r;
2392
2393         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2394                 return -ERANGE;
2395
2396         *ret = u;
2397         return 0;
2398 }
2399
2400 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2401         uint64_t u;
2402         int r;
2403
2404         if (isempty(s)) {
2405                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2406                 return 0;
2407         }
2408
2409         r = safe_atou64(s, &u);
2410         if (r < 0)
2411                 return r;
2412
2413         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2414                 return -ERANGE;
2415
2416         *ret = u;
2417         return 0;
2418 }
2419 #endif // 0
2420
2421 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2422         [CGROUP_CONTROLLER_CPU] = "cpu",
2423         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2424         [CGROUP_CONTROLLER_IO] = "io",
2425         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2426         [CGROUP_CONTROLLER_MEMORY] = "memory",
2427         [CGROUP_CONTROLLER_DEVICES] = "devices",
2428         [CGROUP_CONTROLLER_PIDS] = "pids",
2429 };
2430
2431 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);