src/core/cgroup.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2013 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <fcntl.h>
  23
  24 #include "path-util.h"
  25 #include "special.h"
  26 #include "cgroup-util.h"
  27 #include "cgroup.h"
  28
  29 void cgroup_context_init(CGroupContext *c) {
  30         assert(c);
  31
  32         /* Initialize everything to the kernel defaults, assuming the
  33          * structure is preinitialized to 0 */
  34
  35         c->cpu_shares = 1024;
  36         c->memory_limit = (uint64_t) -1;
  37         c->blockio_weight = 1000;
  38 }
  39
  40 void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
  41         assert(c);
  42         assert(a);
  43
  44         LIST_REMOVE(device_allow, c->device_allow, a);
  45         free(a->path);
  46         free(a);
  47 }
  48
  49 void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
  50         assert(c);
  51         assert(w);
  52
  53         LIST_REMOVE(device_weights, c->blockio_device_weights, w);
  54         free(w->path);
  55         free(w);
  56 }
  57
  58 void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
  59         assert(c);
  60         assert(b);
  61
  62         LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
  63         free(b->path);
  64         free(b);
  65 }
  66
  67 void cgroup_context_done(CGroupContext *c) {
  68         assert(c);
  69
  70         while (c->blockio_device_weights)
  71                 cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
  72
  73         while (c->blockio_device_bandwidths)
  74                 cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
  75
  76         while (c->device_allow)
  77                 cgroup_context_free_device_allow(c, c->device_allow);
  78 }
  79
  80 void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
  81         CGroupBlockIODeviceBandwidth *b;
  82         CGroupBlockIODeviceWeight *w;
  83         CGroupDeviceAllow *a;
  84
  85         assert(c);
  86         assert(f);
  87
  88         prefix = strempty(prefix);
  89
  90         fprintf(f,
  91                 "%sCPUAccounting=%s\n"
  92                 "%sBlockIOAccounting=%s\n"
  93                 "%sMemoryAccounting=%s\n"
  94                 "%sCPUShares=%lu\n"
  95                 "%sBlockIOWeight=%lu\n"
  96                 "%sMemoryLimit=%" PRIu64 "\n"
  97                 "%sDevicePolicy=%s\n",
  98                 prefix, yes_no(c->cpu_accounting),
  99                 prefix, yes_no(c->blockio_accounting),
 100                 prefix, yes_no(c->memory_accounting),
 101                 prefix, c->cpu_shares,
 102                 prefix, c->blockio_weight,
 103                 prefix, c->memory_limit,
 104                 prefix, cgroup_device_policy_to_string(c->device_policy));
 105
 106         LIST_FOREACH(device_allow, a, c->device_allow)
 107                 fprintf(f,
 108                         "%sDeviceAllow=%s %s%s%s\n",
 109                         prefix,
 110                         a->path,
 111                         a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
 112
 113         LIST_FOREACH(device_weights, w, c->blockio_device_weights)
 114                 fprintf(f,
 115                         "%sBlockIODeviceWeight=%s %lu",
 116                         prefix,
 117                         w->path,
 118                         w->weight);
 119
 120         LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
 121                 char buf[FORMAT_BYTES_MAX];
 122
 123                 fprintf(f,
 124                         "%s%s=%s %s\n",
 125                         prefix,
 126                         b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
 127                         b->path,
 128                         format_bytes(buf, sizeof(buf), b->bandwidth));
 129         }
 130 }
 131
 132 static int lookup_blkio_device(const char *p, dev_t *dev) {
 133         struct stat st;
 134         int r;
 135
 136         assert(p);
 137         assert(dev);
 138
 139         r = stat(p, &st);
 140         if (r < 0) {
 141                 log_warning("Couldn't stat device %s: %m", p);
 142                 return -errno;
 143         }
 144
 145         if (S_ISBLK(st.st_mode))
 146                 *dev = st.st_rdev;
 147         else if (major(st.st_dev) != 0) {
 148                 /* If this is not a device node then find the block
 149                  * device this file is stored on */
 150                 *dev = st.st_dev;
 151
 152                 /* If this is a partition, try to get the originating
 153                  * block device */
 154                 block_get_whole_disk(*dev, dev);
 155         } else {
 156                 log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
 157                 return -ENODEV;
 158         }
 159
 160         return 0;
 161 }
 162
 163 static int whitelist_device(const char *path, const char *node, const char *acc) {
 164         char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
 165         struct stat st;
 166         int r;
 167
 168         assert(path);
 169         assert(acc);
 170
 171         if (stat(node, &st) < 0) {
 172                 log_warning("Couldn't stat device %s", node);
 173                 return -errno;
 174         }
 175
 176         if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
 177                 log_warning("%s is not a device.", node);
 178                 return -ENODEV;
 179         }
 180
 181         sprintf(buf,
 182                 "%c %u:%u %s",
 183                 S_ISCHR(st.st_mode) ? 'c' : 'b',
 184                 major(st.st_rdev), minor(st.st_rdev),
 185                 acc);
 186
 187         r = cg_set_attribute("devices", path, "devices.allow", buf);
 188         if (r < 0)
 189                 log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r));
 190
 191         return r;
 192 }
 193
 194 void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path) {
 195         int r;
 196
 197         assert(c);
 198         assert(path);
 199
 200         if (mask == 0)
 201                 return;
 202
 203         if (mask & CGROUP_CPU) {
 204                 char buf[DECIMAL_STR_MAX(unsigned long) + 1];
 205
 206                 sprintf(buf, "%lu\n", c->cpu_shares);
 207                 r = cg_set_attribute("cpu", path, "cpu.shares", buf);
 208                 if (r < 0)
 209                         log_warning("Failed to set cpu.shares on %s: %s", path, strerror(-r));
 210         }
 211
 212         if (mask & CGROUP_BLKIO) {
 213                 char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
 214                               DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
 215                               DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
 216                 CGroupBlockIODeviceWeight *w;
 217                 CGroupBlockIODeviceBandwidth *b;
 218
 219                 sprintf(buf, "%lu\n", c->blockio_weight);
 220                 r = cg_set_attribute("blkio", path, "blkio.weight", buf);
 221                 if (r < 0)
 222                         log_warning("Failed to set blkio.weight on %s: %s", path, strerror(-r));
 223
 224                 /* FIXME: no way to reset this list */
 225                 LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
 226                         dev_t dev;
 227
 228                         r = lookup_blkio_device(w->path, &dev);
 229                         if (r < 0)
 230                                 continue;
 231
 232                         sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
 233                         r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
 234                         if (r < 0)
 235                                 log_error("Failed to set blkio.weight_device on %s: %s", path, strerror(-r));
 236                 }
 237
 238                 /* FIXME: no way to reset this list */
 239                 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
 240                         const char *a;
 241                         dev_t dev;
 242
 243                         r = lookup_blkio_device(b->path, &dev);
 244                         if (r < 0)
 245                                 continue;
 246
 247                         a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
 248
 249                         sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
 250                         r = cg_set_attribute("blkio", path, a, buf);
 251                         if (r < 0)
 252                                 log_error("Failed to set %s on %s: %s", a, path, strerror(-r));
 253                 }
 254         }
 255
 256         if (mask & CGROUP_MEMORY) {
 257                 if (c->memory_limit != (uint64_t) -1) {
 258                         char buf[DECIMAL_STR_MAX(uint64_t) + 1];
 259
 260                         sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
 261                         r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
 262                 } else
 263                         r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
 264
 265                 if (r < 0)
 266                         log_error("Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
 267         }
 268
 269         if (mask & CGROUP_DEVICE) {
 270                 CGroupDeviceAllow *a;
 271
 272                 if (c->device_allow || c->device_policy != CGROUP_AUTO)
 273                         r = cg_set_attribute("devices", path, "devices.deny", "a");
 274                 else
 275                         r = cg_set_attribute("devices", path, "devices.allow", "a");
 276                 if (r < 0)
 277                         log_error("Failed to reset devices.list on %s: %s", path, strerror(-r));
 278
 279                 if (c->device_policy == CGROUP_CLOSED ||
 280                     (c->device_policy == CGROUP_AUTO && c->device_allow)) {
 281                         static const char auto_devices[] =
 282                                 "/dev/null\0" "rw\0"
 283                                 "/dev/zero\0" "rw\0"
 284                                 "/dev/full\0" "rw\0"
 285                                 "/dev/random\0" "rw\0"
 286                                 "/dev/urandom\0" "rw\0";
 287
 288                         const char *x, *y;
 289
 290                         NULSTR_FOREACH_PAIR(x, y, auto_devices)
 291                                 whitelist_device(path, x, y);
 292                 }
 293
 294                 LIST_FOREACH(device_allow, a, c->device_allow) {
 295                         char acc[4];
 296                         unsigned k = 0;
 297
 298                         if (a->r)
 299                                 acc[k++] = 'r';
 300                         if (a->w)
 301                                 acc[k++] = 'w';
 302                         if (a->m)
 303                                 acc[k++] = 'm';
 304
 305                         if (k == 0)
 306                                 continue;
 307
 308                         acc[k++] = 0;
 309                         whitelist_device(path, a->path, acc);
 310                 }
 311         }
 312 }
 313
 314 CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
 315         CGroupControllerMask mask = 0;
 316
 317         /* Figure out which controllers we need */
 318
 319         if (c->cpu_accounting || c->cpu_shares != 1024)
 320                 mask |= CGROUP_CPUACCT | CGROUP_CPU;
 321
 322         if (c->blockio_accounting ||
 323             c->blockio_weight != 1000 ||
 324             c->blockio_device_weights ||
 325             c->blockio_device_bandwidths)
 326                 mask |= CGROUP_BLKIO;
 327
 328         if (c->memory_accounting ||
 329             c->memory_limit != (uint64_t) -1)
 330                 mask |= CGROUP_MEMORY;
 331
 332         if (c->device_allow || c->device_policy != CGROUP_AUTO)
 333                 mask |= CGROUP_DEVICE;
 334
 335         return mask;
 336 }
 337
 338 static CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
 339         CGroupContext *c;
 340
 341         c = unit_get_cgroup_context(u);
 342         if (!c)
 343                 return 0;
 344
 345         return cgroup_context_get_mask(c);
 346 }
 347
 348 static CGroupControllerMask unit_get_members_mask(Unit *u) {
 349         assert(u);
 350         return u->cgroup_members_mask;
 351 }
 352
 353 static CGroupControllerMask unit_get_siblings_mask(Unit *u) {
 354         assert(u);
 355
 356         if (!UNIT_ISSET(u->slice))
 357                 return 0;
 358
 359         /* Sibling propagation is only relevant for weight-based
 360          * controllers, so let's mask out everything else */
 361         return unit_get_members_mask(UNIT_DEREF(u->slice)) &
 362                 (CGROUP_CPU|CGROUP_BLKIO|CGROUP_CPUACCT);
 363 }
 364
 365 static CGroupControllerMask unit_get_target_mask(Unit *u) {
 366         CGroupControllerMask mask;
 367
 368         mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
 369         mask &= u->manager->cgroup_supported;
 370
 371         return mask;
 372 }
 373
 374 /* Recurse from a unit up through its containing slices, propagating
 375  * mask bits upward. A unit is also member of itself. */
 376 void unit_update_member_masks(Unit *u) {
 377         u->cgroup_members_mask |= unit_get_cgroup_mask(u);
 378         if (UNIT_ISSET(u->slice)) {
 379                 Unit *s = UNIT_DEREF(u->slice);
 380                 s->cgroup_members_mask |= u->cgroup_members_mask;
 381                 unit_update_member_masks(s);
 382         }
 383 }
 384
 385 static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
 386         _cleanup_free_ char *path;
 387         int r;
 388         bool was_in_hash = false;
 389
 390         assert(u);
 391
 392         path = unit_default_cgroup_path(u);
 393         if (!path)
 394                 return log_oom();
 395
 396         r = hashmap_put(u->manager->cgroup_unit, path, u);
 397         if (r == 0)
 398                 was_in_hash = true;
 399         else if (r < 0) {
 400                 log_error(r == -EEXIST ?
 401                           "cgroup %s exists already: %s" : "hashmap_put failed for %s: %s",
 402                           path, strerror(-r));
 403                 return r;
 404         }
 405
 406         /* First, create our own group */
 407         r = cg_create_everywhere(u->manager->cgroup_supported, mask, path);
 408         if (r < 0)
 409                 log_error("Failed to create cgroup %s: %s", path, strerror(-r));
 410
 411         /* Then, possibly move things over */
 412         if (u->cgroup_path) {
 413                 r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, path);
 414                 if (r < 0)
 415                         log_error("Failed to migrate cgroup from %s to %s: %s",
 416                                   u->cgroup_path, path, strerror(-r));
 417         }
 418
 419         if (!was_in_hash) {
 420                 /* Remember the new data */
 421                 free(u->cgroup_path);
 422                 u->cgroup_path = path;
 423                 path = NULL;
 424         }
 425
 426         u->cgroup_realized = true;
 427         u->cgroup_mask = mask;
 428
 429         return 0;
 430 }
 431
 432 static bool unit_has_mask_realized(Unit *u, CGroupControllerMask mask) {
 433         return u->cgroup_realized && u->cgroup_mask == mask;
 434 }
 435
 436 /* Check if necessary controllers and attributes for a unit are in place.
 437  *
 438  * If so, do nothing.
 439  * If not, create paths, move processes over, and set attributes.
 440  *
 441  * Returns 0 on success and < 0 on failure. */
 442 static int unit_realize_cgroup_now(Unit *u) {
 443         CGroupControllerMask mask;
 444         int r;
 445
 446         assert(u);
 447
 448         if (u->in_cgroup_queue) {
 449                 LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u);
 450                 u->in_cgroup_queue = false;
 451         }
 452
 453         mask = unit_get_target_mask(u);
 454
 455         /* TODO: Consider skipping this check. It may be redundant. */
 456         if (unit_has_mask_realized(u, mask))
 457                 return 0;
 458
 459         /* First, realize parents */
 460         if (UNIT_ISSET(u->slice)) {
 461                 r = unit_realize_cgroup_now(UNIT_DEREF(u->slice));
 462                 if (r < 0)
 463                         return r;
 464         }
 465
 466         /* And then do the real work */
 467         r = unit_create_cgroups(u, mask);
 468         if (r < 0)
 469                 return r;
 470
 471         /* Finally, apply the necessary attributes. */
 472         cgroup_context_apply(unit_get_cgroup_context(u), mask, u->cgroup_path);
 473
 474         return 0;
 475 }
 476
 477 static void unit_add_to_cgroup_queue(Unit *u) {
 478
 479         if (u->in_cgroup_queue)
 480                 return;
 481
 482         LIST_PREPEND(cgroup_queue, u->manager->cgroup_queue, u);
 483         u->in_cgroup_queue = true;
 484 }
 485
 486 unsigned manager_dispatch_cgroup_queue(Manager *m) {
 487         Unit *i;
 488         unsigned n = 0;
 489         int r;
 490
 491         while ((i = m->cgroup_queue)) {
 492                 assert(i->in_cgroup_queue);
 493
 494                 r = unit_realize_cgroup_now(i);
 495                 if (r < 0)
 496                         log_warning("Failed to realize cgroups for queued unit %s: %s", i->id, strerror(-r));
 497
 498                 n++;
 499         }
 500
 501         return n;
 502 }
 503
 504 static void unit_queue_siblings(Unit *u) {
 505         Unit *slice;
 506
 507         /* This adds the siblings of the specified unit and the
 508          * siblings of all parent units to the cgroup queue. (But
 509          * neither the specified unit itself nor the parents.) */
 510
 511         while ((slice = UNIT_DEREF(u->slice))) {
 512                 Iterator i;
 513                 Unit *m;
 514
 515                 SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
 516                         if (m == u)
 517                                 continue;
 518
 519                         /* Skip units that have a dependency on the slice
 520                          * but aren't actually in it. */
 521                         if (UNIT_DEREF(m->slice) != slice)
 522                                 continue;
 523
 524                         /* No point in doing cgroup application for units
 525                          * without active processes. */
 526                         if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m)))
 527                                 continue;
 528
 529                         /* If the unit doesn't need any new controllers
 530                          * and has current ones realized, it doesn't need
 531                          * any changes. */
 532                         if (unit_has_mask_realized(m, unit_get_target_mask(m)))
 533                                 continue;
 534
 535                         unit_add_to_cgroup_queue(m);
 536                 }
 537
 538                 u = slice;
 539         }
 540 }
 541
 542 int unit_realize_cgroup(Unit *u) {
 543         CGroupContext *c;
 544         int r;
 545
 546         assert(u);
 547
 548         c = unit_get_cgroup_context(u);
 549         if (!c)
 550                 return 0;
 551
 552         /* So, here's the deal: when realizing the cgroups for this
 553          * unit, we need to first create all parents, but there's more
 554          * actually: for the weight-based controllers we also need to
 555          * make sure that all our siblings (i.e. units that are in the
 556          * same slice as we are) have cgroups, too. Otherwise things
 557          * would become very uneven as each of their processes would
 558          * get as much resources as all our group together. This call
 559          * will synchronously create the parent cgroups, but will
 560          * defer work on the siblings to the next event loop
 561          * iteration. */
 562
 563         /* Add all sibling slices to the cgroup queue. */
 564         unit_queue_siblings(u);
 565
 566         /* And realize this one now (and apply the values) */
 567         r = unit_realize_cgroup_now(u);
 568
 569         return r;
 570 }
 571
 572 void unit_destroy_cgroup(Unit *u) {
 573         int r;
 574
 575         assert(u);
 576
 577         if (!u->cgroup_path)
 578                 return;
 579
 580         r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !unit_has_name(u, SPECIAL_ROOT_SLICE));
 581         if (r < 0)
 582                 log_debug("Failed to destroy cgroup %s: %s", u->cgroup_path, strerror(-r));
 583
 584         hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
 585
 586         free(u->cgroup_path);
 587         u->cgroup_path = NULL;
 588         u->cgroup_realized = false;
 589         u->cgroup_mask = 0;
 590
 591 }
 592
 593 pid_t unit_search_main_pid(Unit *u) {
 594         _cleanup_fclose_ FILE *f = NULL;
 595         pid_t pid = 0, npid, mypid;
 596
 597         assert(u);
 598
 599         if (!u->cgroup_path)
 600                 return 0;
 601
 602         if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
 603                 return 0;
 604
 605         mypid = getpid();
 606         while (cg_read_pid(f, &npid) > 0)  {
 607                 pid_t ppid;
 608
 609                 if (npid == pid)
 610                         continue;
 611
 612                 /* Ignore processes that aren't our kids */
 613                 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
 614                         continue;
 615
 616                 if (pid != 0) {
 617                         /* Dang, there's more than one daemonized PID
 618                         in this group, so we don't know what process
 619                         is the main process. */
 620                         pid = 0;
 621                         break;
 622                 }
 623
 624                 pid = npid;
 625         }
 626
 627         return pid;
 628 }
 629
 630 int manager_setup_cgroup(Manager *m) {
 631         _cleanup_free_ char *path = NULL;
 632         char *e;
 633         int r;
 634
 635         assert(m);
 636
 637         /* 0. Be nice to Ingo Molnar #628004 */
 638         if (path_is_mount_point("/sys/fs/cgroup/systemd", false) <= 0) {
 639                 log_warning("No control group support available, not creating root group.");
 640                 return 0;
 641         }
 642
 643         /* 1. Determine hierarchy */
 644         free(m->cgroup_root);
 645         m->cgroup_root = NULL;
 646
 647         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
 648         if (r < 0) {
 649                 log_error("Cannot determine cgroup we are running in: %s", strerror(-r));
 650                 return r;
 651         }
 652
 653         /* LEGACY: Already in /system.slice? If so, let's cut this
 654          * off. This is to support live upgrades from older systemd
 655          * versions where PID 1 was moved there. */
 656         if (m->running_as == SYSTEMD_SYSTEM) {
 657                 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
 658                 if (!e)
 659                         e = endswith(m->cgroup_root, "/system");
 660                 if (e)
 661                         *e = 0;
 662         }
 663
 664         /* And make sure to store away the root value without trailing
 665          * slash, even for the root dir, so that we can easily prepend
 666          * it everywhere. */
 667         if (streq(m->cgroup_root, "/"))
 668                 m->cgroup_root[0] = 0;
 669
 670         /* 2. Show data */
 671         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
 672         if (r < 0) {
 673                 log_error("Cannot find cgroup mount point: %s", strerror(-r));
 674                 return r;
 675         }
 676
 677         log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
 678
 679         /* 3. Install agent */
 680         if (m->running_as == SYSTEMD_SYSTEM) {
 681                 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
 682                 if (r < 0)
 683                         log_warning("Failed to install release agent, ignoring: %s", strerror(-r));
 684                 else if (r > 0)
 685                         log_debug("Installed release agent.");
 686                 else
 687                         log_debug("Release agent already installed.");
 688         }
 689
 690         /* 4. Make sure we are in the root cgroup */
 691         r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0);
 692         if (r < 0) {
 693                 log_error("Failed to create root cgroup hierarchy: %s", strerror(-r));
 694                 return r;
 695         }
 696
 697         /* 5. And pin it, so that it cannot be unmounted */
 698         if (m->pin_cgroupfs_fd >= 0)
 699                 close_nointr_nofail(m->pin_cgroupfs_fd);
 700
 701         m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
 702         if (r < 0) {
 703                 log_error("Failed to open pin file: %m");
 704                 return -errno;
 705         }
 706
 707         /* 6. Figure out which controllers are supported */
 708         m->cgroup_supported = cg_mask_supported();
 709
 710         /* 7.  Always enable hierarchial support if it exists... */
 711         cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
 712
 713         return 0;
 714 }
 715
 716 void manager_shutdown_cgroup(Manager *m, bool delete) {
 717         assert(m);
 718
 719         /* We can't really delete the group, since we are in it. But
 720          * let's trim it. */
 721         if (delete && m->cgroup_root)
 722                 cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
 723
 724         if (m->pin_cgroupfs_fd >= 0) {
 725                 close_nointr_nofail(m->pin_cgroupfs_fd);
 726                 m->pin_cgroupfs_fd = -1;
 727         }
 728
 729         free(m->cgroup_root);
 730         m->cgroup_root = NULL;
 731 }
 732
 733 Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
 734         char *p;
 735         Unit *u;
 736
 737         assert(m);
 738         assert(cgroup);
 739
 740         u = hashmap_get(m->cgroup_unit, cgroup);
 741         if (u)
 742                 return u;
 743
 744         p = strdupa(cgroup);
 745         for (;;) {
 746                 char *e;
 747
 748                 e = strrchr(p, '/');
 749                 if (e == p || !e)
 750                         return NULL;
 751
 752                 *e = 0;
 753
 754                 u = hashmap_get(m->cgroup_unit, p);
 755                 if (u)
 756                         return u;
 757         }
 758 }
 759
 760 Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
 761         _cleanup_free_ char *cgroup = NULL;
 762         int r;
 763
 764         assert(m);
 765
 766         if (pid <= 1)
 767                 return NULL;
 768
 769         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
 770         if (r < 0)
 771                 return NULL;
 772
 773         return manager_get_unit_by_cgroup(m, cgroup);
 774 }
 775
 776 int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
 777         Unit *u;
 778         int r;
 779
 780         assert(m);
 781         assert(cgroup);
 782
 783         u = manager_get_unit_by_cgroup(m, cgroup);
 784         if (u) {
 785                 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true);
 786                 if (r > 0) {
 787                         if (UNIT_VTABLE(u)->notify_cgroup_empty)
 788                                 UNIT_VTABLE(u)->notify_cgroup_empty(u);
 789
 790                         unit_add_to_gc_queue(u);
 791                 }
 792         }
 793
 794         return 0;
 795 }
 796
 797 static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
 798         [CGROUP_AUTO] = "auto",
 799         [CGROUP_CLOSED] = "closed",
 800         [CGROUP_STRICT] = "strict",
 801 };
 802
 803 DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);