src/core/cgroup.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2013 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <fcntl.h>
  23
  24 #include "path-util.h"
  25 #include "special.h"
  26 #include "cgroup-util.h"
  27 #include "cgroup.h"
  28
  29 void cgroup_context_init(CGroupContext *c) {
  30         assert(c);
  31
  32         /* Initialize everything to the kernel defaults, assuming the
  33          * structure is preinitialized to 0 */
  34
  35         c->cpu_shares = 1024;
  36         c->memory_limit = c->memory_soft_limit = (uint64_t) -1;
  37         c->blockio_weight = 1000;
  38 }
  39
  40 void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
  41         assert(c);
  42         assert(a);
  43
  44         LIST_REMOVE(CGroupDeviceAllow, device_allow, c->device_allow, a);
  45         free(a->path);
  46         free(a);
  47 }
  48
  49 void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
  50         assert(c);
  51         assert(w);
  52
  53         LIST_REMOVE(CGroupBlockIODeviceWeight, device_weights, c->blockio_device_weights, w);
  54         free(w->path);
  55         free(w);
  56 }
  57
  58 void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
  59         assert(c);
  60         assert(b);
  61
  62         LIST_REMOVE(CGroupBlockIODeviceBandwidth, device_bandwidths, c->blockio_device_bandwidths, b);
  63         free(b->path);
  64         free(b);
  65 }
  66
  67 void cgroup_context_done(CGroupContext *c) {
  68         assert(c);
  69
  70         while (c->blockio_device_weights)
  71                 cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
  72
  73         while (c->blockio_device_bandwidths)
  74                 cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
  75
  76         while (c->device_allow)
  77                 cgroup_context_free_device_allow(c, c->device_allow);
  78 }
  79
  80 void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
  81         CGroupBlockIODeviceBandwidth *b;
  82         CGroupBlockIODeviceWeight *w;
  83         CGroupDeviceAllow *a;
  84
  85         assert(c);
  86         assert(f);
  87
  88         prefix = strempty(prefix);
  89
  90         fprintf(f,
  91                 "%sCPUAccounting=%s\n"
  92                 "%sBlockIOAccounting=%s\n"
  93                 "%sMemoryAccounting=%s\n"
  94                 "%sCPUShares=%lu\n"
  95                 "%sBlockIOWeight%lu\n"
  96                 "%sMemoryLimit=%" PRIu64 "\n"
  97                 "%sMemorySoftLimit=%" PRIu64 "\n"
  98                 "%sDevicePolicy=%s\n",
  99                 prefix, yes_no(c->cpu_accounting),
 100                 prefix, yes_no(c->blockio_accounting),
 101                 prefix, yes_no(c->memory_accounting),
 102                 prefix, c->cpu_shares,
 103                 prefix, c->blockio_weight,
 104                 prefix, c->memory_limit,
 105                 prefix, c->memory_soft_limit,
 106                 prefix, cgroup_device_policy_to_string(c->device_policy));
 107
 108         LIST_FOREACH(device_allow, a, c->device_allow)
 109                 fprintf(f,
 110                         "%sDeviceAllow=%s %s%s%s\n",
 111                         prefix,
 112                         a->path,
 113                         a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
 114
 115         LIST_FOREACH(device_weights, w, c->blockio_device_weights)
 116                 fprintf(f,
 117                         "%sBlockIODeviceWeight=%s %lu",
 118                         prefix,
 119                         w->path,
 120                         w->weight);
 121
 122         LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
 123                 char buf[FORMAT_BYTES_MAX];
 124
 125                 fprintf(f,
 126                         "%s%s=%s %s\n",
 127                         prefix,
 128                         b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
 129                         b->path,
 130                         format_bytes(buf, sizeof(buf), b->bandwidth));
 131         }
 132 }
 133
 134 static int lookup_blkio_device(const char *p, dev_t *dev) {
 135         struct stat st;
 136         int r;
 137
 138         assert(p);
 139         assert(dev);
 140
 141         r = stat(p, &st);
 142         if (r < 0) {
 143                 log_warning("Couldn't stat device %s: %m", p);
 144                 return -errno;
 145         }
 146
 147         if (S_ISBLK(st.st_mode))
 148                 *dev = st.st_rdev;
 149         else if (major(st.st_dev) != 0) {
 150                 /* If this is not a device node then find the block
 151                  * device this file is stored on */
 152                 *dev = st.st_dev;
 153
 154                 /* If this is a partition, try to get the originating
 155                  * block device */
 156                 block_get_whole_disk(*dev, dev);
 157         } else {
 158                 log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
 159                 return -ENODEV;
 160         }
 161
 162         return 0;
 163 }
 164
 165 static int whitelist_device(const char *path, const char *node, const char *acc) {
 166         char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
 167         struct stat st;
 168         int r;
 169
 170         assert(path);
 171         assert(acc);
 172
 173         if (stat(node, &st) < 0) {
 174                 log_warning("Couldn't stat device %s", node);
 175                 return -errno;
 176         }
 177
 178         if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
 179                 log_warning("%s is not a device.", node);
 180                 return -ENODEV;
 181         }
 182
 183         sprintf(buf,
 184                 "%c %u:%u %s",
 185                 S_ISCHR(st.st_mode) ? 'c' : 'b',
 186                 major(st.st_rdev), minor(st.st_rdev),
 187                 acc);
 188
 189         r = cg_set_attribute("devices", path, "devices.allow", buf);
 190         if (r < 0)
 191                 log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r));
 192
 193         return r;
 194 }
 195
 196 void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path) {
 197         int r;
 198
 199         assert(c);
 200         assert(path);
 201
 202         if (mask == 0)
 203                 return;
 204
 205         if (mask & CGROUP_CPU) {
 206                 char buf[DECIMAL_STR_MAX(unsigned long) + 1];
 207
 208                 sprintf(buf, "%lu\n", c->cpu_shares);
 209                 r = cg_set_attribute("cpu", path, "cpu.shares", buf);
 210                 if (r < 0)
 211                         log_warning("Failed to set cpu.shares on %s: %s", path, strerror(-r));
 212         }
 213
 214         if (mask & CGROUP_BLKIO) {
 215                 char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
 216                               DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
 217                               DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
 218                 CGroupBlockIODeviceWeight *w;
 219                 CGroupBlockIODeviceBandwidth *b;
 220
 221                 sprintf(buf, "%lu\n", c->blockio_weight);
 222                 r = cg_set_attribute("blkio", path, "blkio.weight", buf);
 223                 if (r < 0)
 224                         log_warning("Failed to set blkio.weight on %s: %s", path, strerror(-r));
 225
 226                 /* FIXME: no way to reset this list */
 227                 LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
 228                         dev_t dev;
 229
 230                         r = lookup_blkio_device(w->path, &dev);
 231                         if (r < 0)
 232                                 continue;
 233
 234                         sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
 235                         r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
 236                         if (r < 0)
 237                                 log_error("Failed to set blkio.weight_device on %s: %s", path, strerror(-r));
 238                 }
 239
 240                 /* FIXME: no way to reset this list */
 241                 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
 242                         const char *a;
 243                         dev_t dev;
 244
 245                         r = lookup_blkio_device(b->path, &dev);
 246                         if (r < 0)
 247                                 continue;
 248
 249                         a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
 250
 251                         sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
 252                         r = cg_set_attribute("blkio", path, a, buf);
 253                         if (r < 0)
 254                                 log_error("Failed to set %s on %s: %s", a, path, strerror(-r));
 255                 }
 256         }
 257
 258         if (mask & CGROUP_MEMORY) {
 259                 char buf[DECIMAL_STR_MAX(uint64_t) + 1];
 260                 if (c->memory_limit != (uint64_t) -1) {
 261                         sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
 262                         r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
 263                 } else
 264                         r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
 265
 266                 if (r < 0)
 267                         log_error("Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
 268
 269                 if (c->memory_soft_limit != (uint64_t) -1) {
 270                         sprintf(buf, "%" PRIu64 "\n", c->memory_soft_limit);
 271                         r = cg_set_attribute("memory", path, "memory.soft_limit_in_bytes", buf);
 272                 } else
 273                         r = cg_set_attribute("memory", path, "memory.soft_limit_in_bytes", "-1");
 274
 275                 if (r < 0)
 276                         log_error("Failed to set memory.soft_limit_in_bytes on %s: %s", path, strerror(-r));
 277         }
 278
 279         if (mask & CGROUP_DEVICE) {
 280                 CGroupDeviceAllow *a;
 281
 282                 if (c->device_allow || c->device_policy != CGROUP_AUTO)
 283                         r = cg_set_attribute("devices", path, "devices.deny", "a");
 284                 else
 285                         r = cg_set_attribute("devices", path, "devices.allow", "a");
 286                 if (r < 0)
 287                         log_error("Failed to reset devices.list on %s: %s", path, strerror(-r));
 288
 289                 if (c->device_policy == CGROUP_CLOSED ||
 290                     (c->device_policy == CGROUP_AUTO && c->device_allow)) {
 291                         static const char auto_devices[] =
 292                                 "/dev/null\0" "rw\0"
 293                                 "/dev/zero\0" "rw\0"
 294                                 "/dev/full\0" "rw\0"
 295                                 "/dev/random\0" "rw\0"
 296                                 "/dev/urandom\0" "rw\0";
 297
 298                         const char *x, *y;
 299
 300                         NULSTR_FOREACH_PAIR(x, y, auto_devices)
 301                                 whitelist_device(path, x, y);
 302                 }
 303
 304                 LIST_FOREACH(device_allow, a, c->device_allow) {
 305                         char acc[4];
 306                         unsigned k = 0;
 307
 308                         if (a->r)
 309                                 acc[k++] = 'r';
 310                         if (a->w)
 311                                 acc[k++] = 'w';
 312                         if (a->m)
 313                                 acc[k++] = 'm';
 314
 315                         if (k == 0)
 316                                 continue;
 317
 318                         acc[k++] = 0;
 319                         whitelist_device(path, a->path, acc);
 320                 }
 321         }
 322 }
 323
 324 CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
 325         CGroupControllerMask mask = 0;
 326
 327         /* Figure out which controllers we need */
 328
 329         if (c->cpu_accounting || c->cpu_shares != 1024)
 330                 mask |= CGROUP_CPUACCT | CGROUP_CPU;
 331
 332         if (c->blockio_accounting ||
 333             c->blockio_weight != 1000 ||
 334             c->blockio_device_weights ||
 335             c->blockio_device_bandwidths)
 336                 mask |= CGROUP_BLKIO;
 337
 338         if (c->memory_accounting ||
 339             c->memory_limit != (uint64_t) -1 ||
 340             c->memory_soft_limit != (uint64_t) -1)
 341                 mask |= CGROUP_MEMORY;
 342
 343         if (c->device_allow || c->device_policy != CGROUP_AUTO)
 344                 mask |= CGROUP_DEVICE;
 345
 346         return mask;
 347 }
 348
 349 static CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
 350         CGroupContext *c;
 351
 352         c = unit_get_cgroup_context(u);
 353         if (!c)
 354                 return 0;
 355
 356         return cgroup_context_get_mask(c);
 357 }
 358
 359 static CGroupControllerMask unit_get_members_mask(Unit *u) {
 360         CGroupControllerMask mask = 0;
 361         Unit *m;
 362         Iterator i;
 363
 364         assert(u);
 365
 366         SET_FOREACH(m, u->dependencies[UNIT_BEFORE], i) {
 367
 368                 if (UNIT_DEREF(m->slice) != u)
 369                         continue;
 370
 371                 mask |= unit_get_cgroup_mask(m) | unit_get_members_mask(m);
 372         }
 373
 374         return mask;
 375 }
 376
 377 static CGroupControllerMask unit_get_siblings_mask(Unit *u) {
 378         assert(u);
 379
 380         if (!UNIT_ISSET(u->slice))
 381                 return 0;
 382
 383         /* Sibling propagation is only relevant for weight-based
 384          * controllers, so let's mask out everything else */
 385         return unit_get_members_mask(UNIT_DEREF(u->slice)) &
 386                 (CGROUP_CPU|CGROUP_BLKIO|CGROUP_CPUACCT);
 387 }
 388
 389 static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
 390         char *path = NULL;
 391         int r;
 392         bool is_in_hash = false;
 393
 394         assert(u);
 395
 396         path = unit_default_cgroup_path(u);
 397         if (!path)
 398                 return -ENOMEM;
 399
 400         r = hashmap_put(u->manager->cgroup_unit, path, u);
 401         if (r == 0)
 402                 is_in_hash = true;
 403
 404         if (r < 0) {
 405                 log_error("cgroup %s exists already: %s", path, strerror(-r));
 406                 free(path);
 407                 return r;
 408         }
 409
 410         /* First, create our own group */
 411         r = cg_create_with_mask(mask, path);
 412         if (r < 0)
 413                 log_error("Failed to create cgroup %s: %s", path, strerror(-r));
 414
 415         /* Then, possibly move things over */
 416         if (u->cgroup_path && !streq(path, u->cgroup_path)) {
 417                 r = cg_migrate_with_mask(mask, u->cgroup_path, path);
 418                 if (r < 0)
 419                         log_error("Failed to migrate cgroup %s: %s", path, strerror(-r));
 420         }
 421
 422         if (!is_in_hash) {
 423                 /* And remember the new data */
 424                 free(u->cgroup_path);
 425                 u->cgroup_path = path;
 426         }
 427
 428         u->cgroup_realized = true;
 429         u->cgroup_mask = mask;
 430
 431         return 0;
 432 }
 433
 434 static int unit_realize_cgroup_now(Unit *u) {
 435         CGroupControllerMask mask;
 436
 437         assert(u);
 438
 439         if (u->in_cgroup_queue) {
 440                 LIST_REMOVE(Unit, cgroup_queue, u->manager->cgroup_queue, u);
 441                 u->in_cgroup_queue = false;
 442         }
 443
 444         mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
 445         mask &= u->manager->cgroup_supported;
 446
 447         if (u->cgroup_realized &&
 448             u->cgroup_mask == mask)
 449                 return 0;
 450
 451         /* First, realize parents */
 452         if (UNIT_ISSET(u->slice))
 453                 unit_realize_cgroup_now(UNIT_DEREF(u->slice));
 454
 455         /* And then do the real work */
 456         return unit_create_cgroups(u, mask);
 457 }
 458
 459 static void unit_add_to_cgroup_queue(Unit *u) {
 460
 461         if (u->in_cgroup_queue)
 462                 return;
 463
 464         LIST_PREPEND(Unit, cgroup_queue, u->manager->cgroup_queue, u);
 465         u->in_cgroup_queue = true;
 466 }
 467
 468 unsigned manager_dispatch_cgroup_queue(Manager *m) {
 469         Unit *i;
 470         unsigned n = 0;
 471
 472         while ((i = m->cgroup_queue)) {
 473                 assert(i->in_cgroup_queue);
 474
 475                 if (unit_realize_cgroup_now(i) >= 0)
 476                         cgroup_context_apply(unit_get_cgroup_context(i), i->cgroup_mask, i->cgroup_path);
 477
 478                 n++;
 479         }
 480
 481         return n;
 482 }
 483
 484 static void unit_queue_siblings(Unit *u) {
 485         Unit *slice;
 486
 487         /* This adds the siblings of the specified unit and the
 488          * siblings of all parent units to the cgroup queue. (But
 489          * neither the specified unit itself nor the parents.) */
 490
 491         while ((slice = UNIT_DEREF(u->slice))) {
 492                 Iterator i;
 493                 Unit *m;
 494
 495                 SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
 496                         if (m == u)
 497                                 continue;
 498
 499                         if (UNIT_DEREF(m->slice) != slice)
 500                                 continue;
 501
 502                         unit_add_to_cgroup_queue(m);
 503                 }
 504
 505                 u = slice;
 506         }
 507 }
 508
 509 int unit_realize_cgroup(Unit *u) {
 510         CGroupContext *c;
 511         int r;
 512
 513         assert(u);
 514
 515         c = unit_get_cgroup_context(u);
 516         if (!c)
 517                 return 0;
 518
 519         /* So, here's the deal: when realizing the cgroups for this
 520          * unit, we need to first create all parents, but there's more
 521          * actually: for the weight-based controllers we also need to
 522          * make sure that all our siblings (i.e. units that are in the
 523          * same slice as we are) have cgroup too. Otherwise things
 524          * would become very uneven as each of their processes would
 525          * get as much resources as all our group together. This call
 526          * will synchronously create the parent cgroups, but will
 527          * defer work on the siblings to the next event loop
 528          * iteration. */
 529
 530         /* Add all sibling slices to the cgroup queue. */
 531         unit_queue_siblings(u);
 532
 533         /* And realize this one now */
 534         r = unit_realize_cgroup_now(u);
 535
 536         /* And apply the values */
 537         if (r >= 0)
 538                 cgroup_context_apply(c, u->cgroup_mask, u->cgroup_path);
 539
 540         return r;
 541 }
 542
 543 void unit_destroy_cgroup(Unit *u) {
 544         int r;
 545
 546         assert(u);
 547
 548         if (!u->cgroup_path)
 549                 return;
 550
 551         r = cg_trim_with_mask(u->cgroup_mask, u->cgroup_path, !unit_has_name(u, SPECIAL_ROOT_SLICE));
 552         if (r < 0)
 553                 log_debug("Failed to destroy cgroup %s: %s", u->cgroup_path, strerror(-r));
 554
 555         hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
 556
 557         free(u->cgroup_path);
 558         u->cgroup_path = NULL;
 559         u->cgroup_realized = false;
 560         u->cgroup_mask = 0;
 561
 562 }
 563
 564 pid_t unit_search_main_pid(Unit *u) {
 565         _cleanup_fclose_ FILE *f = NULL;
 566         pid_t pid = 0, npid, mypid;
 567
 568         assert(u);
 569
 570         if (!u->cgroup_path)
 571                 return 0;
 572
 573         if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
 574                 return 0;
 575
 576         mypid = getpid();
 577         while (cg_read_pid(f, &npid) > 0)  {
 578                 pid_t ppid;
 579
 580                 if (npid == pid)
 581                         continue;
 582
 583                 /* Ignore processes that aren't our kids */
 584                 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
 585                         continue;
 586
 587                 if (pid != 0) {
 588                         /* Dang, there's more than one daemonized PID
 589                         in this group, so we don't know what process
 590                         is the main process. */
 591                         pid = 0;
 592                         break;
 593                 }
 594
 595                 pid = npid;
 596         }
 597
 598         return pid;
 599 }
 600
 601 int manager_setup_cgroup(Manager *m) {
 602         _cleanup_free_ char *path = NULL;
 603         int r;
 604         char *e, *a;
 605
 606         assert(m);
 607
 608         /* 0. Be nice to Ingo Molnar #628004 */
 609         if (path_is_mount_point("/sys/fs/cgroup/systemd", false) <= 0) {
 610                 log_warning("No control group support available, not creating root group.");
 611                 return 0;
 612         }
 613
 614         /* 1. Determine hierarchy */
 615         free(m->cgroup_root);
 616         m->cgroup_root = NULL;
 617
 618         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
 619         if (r < 0) {
 620                 log_error("Cannot determine cgroup we are running in: %s", strerror(-r));
 621                 return r;
 622         }
 623
 624         /* Already in /system.slice? If so, let's cut this off again */
 625         if (m->running_as == SYSTEMD_SYSTEM) {
 626                 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
 627                 if (e)
 628                         *e = 0;
 629         }
 630
 631         /* And make sure to store away the root value without trailing
 632          * slash, even for the root dir, so that we can easily prepend
 633          * it everywhere. */
 634         if (streq(m->cgroup_root, "/"))
 635                 m->cgroup_root[0] = 0;
 636
 637         /* 2. Show data */
 638         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
 639         if (r < 0) {
 640                 log_error("Cannot find cgroup mount point: %s", strerror(-r));
 641                 return r;
 642         }
 643
 644         log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
 645
 646         /* 3. Install agent */
 647         if (m->running_as == SYSTEMD_SYSTEM) {
 648                 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
 649                 if (r < 0)
 650                         log_warning("Failed to install release agent, ignoring: %s", strerror(-r));
 651                 else if (r > 0)
 652                         log_debug("Installed release agent.");
 653                 else
 654                         log_debug("Release agent already installed.");
 655         }
 656
 657         /* 4. Realize the system slice and put us in there */
 658         if (m->running_as == SYSTEMD_SYSTEM) {
 659                 a = strappenda(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
 660                 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, a, 0);
 661         } else
 662                 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0);
 663         if (r < 0) {
 664                 log_error("Failed to create root cgroup hierarchy: %s", strerror(-r));
 665                 return r;
 666         }
 667
 668         /* 5. And pin it, so that it cannot be unmounted */
 669         if (m->pin_cgroupfs_fd >= 0)
 670                 close_nointr_nofail(m->pin_cgroupfs_fd);
 671
 672         m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
 673         if (r < 0) {
 674                 log_error("Failed to open pin file: %m");
 675                 return -errno;
 676         }
 677
 678         /* 6. Figure out which controllers are supported */
 679         m->cgroup_supported = cg_mask_supported();
 680
 681         return 0;
 682 }
 683
 684 void manager_shutdown_cgroup(Manager *m, bool delete) {
 685         assert(m);
 686
 687         /* We can't really delete the group, since we are in it. But
 688          * let's trim it. */
 689         if (delete && m->cgroup_root)
 690                 cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
 691
 692         if (m->pin_cgroupfs_fd >= 0) {
 693                 close_nointr_nofail(m->pin_cgroupfs_fd);
 694                 m->pin_cgroupfs_fd = -1;
 695         }
 696
 697         free(m->cgroup_root);
 698         m->cgroup_root = NULL;
 699 }
 700
 701 Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
 702         char *p;
 703         Unit *u;
 704
 705         assert(m);
 706         assert(cgroup);
 707
 708         u = hashmap_get(m->cgroup_unit, cgroup);
 709         if (u)
 710                 return u;
 711
 712         p = strdupa(cgroup);
 713         for (;;) {
 714                 char *e;
 715
 716                 e = strrchr(p, '/');
 717                 if (e == p || !e)
 718                         return NULL;
 719
 720                 *e = 0;
 721
 722                 u = hashmap_get(m->cgroup_unit, p);
 723                 if (u)
 724                         return u;
 725         }
 726 }
 727
 728 Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
 729         _cleanup_free_ char *cgroup = NULL;
 730         int r;
 731
 732         assert(m);
 733
 734         if (pid <= 1)
 735                 return NULL;
 736
 737         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
 738         if (r < 0)
 739                 return NULL;
 740
 741         return manager_get_unit_by_cgroup(m, cgroup);
 742 }
 743
 744 int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
 745         Unit *u;
 746         int r;
 747
 748         assert(m);
 749         assert(cgroup);
 750
 751         u = manager_get_unit_by_cgroup(m, cgroup);
 752         if (u) {
 753                 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true);
 754                 if (r > 0) {
 755                         if (UNIT_VTABLE(u)->notify_cgroup_empty)
 756                                 UNIT_VTABLE(u)->notify_cgroup_empty(u);
 757
 758                         unit_add_to_gc_queue(u);
 759                 }
 760         }
 761
 762         return 0;
 763 }
 764
 765 static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
 766         [CGROUP_AUTO] = "auto",
 767         [CGROUP_CLOSED] = "closed",
 768         [CGROUP_STRICT] = "strict",
 769 };
 770
 771 DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);