src/core/cgroup.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2013 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <fcntl.h>
  23
  24 #include "path-util.h"
  25 #include "special.h"
  26 #include "cgroup-util.h"
  27 #include "cgroup.h"
  28
  29 void cgroup_context_init(CGroupContext *c) {
  30         assert(c);
  31
  32         /* Initialize everything to the kernel defaults, assuming the
  33          * structure is preinitialized to 0 */
  34
  35         c->cpu_shares = 1024;
  36         c->memory_limit = c->memory_soft_limit = (uint64_t) -1;
  37         c->blockio_weight = 1000;
  38 }
  39
  40 void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
  41         assert(c);
  42         assert(a);
  43
  44         LIST_REMOVE(CGroupDeviceAllow, device_allow, c->device_allow, a);
  45         free(a->path);
  46         free(a);
  47 }
  48
  49 void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
  50         assert(c);
  51         assert(w);
  52
  53         LIST_REMOVE(CGroupBlockIODeviceWeight, device_weights, c->blockio_device_weights, w);
  54         free(w->path);
  55         free(w);
  56 }
  57
  58 void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
  59         assert(c);
  60         assert(b);
  61
  62         LIST_REMOVE(CGroupBlockIODeviceBandwidth, device_bandwidths, c->blockio_device_bandwidths, b);
  63         free(b->path);
  64         free(b);
  65 }
  66
  67 void cgroup_context_done(CGroupContext *c) {
  68         assert(c);
  69
  70         while (c->blockio_device_weights)
  71                 cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
  72
  73         while (c->blockio_device_bandwidths)
  74                 cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
  75
  76         while (c->device_allow)
  77                 cgroup_context_free_device_allow(c, c->device_allow);
  78 }
  79
  80 void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
  81         CGroupBlockIODeviceBandwidth *b;
  82         CGroupBlockIODeviceWeight *w;
  83         CGroupDeviceAllow *a;
  84
  85         assert(c);
  86         assert(f);
  87
  88         prefix = strempty(prefix);
  89
  90         fprintf(f,
  91                 "%sCPUAccounting=%s\n"
  92                 "%sBlockIOAccounting=%s\n"
  93                 "%sMemoryAccounting=%s\n"
  94                 "%sCPUShares=%lu\n"
  95                 "%sBlockIOWeight%lu\n"
  96                 "%sMemoryLimit=%" PRIu64 "\n"
  97                 "%sMemorySoftLimit=%" PRIu64 "\n"
  98                 "%sDevicePolicy=%s\n",
  99                 prefix, yes_no(c->cpu_accounting),
 100                 prefix, yes_no(c->blockio_accounting),
 101                 prefix, yes_no(c->memory_accounting),
 102                 prefix, c->cpu_shares,
 103                 prefix, c->blockio_weight,
 104                 prefix, c->memory_limit,
 105                 prefix, c->memory_soft_limit,
 106                 prefix, cgroup_device_policy_to_string(c->device_policy));
 107
 108         LIST_FOREACH(device_allow, a, c->device_allow)
 109                 fprintf(f,
 110                         "%sDeviceAllow=%s %s%s%s\n",
 111                         prefix,
 112                         a->path,
 113                         a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
 114
 115         LIST_FOREACH(device_weights, w, c->blockio_device_weights)
 116                 fprintf(f,
 117                         "%sBlockIODeviceWeight=%s %lu",
 118                         prefix,
 119                         w->path,
 120                         w->weight);
 121
 122         LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
 123                 char buf[FORMAT_BYTES_MAX];
 124
 125                 fprintf(f,
 126                         "%s%s=%s %s\n",
 127                         prefix,
 128                         b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
 129                         b->path,
 130                         format_bytes(buf, sizeof(buf), b->bandwidth));
 131         }
 132 }
 133
 134 static int lookup_blkio_device(const char *p, dev_t *dev) {
 135         struct stat st;
 136         int r;
 137
 138         assert(p);
 139         assert(dev);
 140
 141         r = stat(p, &st);
 142         if (r < 0) {
 143                 log_warning("Couldn't stat device %s: %m", p);
 144                 return -errno;
 145         }
 146
 147         if (S_ISBLK(st.st_mode))
 148                 *dev = st.st_rdev;
 149         else if (major(st.st_dev) != 0) {
 150                 /* If this is not a device node then find the block
 151                  * device this file is stored on */
 152                 *dev = st.st_dev;
 153
 154                 /* If this is a partition, try to get the originating
 155                  * block device */
 156                 block_get_whole_disk(*dev, dev);
 157         } else {
 158                 log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
 159                 return -ENODEV;
 160         }
 161
 162         return 0;
 163 }
 164
 165 static int whitelist_device(const char *path, const char *node, const char *acc) {
 166         char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
 167         struct stat st;
 168         int r;
 169
 170         assert(path);
 171         assert(acc);
 172
 173         if (stat(node, &st) < 0) {
 174                 log_warning("Couldn't stat device %s", node);
 175                 return -errno;
 176         }
 177
 178         if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
 179                 log_warning("%s is not a device.", node);
 180                 return -ENODEV;
 181         }
 182
 183         sprintf(buf,
 184                 "%c %u:%u %s",
 185                 S_ISCHR(st.st_mode) ? 'c' : 'b',
 186                 major(st.st_rdev), minor(st.st_rdev),
 187                 acc);
 188
 189         r = cg_set_attribute("devices", path, "devices.allow", buf);
 190         if (r < 0)
 191                 log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r));
 192
 193         return r;
 194 }
 195
 196 void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path) {
 197         int r;
 198
 199         assert(c);
 200         assert(path);
 201
 202         if (mask == 0)
 203                 return;
 204
 205         if (mask & CGROUP_CPU) {
 206                 char buf[DECIMAL_STR_MAX(unsigned long) + 1];
 207
 208                 sprintf(buf, "%lu\n", c->cpu_shares);
 209                 r = cg_set_attribute("cpu", path, "cpu.shares", buf);
 210                 if (r < 0)
 211                         log_warning("Failed to set cpu.shares on %s: %s", path, strerror(-r));
 212         }
 213
 214         if (mask & CGROUP_BLKIO) {
 215                 char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
 216                               DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
 217                               DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
 218                 CGroupBlockIODeviceWeight *w;
 219                 CGroupBlockIODeviceBandwidth *b;
 220
 221                 sprintf(buf, "%lu\n", c->blockio_weight);
 222                 r = cg_set_attribute("blkio", path, "blkio.weight", buf);
 223                 if (r < 0)
 224                         log_warning("Failed to set blkio.weight on %s: %s", path, strerror(-r));
 225
 226                 /* FIXME: no way to reset this list */
 227                 LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
 228                         dev_t dev;
 229
 230                         r = lookup_blkio_device(w->path, &dev);
 231                         if (r < 0)
 232                                 continue;
 233
 234                         sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
 235                         r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
 236                         if (r < 0)
 237                                 log_error("Failed to set blkio.weight_device on %s: %s", path, strerror(-r));
 238                 }
 239
 240                 /* FIXME: no way to reset this list */
 241                 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
 242                         const char *a;
 243                         dev_t dev;
 244
 245                         r = lookup_blkio_device(b->path, &dev);
 246                         if (r < 0)
 247                                 continue;
 248
 249                         a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
 250
 251                         sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
 252                         r = cg_set_attribute("blkio", path, a, buf);
 253                         if (r < 0)
 254                                 log_error("Failed to set %s on %s: %s", a, path, strerror(-r));
 255                 }
 256         }
 257
 258         if (mask & CGROUP_MEMORY) {
 259                 char buf[DECIMAL_STR_MAX(uint64_t) + 1];
 260
 261                 sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
 262                 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
 263                 if (r < 0)
 264                         log_error("Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
 265
 266                 sprintf(buf, "%" PRIu64 "\n", c->memory_soft_limit);
 267                 cg_set_attribute("memory", path, "memory.soft_limit_in_bytes", buf);
 268                 if (r < 0)
 269                         log_error("Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
 270         }
 271
 272         if (mask & CGROUP_DEVICE) {
 273                 CGroupDeviceAllow *a;
 274
 275                 if (c->device_allow || c->device_policy != CGROUP_AUTO)
 276                         r = cg_set_attribute("devices", path, "devices.deny", "a");
 277                 else
 278                         r = cg_set_attribute("devices", path, "devices.allow", "a");
 279                 if (r < 0)
 280                         log_error("Failed to reset devices.list on %s: %s", path, strerror(-r));
 281
 282                 if (c->device_policy == CGROUP_CLOSED ||
 283                     (c->device_policy == CGROUP_AUTO && c->device_allow)) {
 284                         static const char auto_devices[] =
 285                                 "/dev/null\0" "rw\0"
 286                                 "/dev/zero\0" "rw\0"
 287                                 "/dev/full\0" "rw\0"
 288                                 "/dev/random\0" "rw\0"
 289                                 "/dev/urandom\0" "rw\0";
 290
 291                         const char *x, *y;
 292
 293                         NULSTR_FOREACH_PAIR(x, y, auto_devices)
 294                                 whitelist_device(path, x, y);
 295                 }
 296
 297                 LIST_FOREACH(device_allow, a, c->device_allow) {
 298                         char acc[4];
 299                         unsigned k = 0;
 300
 301                         if (a->r)
 302                                 acc[k++] = 'r';
 303                         if (a->w)
 304                                 acc[k++] = 'w';
 305                         if (a->m)
 306                                 acc[k++] = 'm';
 307
 308                         if (k == 0)
 309                                 continue;
 310
 311                         acc[k++] = 0;
 312                         whitelist_device(path, a->path, acc);
 313                 }
 314         }
 315 }
 316
 317 CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
 318         CGroupControllerMask mask = 0;
 319
 320         /* Figure out which controllers we need */
 321
 322         if (c->cpu_accounting || c->cpu_shares != 1024)
 323                 mask |= CGROUP_CPUACCT | CGROUP_CPU;
 324
 325         if (c->blockio_accounting ||
 326             c->blockio_weight != 1000 ||
 327             c->blockio_device_weights ||
 328             c->blockio_device_bandwidths)
 329                 mask |= CGROUP_BLKIO;
 330
 331         if (c->memory_accounting ||
 332             c->memory_limit != (uint64_t) -1 ||
 333             c->memory_soft_limit != (uint64_t) -1)
 334                 mask |= CGROUP_MEMORY;
 335
 336         if (c->device_allow || c->device_policy != CGROUP_AUTO)
 337                 mask |= CGROUP_DEVICE;
 338
 339         return mask;
 340 }
 341
 342 static CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
 343         CGroupContext *c;
 344
 345         c = unit_get_cgroup_context(u);
 346         if (!c)
 347                 return 0;
 348
 349         return cgroup_context_get_mask(c);
 350 }
 351
 352 static CGroupControllerMask unit_get_members_mask(Unit *u) {
 353         CGroupControllerMask mask = 0;
 354         Unit *m;
 355         Iterator i;
 356
 357         assert(u);
 358
 359         SET_FOREACH(m, u->dependencies[UNIT_BEFORE], i) {
 360
 361                 if (UNIT_DEREF(m->slice) != u)
 362                         continue;
 363
 364                 mask |= unit_get_cgroup_mask(m) | unit_get_members_mask(m);
 365         }
 366
 367         return mask;
 368 }
 369
 370 static CGroupControllerMask unit_get_siblings_mask(Unit *u) {
 371         assert(u);
 372
 373         if (!UNIT_ISSET(u->slice))
 374                 return 0;
 375
 376         /* Sibling propagation is only relevant for weight-based
 377          * controllers, so let's mask out everything else */
 378         return unit_get_members_mask(UNIT_DEREF(u->slice)) &
 379                 (CGROUP_CPU|CGROUP_BLKIO|CGROUP_CPUACCT);
 380 }
 381
 382 static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
 383         char *path = NULL;
 384         int r;
 385
 386         assert(u);
 387
 388         path = unit_default_cgroup_path(u);
 389         if (!path)
 390                 return -ENOMEM;
 391
 392         r = hashmap_put(u->manager->cgroup_unit, path, u);
 393         if (r < 0)
 394                 return r;
 395
 396         /* First, create our own group */
 397         r = cg_create_with_mask(mask, path);
 398         if (r < 0)
 399                 log_error("Failed to create cgroup %s: %s", path, strerror(-r));
 400
 401         /* Then, possibly move things over */
 402         if (u->cgroup_path && !streq(path, u->cgroup_path)) {
 403                 r = cg_migrate_with_mask(mask, u->cgroup_path, path);
 404                 if (r < 0)
 405                         log_error("Failed to migrate cgroup %s: %s", path, strerror(-r));
 406         }
 407
 408         /* And remember the new data */
 409         free(u->cgroup_path);
 410         u->cgroup_path = path;
 411         u->cgroup_realized = true;
 412         u->cgroup_mask = mask;
 413
 414         return 0;
 415 }
 416
 417 static int unit_realize_cgroup_now(Unit *u) {
 418         CGroupControllerMask mask;
 419
 420         assert(u);
 421
 422         if (u->in_cgroup_queue) {
 423                 LIST_REMOVE(Unit, cgroup_queue, u->manager->cgroup_queue, u);
 424                 u->in_cgroup_queue = false;
 425         }
 426
 427         mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
 428         mask &= u->manager->cgroup_supported;
 429
 430         if (u->cgroup_realized &&
 431             u->cgroup_mask == mask)
 432                 return 0;
 433
 434         /* First, realize parents */
 435         if (UNIT_ISSET(u->slice))
 436                 unit_realize_cgroup_now(UNIT_DEREF(u->slice));
 437
 438         /* And then do the real work */
 439         return unit_create_cgroups(u, mask);
 440 }
 441
 442 static void unit_add_to_cgroup_queue(Unit *u) {
 443
 444         if (u->in_cgroup_queue)
 445                 return;
 446
 447         LIST_PREPEND(Unit, cgroup_queue, u->manager->cgroup_queue, u);
 448         u->in_cgroup_queue = true;
 449 }
 450
 451 unsigned manager_dispatch_cgroup_queue(Manager *m) {
 452         Unit *i;
 453         unsigned n = 0;
 454
 455         while ((i = m->cgroup_queue)) {
 456                 assert(i->in_cgroup_queue);
 457
 458                 if (unit_realize_cgroup_now(i) >= 0)
 459                         cgroup_context_apply(unit_get_cgroup_context(i), i->cgroup_mask, i->cgroup_path);
 460
 461                 n++;
 462         }
 463
 464         return n;
 465 }
 466
 467 static void unit_queue_siblings(Unit *u) {
 468         Unit *slice;
 469
 470         /* This adds the siblings of the specified unit and the
 471          * siblings of all parent units to the cgroup queue. (But
 472          * neither the specified unit itself nor the parents.) */
 473
 474         while ((slice = UNIT_DEREF(u->slice))) {
 475                 Iterator i;
 476                 Unit *m;
 477
 478                 SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
 479                         if (m == u)
 480                                 continue;
 481
 482                         if (UNIT_DEREF(m->slice) != slice)
 483                                 continue;
 484
 485                         unit_add_to_cgroup_queue(m);
 486                 }
 487
 488                 u = slice;
 489         }
 490 }
 491
 492 int unit_realize_cgroup(Unit *u) {
 493         CGroupContext *c;
 494         int r;
 495
 496         assert(u);
 497
 498         c = unit_get_cgroup_context(u);
 499         if (!c)
 500                 return 0;
 501
 502         /* So, here's the deal: when realizing the cgroups for this
 503          * unit, we need to first create all parents, but there's more
 504          * actually: for the weight-based controllers we also need to
 505          * make sure that all our siblings (i.e. units that are in the
 506          * same slice as we are) have cgroup too. Otherwise things
 507          * would become very uneven as each of their processes would
 508          * get as much resources as all our group together. This call
 509          * will synchronously create the parent cgroups, but will
 510          * defer work on the siblings to the next event loop
 511          * iteration. */
 512
 513         /* Add all sibling slices to the cgroup queue. */
 514         unit_queue_siblings(u);
 515
 516         /* And realize this one now */
 517         r = unit_realize_cgroup_now(u);
 518
 519         /* And apply the values */
 520         if (r >= 0)
 521                 cgroup_context_apply(c, u->cgroup_mask, u->cgroup_path);
 522
 523         return r;
 524 }
 525
 526 void unit_destroy_cgroup(Unit *u) {
 527         int r;
 528
 529         assert(u);
 530
 531         if (!u->cgroup_path)
 532                 return;
 533
 534         r = cg_trim_with_mask(u->cgroup_mask, u->cgroup_path, !unit_has_name(u, SPECIAL_ROOT_SLICE));
 535         if (r < 0)
 536                 log_debug("Failed to destroy cgroup %s: %s", u->cgroup_path, strerror(-r));
 537
 538         hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
 539
 540         free(u->cgroup_path);
 541         u->cgroup_path = NULL;
 542         u->cgroup_realized = false;
 543         u->cgroup_mask = 0;
 544
 545 }
 546
 547 pid_t unit_search_main_pid(Unit *u) {
 548         _cleanup_fclose_ FILE *f = NULL;
 549         pid_t pid = 0, npid, mypid;
 550
 551         assert(u);
 552
 553         if (!u->cgroup_path)
 554                 return 0;
 555
 556         if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
 557                 return 0;
 558
 559         mypid = getpid();
 560         while (cg_read_pid(f, &npid) > 0)  {
 561                 pid_t ppid;
 562
 563                 if (npid == pid)
 564                         continue;
 565
 566                 /* Ignore processes that aren't our kids */
 567                 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
 568                         continue;
 569
 570                 if (pid != 0) {
 571                         /* Dang, there's more than one daemonized PID
 572                         in this group, so we don't know what process
 573                         is the main process. */
 574                         pid = 0;
 575                         break;
 576                 }
 577
 578                 pid = npid;
 579         }
 580
 581         return pid;
 582 }
 583
 584 int manager_setup_cgroup(Manager *m) {
 585         _cleanup_free_ char *path = NULL;
 586         int r;
 587         char *e, *a;
 588
 589         assert(m);
 590
 591         /* 0. Be nice to Ingo Molnar #628004 */
 592         if (path_is_mount_point("/sys/fs/cgroup/systemd", false) <= 0) {
 593                 log_warning("No control group support available, not creating root group.");
 594                 return 0;
 595         }
 596
 597         /* 1. Determine hierarchy */
 598         free(m->cgroup_root);
 599         m->cgroup_root = NULL;
 600
 601         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
 602         if (r < 0) {
 603                 log_error("Cannot determine cgroup we are running in: %s", strerror(-r));
 604                 return r;
 605         }
 606
 607         /* Already in /system.slice? If so, let's cut this off again */
 608         if (m->running_as == SYSTEMD_SYSTEM) {
 609                 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
 610                 if (e)
 611                         *e = 0;
 612         }
 613
 614         /* And make sure to store away the root value without trailing
 615          * slash, even for the root dir, so that we can easily prepend
 616          * it everywhere. */
 617         if (streq(m->cgroup_root, "/"))
 618                 m->cgroup_root[0] = 0;
 619
 620         /* 2. Show data */
 621         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
 622         if (r < 0) {
 623                 log_error("Cannot find cgroup mount point: %s", strerror(-r));
 624                 return r;
 625         }
 626
 627         log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
 628
 629         /* 3. Install agent */
 630         if (m->running_as == SYSTEMD_SYSTEM) {
 631                 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
 632                 if (r < 0)
 633                         log_warning("Failed to install release agent, ignoring: %s", strerror(-r));
 634                 else if (r > 0)
 635                         log_debug("Installed release agent.");
 636                 else
 637                         log_debug("Release agent already installed.");
 638         }
 639
 640         /* 4. Realize the system slice and put us in there */
 641         if (m->running_as == SYSTEMD_SYSTEM) {
 642                 a = strappenda(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
 643                 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, a, 0);
 644         } else
 645                 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0);
 646         if (r < 0) {
 647                 log_error("Failed to create root cgroup hierarchy: %s", strerror(-r));
 648                 return r;
 649         }
 650
 651         /* 5. And pin it, so that it cannot be unmounted */
 652         if (m->pin_cgroupfs_fd >= 0)
 653                 close_nointr_nofail(m->pin_cgroupfs_fd);
 654
 655         m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
 656         if (r < 0) {
 657                 log_error("Failed to open pin file: %m");
 658                 return -errno;
 659         }
 660
 661         /* 6. Figure out which controllers are supported */
 662         m->cgroup_supported = cg_mask_supported();
 663
 664         return 0;
 665 }
 666
 667 void manager_shutdown_cgroup(Manager *m, bool delete) {
 668         assert(m);
 669
 670         /* We can't really delete the group, since we are in it. But
 671          * let's trim it. */
 672         if (delete && m->cgroup_root)
 673                 cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
 674
 675         if (m->pin_cgroupfs_fd >= 0) {
 676                 close_nointr_nofail(m->pin_cgroupfs_fd);
 677                 m->pin_cgroupfs_fd = -1;
 678         }
 679
 680         free(m->cgroup_root);
 681         m->cgroup_root = NULL;
 682 }
 683
 684 Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
 685         char *p;
 686         Unit *u;
 687
 688         assert(m);
 689         assert(cgroup);
 690
 691         u = hashmap_get(m->cgroup_unit, cgroup);
 692         if (u)
 693                 return u;
 694
 695         p = strdupa(cgroup);
 696         for (;;) {
 697                 char *e;
 698
 699                 e = strrchr(p, '/');
 700                 if (e == p || !e)
 701                         return NULL;
 702
 703                 *e = 0;
 704
 705                 u = hashmap_get(m->cgroup_unit, p);
 706                 if (u)
 707                         return u;
 708         }
 709 }
 710
 711 Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
 712         _cleanup_free_ char *cgroup = NULL;
 713         int r;
 714
 715         assert(m);
 716
 717         if (pid <= 1)
 718                 return NULL;
 719
 720         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
 721         if (r < 0)
 722                 return NULL;
 723
 724         return manager_get_unit_by_cgroup(m, cgroup);
 725 }
 726
 727 int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
 728         Unit *u;
 729         int r;
 730
 731         assert(m);
 732         assert(cgroup);
 733
 734         u = manager_get_unit_by_cgroup(m, cgroup);
 735         if (u) {
 736                 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true);
 737                 if (r > 0) {
 738                         if (UNIT_VTABLE(u)->notify_cgroup_empty)
 739                                 UNIT_VTABLE(u)->notify_cgroup_empty(u);
 740
 741                         unit_add_to_gc_queue(u);
 742                 }
 743         }
 744
 745         return 0;
 746 }
 747
 748 static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
 749         [CGROUP_AUTO] = "auto",
 750         [CGROUP_CLOSED] = "closed",
 751         [CGROUP_STRICT] = "strict",
 752 };
 753
 754 DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);