1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/statfs.h>
27 #include "btrfs-util.h"
28 #include "path-util.h"
32 #include "machine-image.h"
34 static const char image_search_path[] =
36 "/var/lib/container\0"
37 "/usr/local/lib/machines\0"
38 "/usr/lib/machines\0";
40 Image *image_unref(Image *i) {
60 _cleanup_(image_unrefp) Image *i = NULL;
63 assert(t < _IMAGE_TYPE_MAX);
73 i->read_only = read_only;
76 i->usage = i->usage_exclusive = (uint64_t) -1;
77 i->limit = i->limit_exclusive = (uint64_t) -1;
79 i->name = strdup(pretty);
84 i->path = strjoin(path, "/", filename, NULL);
86 i->path = strdup(filename);
91 path_kill_slashes(i->path);
99 static int image_make(
103 const char *filename,
112 /* We explicitly *do* follow symlinks here, since we want to
113 * allow symlinking trees into /var/lib/machines/, and treat
116 if (fstatat(dfd, filename, &st, 0) < 0)
120 (path && path_startswith(path, "/usr")) ||
121 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
123 if (S_ISDIR(st.st_mode)) {
124 _cleanup_close_ int fd = -1;
125 unsigned file_attr = 0;
133 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
137 /* btrfs subvolumes have inode 256 */
138 if (st.st_ino == 256) {
141 if (fstatfs(fd, &sfs) < 0)
144 if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC)) {
145 BtrfsSubvolInfo info;
146 BtrfsQuotaInfo quota;
148 /* It's a btrfs subvolume */
150 r = btrfs_subvol_get_info_fd(fd, &info);
154 r = image_new(IMAGE_SUBVOLUME,
158 info.read_only || read_only,
165 r = btrfs_subvol_get_quota_fd(fd, "a);
167 (*ret)->usage = quota.referenced;
168 (*ret)->usage_exclusive = quota.exclusive;
170 (*ret)->limit = quota.referenced_max;
171 (*ret)->limit_exclusive = quota.exclusive_max;
178 /* If the IMMUTABLE bit is set, we consider the
179 * directory read-only. Since the ioctl is not
180 * supported everywhere we ignore failures. */
181 (void) read_attr_fd(fd, &file_attr);
183 /* It's just a normal directory. */
184 r = image_new(IMAGE_DIRECTORY,
188 read_only || (file_attr & FS_IMMUTABLE_FL),
197 } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) {
200 /* It's a RAW disk image */
205 fd_getcrtime_at(dfd, filename, &crtime, 0);
208 pretty = strndupa(filename, strlen(filename) - 4);
210 r = image_new(IMAGE_RAW,
214 !(st.st_mode & 0222) || read_only,
216 timespec_load(&st.st_mtim),
221 (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512;
222 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
230 int image_find(const char *name, Image **ret) {
236 /* There are no images with invalid names */
237 if (!image_name_is_valid(name))
240 NULSTR_FOREACH(path, image_search_path) {
241 _cleanup_closedir_ DIR *d = NULL;
251 r = image_make(NULL, dirfd(d), path, name, ret);
252 if (r == 0 || r == -ENOENT) {
253 _cleanup_free_ char *raw = NULL;
255 raw = strappend(name, ".raw");
259 r = image_make(NULL, dirfd(d), path, raw, ret);
260 if (r == 0 || r == -ENOENT)
269 if (streq(name, ".host"))
270 return image_make(".host", AT_FDCWD, NULL, "/", ret);
275 int image_discover(Hashmap *h) {
281 NULSTR_FOREACH(path, image_search_path) {
282 _cleanup_closedir_ DIR *d = NULL;
293 FOREACH_DIRENT_ALL(de, d, return -errno) {
294 _cleanup_(image_unrefp) Image *image = NULL;
296 if (!image_name_is_valid(de->d_name))
299 if (hashmap_contains(h, de->d_name))
302 r = image_make(NULL, dirfd(d), path, de->d_name, &image);
303 if (r == 0 || r == -ENOENT)
308 r = hashmap_put(h, image->name, image);
316 if (!hashmap_contains(h, ".host")) {
317 _cleanup_(image_unrefp) Image *image = NULL;
319 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
323 r = hashmap_put(h, image->name, image);
334 void image_hashmap_free(Hashmap *map) {
337 while ((i = hashmap_steal_first(map)))
343 int image_remove(Image *i) {
344 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
349 if (path_equal(i->path, "/") ||
350 path_startswith(i->path, "/usr"))
353 /* Make sure we don't interfere with a running nspawn */
354 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
360 case IMAGE_SUBVOLUME:
361 return btrfs_subvol_remove(i->path, true);
363 case IMAGE_DIRECTORY:
364 /* Allow deletion of read-only directories */
365 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
366 return rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
369 if (unlink(i->path) < 0)
379 int image_rename(Image *i, const char *new_name) {
380 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
381 _cleanup_free_ char *new_path = NULL, *nn = NULL;
382 unsigned file_attr = 0;
387 if (!image_name_is_valid(new_name))
390 if (path_equal(i->path, "/") ||
391 path_startswith(i->path, "/usr"))
394 /* Make sure we don't interfere with a running nspawn */
395 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
399 /* Make sure nobody takes the new name, between the time we
400 * checked it is currently unused in all search paths, and the
401 * time we take possession of it */
402 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
406 r = image_find(new_name, NULL);
414 case IMAGE_DIRECTORY:
415 /* Turn off the immutable bit while we rename the image, so that we can rename it */
416 (void) read_attr_path(i->path, &file_attr);
418 if (file_attr & FS_IMMUTABLE_FL)
419 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
423 case IMAGE_SUBVOLUME:
424 new_path = file_in_same_dir(i->path, new_name);
430 fn = strjoina(new_name, ".raw");
431 new_path = file_in_same_dir(i->path, fn);
442 nn = strdup(new_name);
446 r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
450 /* Restore the immutable bit, if it was set before */
451 if (file_attr & FS_IMMUTABLE_FL)
452 (void) chattr_path(new_path, true, FS_IMMUTABLE_FL);
465 int image_clone(Image *i, const char *new_name, bool read_only) {
466 _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT;
467 const char *new_path;
472 if (!image_name_is_valid(new_name))
475 /* Make sure nobody takes the new name, between the time we
476 * checked it is currently unused in all search paths, and the
477 * time we take possession of it */
478 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
482 r = image_find(new_name, NULL);
490 case IMAGE_SUBVOLUME:
491 case IMAGE_DIRECTORY:
492 new_path = strjoina("/var/lib/machines/", new_name);
494 r = btrfs_subvol_snapshot(i->path, new_path, (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE);
498 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
500 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
513 int image_read_only(Image *i, bool b) {
514 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
518 if (path_equal(i->path, "/") ||
519 path_startswith(i->path, "/usr"))
522 /* Make sure we don't interfere with a running nspawn */
523 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
529 case IMAGE_SUBVOLUME:
530 r = btrfs_subvol_set_read_only(i->path, b);
536 case IMAGE_DIRECTORY:
537 /* For simple directory trees we cannot use the access
538 mode of the top-level directory, since it has an
539 effect on the container itself. However, we can
540 use the "immutable" flag, to at least make the
541 top-level directory read-only. It's not as good as
542 a read-only subvolume, but at least something, and
543 we can read the value back.*/
545 r = chattr_path(i->path, b, FS_IMMUTABLE_FL);
554 if (stat(i->path, &st) < 0)
557 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
560 /* If the image is now read-only, it's a good time to
561 * defrag it, given that no write patterns will
562 * fragment it again. */
564 (void) btrfs_defrag(i->path);
575 int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
576 _cleanup_free_ char *p = NULL;
577 LockFile t = LOCK_FILE_INIT;
585 /* Locks an image path. This actually creates two locks: one
586 * "local" one, next to the image path itself, which might be
587 * shared via NFS. And another "global" one, in /run, that
588 * uses the device/inode number. This has the benefit that we
589 * can even lock a tree that is a mount point, correctly. */
591 if (path_equal(path, "/"))
594 if (!path_is_absolute(path))
597 if (stat(path, &st) >= 0) {
598 if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
602 r = make_lock_file_for(path, operation, &t);
607 mkdir_p("/run/systemd/nspawn/locks", 0600);
609 r = make_lock_file(p, operation, global);
611 release_lock_file(&t);
620 int image_set_limit(Image *i, uint64_t referenced_max) {
623 if (path_equal(i->path, "/") ||
624 path_startswith(i->path, "/usr"))
627 if (i->type != IMAGE_SUBVOLUME)
630 return btrfs_quota_limit(i->path, referenced_max);
633 int image_name_lock(const char *name, int operation, LockFile *ret) {
639 /* Locks an image name, regardless of the precise path used. */
641 if (!image_name_is_valid(name))
644 if (streq(name, ".host"))
647 mkdir_p("/run/systemd/nspawn/locks", 0600);
648 p = strjoina("/run/systemd/nspawn/locks/name-", name);
650 return make_lock_file(p, operation, ret);
653 bool image_name_is_valid(const char *s) {
654 if (!filename_is_valid(s))
657 if (string_has_cc(s, NULL))
660 if (!utf8_is_valid(s))
663 /* Temporary files for atomically creating new files */
664 if (startswith(s, ".#"))
670 static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
671 [IMAGE_DIRECTORY] = "directory",
672 [IMAGE_SUBVOLUME] = "subvolume",
676 DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);