X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=blobdiff_plain;f=src%2Fshared%2Fmachine-image.c;h=273dacff1f2ac610adf80812ad0669c0e88912f9;hb=b733fbe7a0214eb43e402db7179697bf9c0975c1;hp=117994d6d88c94e2d6884b2e2e6f8795b08e3783;hpb=01b725684f6ba54f0db815669e4e07eb0e02fedb;p=elogind.git diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c index 117994d6d..273dacff1 100644 --- a/src/shared/machine-image.c +++ b/src/shared/machine-image.c @@ -23,11 +23,12 @@ #include #include -#include "strv.h" #include "utf8.h" #include "btrfs-util.h" #include "path-util.h" #include "copy.h" +#include "mkdir.h" +#include "rm-rf.h" #include "machine-image.h" static const char image_search_path[] = @@ -72,7 +73,7 @@ static int image_new( i->read_only = read_only; i->crtime = crtime; i->mtime = mtime; - i->size = i->size_exclusive = (uint64_t) -1; + i->usage = i->usage_exclusive = (uint64_t) -1; i->limit = i->limit_exclusive = (uint64_t) -1; i->name = strdup(pretty); @@ -109,7 +110,7 @@ static int image_make( assert(filename); /* We explicitly *do* follow symlinks here, since we want to - * allow symlinking trees into /var/lib/container/, and treat + * allow symlinking trees into /var/lib/machines/, and treat * them normally. 
*/ if (fstatat(dfd, filename, &st, 0) < 0) @@ -135,12 +136,11 @@ static int image_make( /* btrfs subvolumes have inode 256 */ if (st.st_ino == 256) { - struct statfs sfs; - if (fstatfs(fd, &sfs) < 0) - return -errno; - - if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC)) { + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (r) { BtrfsSubvolInfo info; BtrfsQuotaInfo quota; @@ -163,10 +163,10 @@ static int image_make( r = btrfs_subvol_get_quota_fd(fd, &quota); if (r >= 0) { - (*ret)->size = quota.referred; - (*ret)->size_exclusive = quota.exclusive; + (*ret)->usage = quota.referenced; + (*ret)->usage_exclusive = quota.exclusive; - (*ret)->limit = quota.referred_max; + (*ret)->limit = quota.referenced_max; (*ret)->limit_exclusive = quota.exclusive_max; } @@ -193,10 +193,10 @@ static int image_make( return 1; - } else if (S_ISREG(st.st_mode) && endswith(filename, ".gpt")) { + } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) { usec_t crtime = 0; - /* It's a GPT block device */ + /* It's a RAW disk image */ if (!ret) return 1; @@ -206,7 +206,7 @@ static int image_make( if (!pretty) pretty = strndupa(filename, strlen(filename) - 4); - r = image_new(IMAGE_GPT, + r = image_new(IMAGE_RAW, pretty, path, filename, @@ -217,7 +217,7 @@ static int image_make( if (r < 0) return r; - (*ret)->size = (*ret)->size_exclusive = st.st_blocks * 512; + (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512; (*ret)->limit = (*ret)->limit_exclusive = st.st_size; return 1; @@ -249,13 +249,13 @@ int image_find(const char *name, Image **ret) { r = image_make(NULL, dirfd(d), path, name, ret); if (r == 0 || r == -ENOENT) { - _cleanup_free_ char *gpt = NULL; + _cleanup_free_ char *raw = NULL; - gpt = strappend(name, ".gpt"); - if (!gpt) + raw = strappend(name, ".raw"); + if (!raw) return -ENOMEM; - r = image_make(NULL, dirfd(d), path, gpt, ret); + r = image_make(NULL, dirfd(d), path, raw, ret); if (r == 0 || r == -ENOENT) continue; } @@ -340,32 +340,43 @@ void 
image_hashmap_free(Hashmap *map) { } int image_remove(Image *i) { + _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT; + int r; + assert(i); if (path_equal(i->path, "/") || path_startswith(i->path, "/usr")) return -EROFS; + /* Make sure we don't interfere with a running nspawn */ + r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); + if (r < 0) + return r; + switch (i->type) { case IMAGE_SUBVOLUME: - return btrfs_subvol_remove(i->path); + return btrfs_subvol_remove(i->path, true); case IMAGE_DIRECTORY: /* Allow deletion of read-only directories */ (void) chattr_path(i->path, false, FS_IMMUTABLE_FL); + return rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME); - /* fall through */ + case IMAGE_RAW: + if (unlink(i->path) < 0) + return -errno; - case IMAGE_GPT: - return rm_rf_dangerous(i->path, false, true, false); + return 0; default: - return -ENOTSUP; + return -EOPNOTSUPP; } } int image_rename(Image *i, const char *new_name) { + _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT; _cleanup_free_ char *new_path = NULL, *nn = NULL; unsigned file_attr = 0; int r; @@ -379,6 +390,18 @@ int image_rename(Image *i, const char *new_name) { path_startswith(i->path, "/usr")) return -EROFS; + /* Make sure we don't interfere with a running nspawn */ + r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); + if (r < 0) + return r; + + /* Make sure nobody takes the new name, between the time we + * checked it is currently unused in all search paths, and the + * time we take possession of it */ + r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock); + if (r < 0) + return r; + r = image_find(new_name, NULL); if (r < 0) return r; @@ -400,16 +423,16 @@ int image_rename(Image *i, const char *new_name) { new_path = file_in_same_dir(i->path, new_name); break; - case IMAGE_GPT: { + case IMAGE_RAW: { const char *fn; - 
fn = strappenda(new_name, ".gpt"); + fn = strjoina(new_name, ".raw"); new_path = file_in_same_dir(i->path, fn); break; } default: - return -ENOTSUP; + return -EOPNOTSUPP; } if (!new_path) @@ -419,8 +442,9 @@ int image_rename(Image *i, const char *new_name) { if (!nn) return -ENOMEM; - if (renameat2(AT_FDCWD, i->path, AT_FDCWD, new_path, RENAME_NOREPLACE) < 0) - return -errno; + r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path); + if (r < 0) + return r; /* Restore the immutable bit, if it was set before */ if (file_attr & FS_IMMUTABLE_FL) @@ -438,6 +462,7 @@ int image_rename(Image *i, const char *new_name) { } int image_clone(Image *i, const char *new_name, bool read_only) { + _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT; const char *new_path; int r; @@ -446,6 +471,13 @@ int image_clone(Image *i, const char *new_name, bool read_only) { if (!image_name_is_valid(new_name)) return -EINVAL; + /* Make sure nobody takes the new name, between the time we + * checked it is currently unused in all search paths, and the + * time we take possession of it */ + r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock); + if (r < 0) + return r; + r = image_find(new_name, NULL); if (r < 0) return r; @@ -456,19 +488,19 @@ int image_clone(Image *i, const char *new_name, bool read_only) { case IMAGE_SUBVOLUME: case IMAGE_DIRECTORY: - new_path = strappenda("/var/lib/container/", new_name); + new_path = strjoina("/var/lib/machines/", new_name); - r = btrfs_subvol_snapshot(i->path, new_path, read_only, true); + r = btrfs_subvol_snapshot(i->path, new_path, (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE); break; - case IMAGE_GPT: - new_path = strappenda("/var/lib/container/", new_name, ".gpt"); + case IMAGE_RAW: + new_path = strjoina("/var/lib/machines/", new_name, ".raw"); r = copy_file_atomic(i->path, new_path, read_only ? 
0444 : 0644, false, FS_NOCOW_FL); break; default: - return -ENOTSUP; + return -EOPNOTSUPP; } if (r < 0) @@ -478,6 +510,7 @@ int image_clone(Image *i, const char *new_name, bool read_only) { } int image_read_only(Image *i, bool b) { + _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT; int r; assert(i); @@ -485,6 +518,11 @@ int image_read_only(Image *i, bool b) { path_startswith(i->path, "/usr")) return -EROFS; + /* Make sure we don't interfere with a running nspawn */ + r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); + if (r < 0) + return r; + switch (i->type) { case IMAGE_SUBVOLUME: @@ -509,7 +547,7 @@ int image_read_only(Image *i, bool b) { break; - case IMAGE_GPT: { + case IMAGE_RAW: { struct stat st; if (stat(i->path, &st) < 0) @@ -527,16 +565,111 @@ int image_read_only(Image *i, bool b) { } default: - return -ENOTSUP; + return -EOPNOTSUPP; } return 0; } +int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) { + _cleanup_free_ char *p = NULL; + LockFile t = LOCK_FILE_INIT; + struct stat st; + int r; + + assert(path); + assert(global); + assert(local); + + /* Locks an image path. This actually creates two locks: one + * "local" one, next to the image path itself, which might be + * shared via NFS. And another "global" one, in /run, that + * uses the device/inode number. This has the benefit that we + * can even lock a tree that is a mount point, correctly. 
*/ + + if (path_equal(path, "/")) + return -EBUSY; + + if (!path_is_absolute(path)) + return -EINVAL; + + if (stat(path, &st) >= 0) { + if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0) + return -ENOMEM; + } + + r = make_lock_file_for(path, operation, &t); + if (r < 0) + return r; + + if (p) { + mkdir_p("/run/systemd/nspawn/locks", 0700); + + r = make_lock_file(p, operation, global); + if (r < 0) { + release_lock_file(&t); + return r; + } + } + + *local = t; + return 0; +} + +int image_set_limit(Image *i, uint64_t referenced_max) { + assert(i); + + if (path_equal(i->path, "/") || + path_startswith(i->path, "/usr")) + return -EROFS; + + if (i->type != IMAGE_SUBVOLUME) + return -EOPNOTSUPP; + + return btrfs_quota_limit(i->path, referenced_max); +} + +int image_name_lock(const char *name, int operation, LockFile *ret) { + const char *p; + + assert(name); + assert(ret); + + /* Locks an image name, regardless of the precise path used. */ + + if (!image_name_is_valid(name)) + return -EINVAL; + + if (streq(name, ".host")) + return -EBUSY; + + mkdir_p("/run/systemd/nspawn/locks", 0700); + p = strjoina("/run/systemd/nspawn/locks/name-", name); + + return make_lock_file(p, operation, ret); +} + +bool image_name_is_valid(const char *s) { + if (!filename_is_valid(s)) + return false; + + if (string_has_cc(s, NULL)) + return false; + + if (!utf8_is_valid(s)) + return false; + + /* Temporary files for atomically creating new files */ + if (startswith(s, ".#")) + return false; + + return true; +} + static const char* const image_type_table[_IMAGE_TYPE_MAX] = { [IMAGE_DIRECTORY] = "directory", [IMAGE_SUBVOLUME] = "subvolume", - [IMAGE_GPT] = "gpt", + [IMAGE_RAW] = "raw", }; DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);