1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/statfs.h>
27 #include "btrfs-util.h"
28 #include "path-util.h"
32 #include "machine-image.h"
34 static const char image_search_path[] =
36 "/var/lib/container\0"
37 "/usr/local/lib/machines\0"
38 "/usr/lib/machines\0";
40 Image *image_unref(Image *i) {
60 _cleanup_(image_unrefp) Image *i = NULL;
63 assert(t < _IMAGE_TYPE_MAX);
73 i->read_only = read_only;
76 i->usage = i->usage_exclusive = (uint64_t) -1;
77 i->limit = i->limit_exclusive = (uint64_t) -1;
79 i->name = strdup(pretty);
84 i->path = strjoin(path, "/", filename, NULL);
86 i->path = strdup(filename);
91 path_kill_slashes(i->path);
99 static int image_make(
103 const char *filename,
112 /* We explicitly *do* follow symlinks here, since we want to
113 * allow symlinking trees into /var/lib/machines/, and treat
116 if (fstatat(dfd, filename, &st, 0) < 0)
120 (path && path_startswith(path, "/usr")) ||
121 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
123 if (S_ISDIR(st.st_mode)) {
124 _cleanup_close_ int fd = -1;
125 unsigned file_attr = 0;
133 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
137 /* btrfs subvolumes have inode 256 */
138 if (st.st_ino == 256) {
141 if (fstatfs(fd, &sfs) < 0)
144 if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC)) {
145 BtrfsSubvolInfo info;
146 BtrfsQuotaInfo quota;
148 /* It's a btrfs subvolume */
150 r = btrfs_subvol_get_info_fd(fd, &info);
154 r = image_new(IMAGE_SUBVOLUME,
158 info.read_only || read_only,
165 r = btrfs_subvol_get_quota_fd(fd, &quota);
167 (*ret)->usage = quota.referenced;
168 (*ret)->usage_exclusive = quota.exclusive;
170 (*ret)->limit = quota.referenced_max;
171 (*ret)->limit_exclusive = quota.exclusive_max;
178 /* If the IMMUTABLE bit is set, we consider the
179 * directory read-only. Since the ioctl is not
180 * supported everywhere we ignore failures. */
181 (void) read_attr_fd(fd, &file_attr);
183 /* It's just a normal directory. */
184 r = image_new(IMAGE_DIRECTORY,
188 read_only || (file_attr & FS_IMMUTABLE_FL),
197 } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) {
200 /* It's a RAW disk image */
205 fd_getcrtime_at(dfd, filename, &crtime, 0);
208 pretty = strndupa(filename, strlen(filename) - 4);
210 r = image_new(IMAGE_RAW,
214 !(st.st_mode & 0222) || read_only,
216 timespec_load(&st.st_mtim),
221 (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512;
222 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
230 int image_find(const char *name, Image **ret) {
236 /* There are no images with invalid names */
237 if (!image_name_is_valid(name))
240 NULSTR_FOREACH(path, image_search_path) {
241 _cleanup_closedir_ DIR *d = NULL;
251 r = image_make(NULL, dirfd(d), path, name, ret);
252 if (r == 0 || r == -ENOENT) {
253 _cleanup_free_ char *raw = NULL;
255 raw = strappend(name, ".raw");
259 r = image_make(NULL, dirfd(d), path, raw, ret);
260 if (r == 0 || r == -ENOENT)
269 if (streq(name, ".host"))
270 return image_make(".host", AT_FDCWD, NULL, "/", ret);
275 int image_discover(Hashmap *h) {
281 NULSTR_FOREACH(path, image_search_path) {
282 _cleanup_closedir_ DIR *d = NULL;
293 FOREACH_DIRENT_ALL(de, d, return -errno) {
294 _cleanup_(image_unrefp) Image *image = NULL;
296 if (!image_name_is_valid(de->d_name))
299 if (hashmap_contains(h, de->d_name))
302 r = image_make(NULL, dirfd(d), path, de->d_name, &image);
303 if (r == 0 || r == -ENOENT)
308 r = hashmap_put(h, image->name, image);
316 if (!hashmap_contains(h, ".host")) {
317 _cleanup_(image_unrefp) Image *image = NULL;
319 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
323 r = hashmap_put(h, image->name, image);
334 void image_hashmap_free(Hashmap *map) {
337 while ((i = hashmap_steal_first(map)))
343 int image_remove(Image *i) {
344 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
349 if (path_equal(i->path, "/") ||
350 path_startswith(i->path, "/usr"))
353 /* Make sure we don't interfere with a running nspawn */
354 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
360 case IMAGE_SUBVOLUME:
361 return btrfs_subvol_remove(i->path);
363 case IMAGE_DIRECTORY:
364 /* Allow deletion of read-only directories */
365 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
370 return rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL);
377 int image_rename(Image *i, const char *new_name) {
378 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
379 _cleanup_free_ char *new_path = NULL, *nn = NULL;
380 unsigned file_attr = 0;
385 if (!image_name_is_valid(new_name))
388 if (path_equal(i->path, "/") ||
389 path_startswith(i->path, "/usr"))
392 /* Make sure we don't interfere with a running nspawn */
393 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
397 /* Make sure nobody takes the new name, between the time we
398 * checked it is currently unused in all search paths, and the
399 * time we take possession of it */
400 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
404 r = image_find(new_name, NULL);
412 case IMAGE_DIRECTORY:
413 /* Turn off the immutable bit while we rename the image, so that we can rename it */
414 (void) read_attr_path(i->path, &file_attr);
416 if (file_attr & FS_IMMUTABLE_FL)
417 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
421 case IMAGE_SUBVOLUME:
422 new_path = file_in_same_dir(i->path, new_name);
428 fn = strjoina(new_name, ".raw");
429 new_path = file_in_same_dir(i->path, fn);
440 nn = strdup(new_name);
444 r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
448 /* Restore the immutable bit, if it was set before */
449 if (file_attr & FS_IMMUTABLE_FL)
450 (void) chattr_path(new_path, true, FS_IMMUTABLE_FL);
463 int image_clone(Image *i, const char *new_name, bool read_only) {
464 _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT;
465 const char *new_path;
470 if (!image_name_is_valid(new_name))
473 /* Make sure nobody takes the new name, between the time we
474 * checked it is currently unused in all search paths, and the
475 * time we take possession of it */
476 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
480 r = image_find(new_name, NULL);
488 case IMAGE_SUBVOLUME:
489 case IMAGE_DIRECTORY:
490 new_path = strjoina("/var/lib/machines/", new_name);
492 r = btrfs_subvol_snapshot(i->path, new_path, read_only, true);
496 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
498 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
511 int image_read_only(Image *i, bool b) {
512 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
516 if (path_equal(i->path, "/") ||
517 path_startswith(i->path, "/usr"))
520 /* Make sure we don't interfere with a running nspawn */
521 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
527 case IMAGE_SUBVOLUME:
528 r = btrfs_subvol_set_read_only(i->path, b);
534 case IMAGE_DIRECTORY:
535 /* For simple directory trees we cannot use the access
536 mode of the top-level directory, since it has an
537 effect on the container itself. However, we can
538 use the "immutable" flag, to at least make the
539 top-level directory read-only. It's not as good as
540 a read-only subvolume, but at least something, and
541 we can read the value back.*/
543 r = chattr_path(i->path, b, FS_IMMUTABLE_FL);
552 if (stat(i->path, &st) < 0)
555 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
558 /* If the image is now read-only, it's a good time to
559 * defrag it, given that no write patterns will
560 * fragment it again. */
562 (void) btrfs_defrag(i->path);
573 int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
574 _cleanup_free_ char *p = NULL;
575 LockFile t = LOCK_FILE_INIT;
583 /* Locks an image path. This actually creates two locks: one
584 * "local" one, next to the image path itself, which might be
585 * shared via NFS. And another "global" one, in /run, that
586 * uses the device/inode number. This has the benefit that we
587 * can even lock a tree that is a mount point, correctly. */
589 if (path_equal(path, "/"))
592 if (!path_is_absolute(path))
595 if (stat(path, &st) >= 0) {
596 if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
600 r = make_lock_file_for(path, operation, &t);
605 mkdir_p("/run/systemd/nspawn/locks", 0600);
607 r = make_lock_file(p, operation, global);
609 release_lock_file(&t);
618 int image_set_limit(Image *i, uint64_t referenced_max) {
621 if (path_equal(i->path, "/") ||
622 path_startswith(i->path, "/usr"))
625 if (i->type != IMAGE_SUBVOLUME)
628 return btrfs_quota_limit(i->path, referenced_max);
631 int image_name_lock(const char *name, int operation, LockFile *ret) {
637 /* Locks an image name, regardless of the precise path used. */
639 if (!image_name_is_valid(name))
642 if (streq(name, ".host"))
645 mkdir_p("/run/systemd/nspawn/locks", 0600);
646 p = strjoina("/run/systemd/nspawn/locks/name-", name);
648 return make_lock_file(p, operation, ret);
651 bool image_name_is_valid(const char *s) {
652 if (!filename_is_valid(s))
655 if (string_has_cc(s, NULL))
658 if (!utf8_is_valid(s))
661 /* Temporary files for atomically creating new files */
662 if (startswith(s, ".#"))
668 static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
669 [IMAGE_DIRECTORY] = "directory",
670 [IMAGE_SUBVOLUME] = "subvolume",
674 DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);