1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/statfs.h>
27 #include "btrfs-util.h"
28 #include "path-util.h"
32 #include "machine-image.h"
/* Directories that are searched for images, stored back to back as
 * NUL-terminated strings inside one array. image_find() and image_discover()
 * walk the entries with NULSTR_FOREACH(). */
34 static const char image_search_path[] =
36 "/var/lib/container\0"
37 "/usr/local/lib/machines\0"
38 "/usr/lib/machines\0";
40 Image *image_unref(Image *i) {
60 _cleanup_(image_unrefp) Image *i = NULL;
63 assert(t < _IMAGE_TYPE_MAX);
73 i->read_only = read_only;
76 i->usage = i->usage_exclusive = (uint64_t) -1;
77 i->limit = i->limit_exclusive = (uint64_t) -1;
79 i->name = strdup(pretty);
84 i->path = strjoin(path, "/", filename, NULL);
86 i->path = strdup(filename);
91 path_kill_slashes(i->path);
99 static int image_make(
103 const char *filename,
112 /* We explicitly *do* follow symlinks here, since we want to
113 * allow symlinking trees into /var/lib/machines/, and treat
116 if (fstatat(dfd, filename, &st, 0) < 0)
120 (path && path_startswith(path, "/usr")) ||
121 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
123 if (S_ISDIR(st.st_mode)) {
124 _cleanup_close_ int fd = -1;
125 unsigned file_attr = 0;
133 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
137 /* btrfs subvolumes have inode 256 */
138 if (st.st_ino == 256) {
140 r = btrfs_is_filesystem(fd);
144 BtrfsSubvolInfo info;
145 BtrfsQuotaInfo quota;
147 /* It's a btrfs subvolume */
149 r = btrfs_subvol_get_info_fd(fd, &info);
153 r = image_new(IMAGE_SUBVOLUME,
157 info.read_only || read_only,
164 r = btrfs_subvol_get_quota_fd(fd, &quota);
166 (*ret)->usage = quota.referenced;
167 (*ret)->usage_exclusive = quota.exclusive;
169 (*ret)->limit = quota.referenced_max;
170 (*ret)->limit_exclusive = quota.exclusive_max;
177 /* If the IMMUTABLE bit is set, we consider the
178 * directory read-only. Since the ioctl is not
179 * supported everywhere we ignore failures. */
180 (void) read_attr_fd(fd, &file_attr);
182 /* It's just a normal directory. */
183 r = image_new(IMAGE_DIRECTORY,
187 read_only || (file_attr & FS_IMMUTABLE_FL),
196 } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) {
199 /* It's a RAW disk image */
204 fd_getcrtime_at(dfd, filename, &crtime, 0);
207 pretty = strndupa(filename, strlen(filename) - 4);
209 r = image_new(IMAGE_RAW,
213 !(st.st_mode & 0222) || read_only,
215 timespec_load(&st.st_mtim),
220 (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512;
221 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
/* Looks up an image by name. The name is first checked with
 * image_name_is_valid(). Every directory listed in image_search_path is then
 * probed through image_make(), and the name ".host" is served by calling
 * image_make() on "/". The result is handed back through ret, which is passed
 * straight on to image_make(); image_rename() and image_clone() call this
 * function with ret == NULL. The return statements of most branches are not
 * part of this excerpt. */
229 int image_find(const char *name, Image **ret) {
235 /* There are no images with invalid names */
236 if (!image_name_is_valid(name))
239 NULSTR_FOREACH(path, image_search_path) {
240 _cleanup_closedir_ DIR *d = NULL;
/* First probe: an entry of this directory carrying exactly the requested name */
250 r = image_make(NULL, dirfd(d), path, name, ret);
251 if (r == 0 || r == -ENOENT) {
252 _cleanup_free_ char *raw = NULL;
/* Second probe, taken when the first one returned 0 or -ENOENT: the same
 * name with ".raw" appended */
254 raw = strappend(name, ".raw");
258 r = image_make(NULL, dirfd(d), path, raw, ret);
259 if (r == 0 || r == -ENOENT)
/* The root directory "/" is exposed under the fixed name ".host" */
268 if (streq(name, ".host"))
269 return image_make(".host", AT_FDCWD, NULL, "/", ret);
/* Collects images from every directory in image_search_path into the hashmap
 * h, using image->name as the key. In addition, an image for "/" named ".host"
 * is created and inserted when h does not contain that key. The bodies of the
 * skip and error branches are not part of this excerpt. */
274 int image_discover(Hashmap *h) {
280 NULSTR_FOREACH(path, image_search_path) {
281 _cleanup_closedir_ DIR *d = NULL;
/* NOTE(review): the third macro argument, return -errno, is presumably the
 * statement run when reading the directory fails -- confirm against the
 * macro definition */
292 FOREACH_DIRENT_ALL(de, d, return -errno) {
293 _cleanup_(image_unrefp) Image *image = NULL;
/* Entries are screened by name validity and by whether d_name is already a
 * key in h */
295 if (!image_name_is_valid(de->d_name))
298 if (hashmap_contains(h, de->d_name))
301 r = image_make(NULL, dirfd(d), path, de->d_name, &image);
302 if (r == 0 || r == -ENOENT)
/* NOTE(review): the duplicate check above looks up de->d_name, while the
 * insertion below uses image->name. image_make() appears to derive the name
 * of ".raw" files by cutting off the last four characters, so the two keys
 * can differ -- confirm that duplicates are still caught */
307 r = hashmap_put(h, image->name, image);
/* Add the ".host" entry for "/" if h does not hold one yet */
315 if (!hashmap_contains(h, ".host")) {
316 _cleanup_(image_unrefp) Image *image = NULL;
318 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
322 r = hashmap_put(h, image->name, image);
/* Drains map by repeatedly taking its first entry with hashmap_steal_first()
 * until none is left. What happens to each stolen entry, and to the map
 * itself, is not part of this excerpt. */
333 void image_hashmap_free(Hashmap *map) {
336 while ((i = hashmap_steal_first(map)))
/* Deletes the on-disk object behind image i, with separate handling per image
 * type (see the case labels below). Images located at "/" or below "/usr" are
 * singled out up front, and a non-blocking exclusive lock on i->path is
 * requested before the removal code. The return statements of the guard
 * branches and the switch header are not part of this excerpt. */
342 int image_remove(Image *i) {
/* NOTE(review): the _cleanup_release_lock_file_ attribute presumably releases
 * both locks when the function returns -- confirm against its definition */
343 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
348 if (path_equal(i->path, "/") ||
349 path_startswith(i->path, "/usr"))
352 /* Make sure we don't interfere with a running nspawn */
353 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
359 case IMAGE_SUBVOLUME:
360 return btrfs_subvol_remove(i->path, true);
362 case IMAGE_DIRECTORY:
363 /* Allow deletion of read-only directories */
/* The (void) cast marks the result of clearing the immutable flag as
 * deliberately ignored */
364 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
365 return rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
/* Remaining case (its label is not visible here): the path is removed with
 * unlink() */
368 if (unlink(i->path) < 0)
/* Renames image i to new_name. The new path is built with file_in_same_dir(),
 * i.e. relative to the directory of the current i->path. The new name is
 * checked with image_name_is_valid(), and images at "/" or below "/usr" are
 * singled out. Two non-blocking exclusive locks are requested: one on the
 * current path and one on the new name. image_find() is then used to look
 * for an existing image of that name. The move itself is done by
 * rename_noreplace(). The return statements of the guard branches, the switch
 * header and the code after the rename are not part of this excerpt. */
378 int image_rename(Image *i, const char *new_name) {
379 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
380 _cleanup_free_ char *new_path = NULL, *nn = NULL;
/* Stays 0 unless the IMAGE_DIRECTORY case reads the real flags, so the
 * restore step at the end only acts on directories that were immutable */
381 unsigned file_attr = 0;
386 if (!image_name_is_valid(new_name))
389 if (path_equal(i->path, "/") ||
390 path_startswith(i->path, "/usr"))
393 /* Make sure we don't interfere with a running nspawn */
394 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
398 /* Make sure nobody takes the new name, between the time we
399 * checked it is currently unused in all search paths, and the
400 * time we take possession of it */
401 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
405 r = image_find(new_name, NULL);
413 case IMAGE_DIRECTORY:
414 /* Turn off the immutable bit while we rename the image, so that we can rename it */
415 (void) read_attr_path(i->path, &file_attr);
417 if (file_attr & FS_IMMUTABLE_FL)
418 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
/* NOTE(review): no break is visible between the two labels; the directory
 * case presumably falls through to share the path construction -- confirm */
422 case IMAGE_SUBVOLUME:
423 new_path = file_in_same_dir(i->path, new_name);
/* Another case (its label is not visible here): the file name gets a ".raw"
 * suffix */
429 fn = strjoina(new_name, ".raw");
430 new_path = file_in_same_dir(i->path, fn);
/* NOTE(review): nn is a heap copy of the new name, presumably meant to
 * replace i->name after the rename succeeded -- the assignment is not
 * visible here */
441 nn = strdup(new_name);
445 r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
449 /* Restore the immutable bit, if it was set before */
450 if (file_attr & FS_IMMUTABLE_FL)
451 (void) chattr_path(new_path, true, FS_IMMUTABLE_FL);
/* Creates a copy of image i under the name new_name. The destination is
 * always built below "/var/lib/machines/", independent of where i->path
 * lives. The new name is checked with image_name_is_valid(), locked with a
 * non-blocking exclusive name lock, and looked up with image_find() before
 * the copy is made. read_only selects a read-only snapshot or a 0444 file
 * mode for the copy. The return statements of the guard branches and the
 * switch header are not part of this excerpt. */
464 int image_clone(Image *i, const char *new_name, bool read_only) {
465 _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT;
466 const char *new_path;
471 if (!image_name_is_valid(new_name))
474 /* Make sure nobody takes the new name, between the time we
475 * checked it is currently unused in all search paths, and the
476 * time we take possession of it */
477 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
481 r = image_find(new_name, NULL);
/* Subvolumes and plain directories share one code path */
489 case IMAGE_SUBVOLUME:
490 case IMAGE_DIRECTORY:
491 new_path = strjoina("/var/lib/machines/", new_name);
/* BTRFS_SNAPSHOT_READ_ONLY is added only when read_only is set; the
 * FALLBACK_COPY and RECURSIVE flags are always passed */
493 r = btrfs_subvol_snapshot(i->path, new_path, (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE);
/* Another case (its label is not visible here): the destination carries a
 * ".raw" suffix and is produced by a file copy with mode 0444 or 0644 */
497 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
499 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
/* Switches the read-only state of image i on (b true) or off (b false), using
 * a different mechanism per image type. Images at "/" or below "/usr" are
 * singled out up front, and a non-blocking exclusive lock on i->path is
 * requested first. The return statements of the guard branches and the
 * switch header are not part of this excerpt. */
512 int image_read_only(Image *i, bool b) {
513 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
517 if (path_equal(i->path, "/") ||
518 path_startswith(i->path, "/usr"))
521 /* Make sure we don't interfere with a running nspawn */
522 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
528 case IMAGE_SUBVOLUME:
529 r = btrfs_subvol_set_read_only(i->path, b);
535 case IMAGE_DIRECTORY:
536 /* For simple directory trees we cannot use the access
537 mode of the top-level directory, since it has an
538 effect on the container itself. However, we can
539 use the "immutable" flag, to at least make the
540 top-level directory read-only. It's not as good as
541 a read-only subvolume, but at least something, and
542 we can read the value back.*/
544 r = chattr_path(i->path, b, FS_IMMUTABLE_FL);
/* Another case (its label is not visible here): the state is expressed through
 * the file mode. Only the read bits of the current mode are kept (mask 0444);
 * the owner write bit 0200 is added when b is false. */
553 if (stat(i->path, &st) < 0)
556 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
559 /* If the image is now read-only, it's a good time to
560 * defrag it, given that no write patterns will
561 * fragment it again. */
/* The condition guarding this call is not visible here; the result of the
 * defrag is deliberately ignored */
563 (void) btrfs_defrag(i->path);
/* Takes the locks that protect an image path.
 *   path      - image path; "/" and non-absolute paths are singled out by the
 *               two checks below
 *   operation - passed unchanged to make_lock_file_for() and make_lock_file();
 *               callers in this file use LOCK_EX|LOCK_NB
 *   global    - receives the lock created under /run/systemd/nspawn/locks
 *   local     - presumably receives the lock held in t on success; that
 *               assignment is not part of this excerpt
 * The return statements are not part of this excerpt. */
574 int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
575 _cleanup_free_ char *p = NULL;
576 LockFile t = LOCK_FILE_INIT;
584 /* Locks an image path. This actually creates two locks: one
585 * "local" one, next to the image path itself, which might be
586 * shared via NFS. And another "global" one, in /run, that
587 * uses the device/inode number. This has the benefit that we
588 * can even lock a tree that is a mount point, correctly. */
590 if (path_equal(path, "/"))
593 if (!path_is_absolute(path))
/* The global lock file name is only computed when stat() succeeds.
 * NOTE(review): st_dev and st_ino are cast to unsigned long for "%lu"; where
 * dev_t or ino_t are wider than unsigned long this truncates -- confirm this
 * is acceptable */
596 if (stat(path, &st) >= 0) {
597 if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
/* The local lock is taken into the temporary t first */
601 r = make_lock_file_for(path, operation, &t);
/* NOTE(review): the result of mkdir_p() is not checked, and mode 0600 carries
 * no search (x) bit, which is unusual for a directory -- 0700 is probably
 * what is intended; confirm */
606 mkdir_p("/run/systemd/nspawn/locks", 0600);
608 r = make_lock_file(p, operation, global);
/* Presumably the failure path of the global lock: the local lock already held
 * in t is given up again (the enclosing condition is not visible here) */
610 release_lock_file(&t);
/* Applies a quota limit to image i by handing i->path and referenced_max to
 * btrfs_quota_limit(), whose result is returned. Images at "/" or below
 * "/usr" and images whose type is not IMAGE_SUBVOLUME are singled out before
 * that call; the return statements of those branches are not part of this
 * excerpt. */
619 int image_set_limit(Image *i, uint64_t referenced_max) {
622 if (path_equal(i->path, "/") ||
623 path_startswith(i->path, "/usr"))
626 if (i->type != IMAGE_SUBVOLUME)
629 return btrfs_quota_limit(i->path, referenced_max);
/* Takes a lock on an image name by creating the lock file
 * "/run/systemd/nspawn/locks/name-<name>" through make_lock_file(), whose
 * result is returned; operation and ret are passed on unchanged. Names that
 * fail image_name_is_valid() and the name ".host" are singled out first; the
 * return statements of those branches are not part of this excerpt. */
632 int image_name_lock(const char *name, int operation, LockFile *ret) {
638 /* Locks an image name, regardless of the precise path used. */
640 if (!image_name_is_valid(name))
643 if (streq(name, ".host"))
/* NOTE(review): the result of mkdir_p() is not checked, and mode 0600 carries
 * no search (x) bit, which is unusual for a directory -- 0700 is probably
 * what is intended; confirm */
646 mkdir_p("/run/systemd/nspawn/locks", 0600);
647 p = strjoina("/run/systemd/nspawn/locks/name-", name);
649 return make_lock_file(p, operation, ret);
/* Decides whether s may be used as an image name. Four checks are applied to
 * s: filename_is_valid(), string_has_cc(), utf8_is_valid() and a test for the
 * ".#" prefix. The return statements are not part of this excerpt;
 * presumably each failed check yields false -- confirm. */
652 bool image_name_is_valid(const char *s) {
653 if (!filename_is_valid(s))
656 if (string_has_cc(s, NULL))
659 if (!utf8_is_valid(s))
662 /* Temporary files for atomically creating new files */
663 if (startswith(s, ".#"))
/* Names of the image types, indexed by the ImageType enumerators and sized by
 * _IMAGE_TYPE_MAX. Further entries and the closing brace of the initializer
 * are not part of this excerpt. */
669 static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
670 [IMAGE_DIRECTORY] = "directory",
671 [IMAGE_SUBVOLUME] = "subvolume",
/* NOTE(review): presumably expands to the string conversion helpers for
 * ImageType based on the table above -- confirm against the macro definition */
675 DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);