1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2014 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
26 #ifdef HAVE_LINUX_BTRFS_H
27 #include <linux/btrfs.h>
32 #include "path-util.h"
35 #include "selinux-util.h"
36 #include "smack-util.h"
38 #include "btrfs-ctree.h"
39 #include "btrfs-util.h"
/* Checks whether `name` is usable as a btrfs subvolume name: it is tested
 * with filename_is_valid() and its length is compared against
 * BTRFS_SUBVOL_NAME_MAX. The values returned on failure and success are on
 * lines that are not visible in this listing. */
41 static int validate_subvolume_name(const char *name) {
43 if (!filename_is_valid(name))
46 if (strlen(name) > BTRFS_SUBVOL_NAME_MAX)
/* Opens the parent directory of `path`. The parent path is computed by
 * path_get_parent() into the _cleanup_free_ variable `parent` and then handed
 * to open() together with the caller-supplied `flags`. */
52 static int open_parent(const char *path, int flags) {
53 _cleanup_free_ char *parent = NULL;
58 r = path_get_parent(path, &parent);
62 fd = open(parent, flags);
/* Extracts the subvolume name that belongs to `path`. The candidate name `fn`
 * is checked with validate_subvolume_name(); how `fn` is derived from `path`
 * and how it is handed back through `subvolume` is on lines not visible in
 * this listing. */
69 static int extract_subvolume_name(const char *path, const char **subvolume) {
78 r = validate_subvolume_name(fn);
/* Tests whether `fd` refers to a btrfs subvolume. Visible checks: the inode
 * is examined for being a directory with inode number 256, and the final
 * result is whether the file system type reported by fstatfs() equals
 * BTRFS_SUPER_MAGIC. What is returned when an earlier check fails is not
 * visible in this listing. */
86 int btrfs_is_snapshot(int fd) {
90 /* On btrfs, subvolumes always have inode number 256 */
92 if (fstat(fd, &st) < 0)
95 if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
98 if (fstatfs(fd, &sfs) < 0)
101 return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
/* Creates a snapshot of the directory open as `old_fd` at `new_path`.
 *
 * Two strategies are visible:
 *  - a copy-based one: btrfs_subvol_make(new_path), then
 *    copy_directory_fd(old_fd, new_path, true), then
 *    btrfs_subvol_set_read_only(new_path, true);
 *  - a native one: BTRFS_IOC_SNAP_CREATE_V2 issued on the parent directory of
 *    `new_path`, with BTRFS_SUBVOL_RDONLY set in the flags when `read_only`
 *    is true.
 *
 * NOTE(review): the conditions selecting between the two strategies are on
 * lines not visible in this listing; presumably the result of
 * btrfs_is_snapshot(old_fd) and `fallback_copy` decide -- confirm. */
104 int btrfs_subvol_snapshot_fd(int old_fd, const char *new_path, bool read_only, bool fallback_copy) {
105 struct btrfs_ioctl_vol_args_v2 args = {
106 .flags = read_only ? BTRFS_SUBVOL_RDONLY : 0,
108 _cleanup_close_ int new_fd = -1;
109 const char *subvolume;
114 r = btrfs_is_snapshot(old_fd);
121 r = btrfs_subvol_make(new_path);
125 r = copy_directory_fd(old_fd, new_path, true);
/* Presumably cleans up the freshly made subvolume after a failed copy --
 * the guarding condition is not visible here. */
127 btrfs_subvol_remove(new_path, false);
132 r = btrfs_subvol_set_read_only(new_path, true);
/* Same clean-up call again, following the read-only step. */
134 btrfs_subvol_remove(new_path, false);
142 r = extract_subvolume_name(new_path, &subvolume);
146 new_fd = open_parent(new_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
/* Copies at most sizeof(args.name)-1 bytes; the members of `args` not named
 * in its initializer start out zeroed, so the name stays NUL-terminated. */
150 strncpy(args.name, subvolume, sizeof(args.name)-1);
153 if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &args) < 0)
/* Path-based front end for btrfs_subvol_snapshot_fd(): opens `old_path` as a
 * directory (O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY) into a _cleanup_close_
 * descriptor and forwards all remaining arguments unchanged. */
159 int btrfs_subvol_snapshot(const char *old_path, const char *new_path, bool read_only, bool fallback_copy) {
160 _cleanup_close_ int old_fd = -1;
165 old_fd = open(old_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
169 return btrfs_subvol_snapshot_fd(old_fd, new_path, read_only, fallback_copy);
/* Creates a new btrfs subvolume at `path`: the subvolume name is taken from
 * `path` via extract_subvolume_name(), the parent directory is opened with
 * open_parent(), and BTRFS_IOC_SUBVOL_CREATE is issued on that directory
 * with the name placed in `args.name`. */
172 int btrfs_subvol_make(const char *path) {
173 struct btrfs_ioctl_vol_args args = {};
174 _cleanup_close_ int fd = -1;
175 const char *subvolume;
180 r = extract_subvolume_name(path, &subvolume);
184 fd = open_parent(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
/* `args` is zero-initialized and at most sizeof(args.name)-1 bytes are
 * copied, so the name is always NUL-terminated. */
188 strncpy(args.name, subvolume, sizeof(args.name)-1);
190 if (ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, &args) < 0)
/* Like btrfs_subvol_make(), but wrapped in MAC labelling calls:
 * mac_selinux_create_file_prepare(path, S_IFDIR) runs before the subvolume is
 * made, mac_selinux_create_file_clear() runs right after it, and the
 * function's result on the visible return path is that of
 * mac_smack_fix(path, false, false). */
196 int btrfs_subvol_make_label(const char *path) {
201 r = mac_selinux_create_file_prepare(path, S_IFDIR);
205 r = btrfs_subvol_make(path);
206 mac_selinux_create_file_clear();
211 return mac_smack_fix(path, false, false);
/* Changes the BTRFS_SUBVOL_RDONLY flag of the subvolume open as `fd`. The
 * current flags are read with BTRFS_IOC_SUBVOL_GETFLAGS, a new value is
 * computed, and it is written back with BTRFS_IOC_SUBVOL_SETFLAGS. */
214 int btrfs_subvol_set_read_only_fd(int fd, bool b) {
215 uint64_t flags, nflags;
220 if (fstat(fd, &st) < 0)
/* Checks that `fd` is a directory with inode number 256. */
223 if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
226 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
/* One assignment sets the flag, the other clears it. The condition choosing
 * between them (presumably `b`) is on a line not visible in this listing. */
230 nflags = flags | BTRFS_SUBVOL_RDONLY;
232 nflags = flags & ~BTRFS_SUBVOL_RDONLY;
237 if (ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &nflags) < 0)
/* Path-based front end for btrfs_subvol_set_read_only_fd(): opens `path` as a
 * directory into a _cleanup_close_ descriptor and forwards `b`. */
243 int btrfs_subvol_set_read_only(const char *path, bool b) {
244 _cleanup_close_ int fd = -1;
246 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
250 return btrfs_subvol_set_read_only_fd(fd, b);
/* Queries the flags of `fd` with BTRFS_IOC_SUBVOL_GETFLAGS and, on the
 * visible return path, yields 1 if BTRFS_SUBVOL_RDONLY is set and 0 if it is
 * not (the `!!` normalizes the masked value to 0/1). */
253 int btrfs_subvol_get_read_only_fd(int fd) {
256 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
259 return !!(flags & BTRFS_SUBVOL_RDONLY);
/* Issues BTRFS_IOC_CLONE on `outfd`, passing `infd` as the ioctl argument.
 * How the ioctl result `r` is turned into the return value is not visible in
 * this listing. */
262 int btrfs_reflink(int infd, int outfd) {
268 r = ioctl(outfd, BTRFS_IOC_CLONE, infd);
/* Issues BTRFS_IOC_CLONE_RANGE on `outfd`. `in_offset` and `out_offset` are
 * stored in the request as src_offset and dest_offset respectively.
 * NOTE(review): the initializer lines that would carry `infd` and `sz` are
 * not visible in this listing -- confirm they are set. */
275 int btrfs_clone_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offset, uint64_t sz) {
276 struct btrfs_ioctl_clone_range_args args = {
278 .src_offset = in_offset,
280 .dest_offset = out_offset,
288 r = ioctl(outfd, BTRFS_IOC_CLONE_RANGE, &args);
/* Looks up the block device backing the btrfs file system that `fd` lives
 * on. File system information is fetched with BTRFS_IOC_FS_INFO; the device
 * IDs 1..fsi.max_id are then probed with BTRFS_IOC_DEV_INFO and the reported
 * device path is examined with stat(). How the result is stored in `*dev`
 * and which values are returned is on lines not visible in this listing. */
295 int btrfs_get_block_device_fd(int fd, dev_t *dev) {
296 struct btrfs_ioctl_fs_info_args fsi = {};
302 if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0)
305 /* We won't do this for btrfs RAID */
306 if (fsi.num_devices != 1)
/* Device IDs are probed starting at 1, up to and including max_id. */
309 for (id = 1; id <= fsi.max_id; id++) {
310 struct btrfs_ioctl_dev_info_args di = {
315 if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
/* di.path is cast to char* so it can be passed to stat(). */
322 if (stat((char*) di.path, &st) < 0)
/* The path is tested for being a block device with a non-zero major
 * number. */
325 if (!S_ISBLK(st.st_mode))
328 if (major(st.st_rdev) == 0)
/* Path-based front end for btrfs_get_block_device_fd(): opens `path`
 * read-only (no O_DIRECTORY) into a _cleanup_close_ descriptor and forwards
 * `dev`. */
338 int btrfs_get_block_device(const char *path, dev_t *dev) {
339 _cleanup_close_ int fd = -1;
344 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
348 return btrfs_get_block_device_fd(fd, dev);
/* Determines the ID of the subvolume that `fd` belongs to by issuing
 * BTRFS_IOC_INO_LOOKUP with objectid set to BTRFS_FIRST_FREE_OBJECTID.
 * NOTE(review): the line storing the result into `*ret` is not visible in
 * this listing; presumably it is taken from args.treeid -- confirm. */
351 int btrfs_subvol_get_id_fd(int fd, uint64_t *ret) {
352 struct btrfs_ioctl_ino_lookup_args args = {
353 .objectid = BTRFS_FIRST_FREE_OBJECTID
359 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args) < 0)
/* Advances the minimum search key in `args` by one. The key is treated as a
 * multi-component counter (objectid, type, offset, from most to least
 * significant): the lowest component that is not yet at its maximum is
 * incremented and every less significant component is reset to 0. The
 * boolean results are on lines not visible in this listing; callers treat a
 * false result as "cannot advance any further". */
366 static bool btrfs_ioctl_search_args_inc(struct btrfs_ioctl_search_args *args) {
369 /* the objectid, type, offset together make up the btrfs key,
370 * which is considered a single 136bit integer when
371 * comparing. This call increases the key by one, carrying
372 * over from one component into the next on overflow */
/* Least significant component: 64-bit offset. */
374 if (args->key.min_offset < (uint64_t) -1) {
375 args->key.min_offset++;
/* Offset is saturated: bump the 8-bit type and restart the offset. */
379 if (args->key.min_type < (uint8_t) -1) {
380 args->key.min_type++;
381 args->key.min_offset = 0;
/* Type is saturated too: bump the 64-bit objectid and restart the rest. */
385 if (args->key.min_objectid < (uint64_t) -1) {
386 args->key.min_objectid++;
387 args->key.min_offset = 0;
388 args->key.min_type = 0;
/* Moves the minimum search key of `args` to the key of the item described by
 * header `h`, by copying its objectid, type and offset. */
395 static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) {
399 args->key.min_objectid = h->objectid;
400 args->key.min_type = h->type;
401 args->key.min_offset = h->offset;
/* Compares the minimum key of `args` with its maximum key, component by
 * component in the order objectid, type, offset. The values returned for each
 * outcome are on lines not visible in this listing; callers loop while the
 * result is <= 0. */
404 static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args *args) {
407 /* Compare min and max */
409 if (args->key.min_objectid < args->key.max_objectid)
411 if (args->key.min_objectid > args->key.max_objectid)
414 if (args->key.min_type < args->key.max_type)
416 if (args->key.min_type > args->key.max_type)
419 if (args->key.min_offset < args->key.max_offset)
421 if (args->key.min_offset > args->key.max_offset)
/* Loop helper for walking the items returned in (args).buf. `sh` starts at
 * the beginning of the buffer, the loop runs while `i` is below
 * (args).key.nr_items, and each step moves `sh` forward by the size of the
 * header plus the `len` bytes of payload recorded in it. The initialization
 * and increment of `i` are on lines not visible in this listing. */
427 #define FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) \
429 (sh) = (const struct btrfs_ioctl_search_header*) (args).buf; \
430 (i) < (args).key.nr_items; \
432 (sh) = (const struct btrfs_ioctl_search_header*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header) + (sh)->len))
/* Evaluates to a void pointer to the bytes that immediately follow the
 * search header `sh`, i.e. the item payload. */
434 #define BTRFS_IOCTL_SEARCH_HEADER_BODY(sh) \
435 ((void*) ((uint8_t*) sh + sizeof(struct btrfs_ioctl_search_header)))
/* Fills `*ret` with information about the subvolume that `fd` belongs to:
 * creation time (otime, in microseconds), subvolume ID, read-only flag, UUID
 * and parent UUID. The data comes from the BTRFS_ROOT_ITEM_KEY item of the
 * subvolume in the root tree, located with BTRFS_IOC_TREE_SEARCH. Return
 * values are on lines not visible in this listing. */
437 int btrfs_subvol_get_info_fd(int fd, BtrfsSubvolInfo *ret) {
438 struct btrfs_ioctl_search_args args = {
439 /* Tree of tree roots */
440 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
442 /* Look precisely for the subvolume items */
443 .key.min_type = BTRFS_ROOT_ITEM_KEY,
444 .key.max_type = BTRFS_ROOT_ITEM_KEY,
447 .key.max_offset = (uint64_t) -1,
449 /* No restrictions on the other components */
450 .key.min_transid = 0,
451 .key.max_transid = (uint64_t) -1,
461 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
/* Restrict the search to the object ID of this very subvolume. */
465 args.key.min_objectid = args.key.max_objectid = subvol_id;
/* Keep searching until the minimum key has moved past the maximum key. */
467 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
468 const struct btrfs_ioctl_search_header *sh;
/* nr_items is set to 256 before each call and read back afterwards as the
 * number of items to walk. */
471 args.key.nr_items = 256;
472 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
475 if (args.key.nr_items <= 0)
478 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
480 const struct btrfs_root_item *ri;
482 /* Make sure we start the next search at least from this entry */
483 btrfs_ioctl_search_args_set(&args, sh);
/* Filter on object ID and item type. */
485 if (sh->objectid != subvol_id)
487 if (sh->type != BTRFS_ROOT_ITEM_KEY)
490 /* Older versions of the struct lacked the otime setting */
491 if (sh->len < offsetof(struct btrfs_root_item, otime) + sizeof(struct btrfs_timespec))
494 ri = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
/* Fields are converted from little endian; seconds and nanoseconds are
 * combined into a single microsecond value. */
496 ret->otime = (usec_t) le64toh(ri->otime.sec) * USEC_PER_SEC +
497 (usec_t) le32toh(ri->otime.nsec) / NSEC_PER_USEC;
499 ret->subvol_id = subvol_id;
500 ret->read_only = !!(le64toh(ri->flags) & BTRFS_ROOT_SUBVOL_RDONLY);
/* Compile-time check that the UUID buffers have equal size before copying.
 * Note that only `uuid` is covered by the check, not `parent_uuid`. */
502 assert_cc(sizeof(ri->uuid) == sizeof(ret->uuid));
503 memcpy(&ret->uuid, ri->uuid, sizeof(ret->uuid));
504 memcpy(&ret->parent_uuid, ri->parent_uuid, sizeof(ret->parent_uuid));
510 /* Increase search key by one, to read the next item, if we can. */
511 if (!btrfs_ioctl_search_args_inc(&args))
/* Fills `*ret` with quota information for the subvolume that `fd` belongs
 * to: referenced/exclusive usage from the BTRFS_QGROUP_INFO_KEY item and the
 * corresponding limits from the BTRFS_QGROUP_LIMIT_KEY item of the quota
 * tree. Items are matched by objectid 0 and an offset equal to the subvolume
 * ID. Values that are unknown or unlimited are reported as (uint64_t) -1.
 * Return values are on lines not visible in this listing. */
522 int btrfs_subvol_get_quota_fd(int fd, BtrfsQuotaInfo *ret) {
524 struct btrfs_ioctl_search_args args = {
525 /* Tree of quota items */
526 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
528 /* The object ID is always 0 */
529 .key.min_objectid = 0,
530 .key.max_objectid = 0,
532 /* Look precisely for the quota items */
533 .key.min_type = BTRFS_QGROUP_STATUS_KEY,
534 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
536 /* No restrictions on the other components */
537 .key.min_transid = 0,
538 .key.max_transid = (uint64_t) -1,
542 bool found_info = false, found_limit = false;
548 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
/* The subvolume ID is carried in the offset part of the key. */
552 args.key.min_offset = args.key.max_offset = subvol_id;
554 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
555 const struct btrfs_ioctl_search_header *sh;
558 args.key.nr_items = 256;
559 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
562 if (args.key.nr_items <= 0)
565 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
567 /* Make sure we start the next search at least from this entry */
568 btrfs_ioctl_search_args_set(&args, sh);
/* Filter on object ID 0 and on the offset matching this subvolume. */
570 if (sh->objectid != 0)
572 if (sh->offset != subvol_id)
575 if (sh->type == BTRFS_QGROUP_INFO_KEY) {
576 const struct btrfs_qgroup_info_item *qii = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
578 ret->referenced = le64toh(qii->rfer);
579 ret->exclusive = le64toh(qii->excl);
583 } else if (sh->type == BTRFS_QGROUP_LIMIT_KEY) {
584 const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
586 ret->referenced_max = le64toh(qli->max_rfer);
587 ret->exclusive_max = le64toh(qli->max_excl);
/* A stored limit of 0 is reported to the caller as (uint64_t) -1. */
589 if (ret->referenced_max == 0)
590 ret->referenced_max = (uint64_t) -1;
591 if (ret->exclusive_max == 0)
592 ret->exclusive_max = (uint64_t) -1;
/* Tested once both item kinds have been seen; the statement it guards is
 * not visible in this listing. */
597 if (found_info && found_limit)
601 /* Increase search key by one, to read the next item, if we can. */
602 if (!btrfs_ioctl_search_args_inc(&args))
607 if (!found_limit && !found_info)
/* Defaults of (uint64_t) -1 for the usage and limit fields; the conditions
 * guarding each pair (presumably !found_info and !found_limit) are on lines
 * not visible in this listing. */
611 ret->referenced = (uint64_t) -1;
612 ret->exclusive = (uint64_t) -1;
616 ret->referenced_max = (uint64_t) -1;
617 ret->exclusive_max = (uint64_t) -1;
/* Issues BTRFS_IOC_DEFRAG on `fd` with a NULL argument. */
623 int btrfs_defrag_fd(int fd) {
626 if (ioctl(fd, BTRFS_IOC_DEFRAG, NULL) < 0)
/* Path-based front end for btrfs_defrag_fd(): opens `p` read-write with
 * O_NOFOLLOW into a _cleanup_close_ descriptor and defragments it. */
632 int btrfs_defrag(const char *p) {
633 _cleanup_close_ int fd = -1;
635 fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
639 return btrfs_defrag_fd(fd);
/* Issues BTRFS_IOC_QUOTA_CTL on `fd`; the command is
 * BTRFS_QUOTA_CTL_ENABLE when `b` is true and BTRFS_QUOTA_CTL_DISABLE
 * otherwise. */
642 int btrfs_quota_enable_fd(int fd, bool b) {
643 struct btrfs_ioctl_quota_ctl_args args = {
644 .cmd = b ? BTRFS_QUOTA_CTL_ENABLE : BTRFS_QUOTA_CTL_DISABLE,
649 if (ioctl(fd, BTRFS_IOC_QUOTA_CTL, &args) < 0)
/* Path-based front end for btrfs_quota_enable_fd(): opens `path` read-only
 * with O_NOFOLLOW into a _cleanup_close_ descriptor and forwards `b`. */
655 int btrfs_quota_enable(const char *path, bool b) {
656 _cleanup_close_ int fd = -1;
658 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
662 return btrfs_quota_enable_fd(fd, b);
/* Issues BTRFS_IOC_QGROUP_LIMIT on `fd` with the BTRFS_QGROUP_LIMIT_MAX_RFER
 * flag set. `referenced_max` is translated before being passed on:
 * (uint64_t) -1 becomes 0, 0 becomes 1, and any other value is used as is.
 * NOTE(review): the initializer line naming the field that receives this
 * value is not visible in this listing. */
665 int btrfs_quota_limit_fd(int fd, uint64_t referenced_max) {
666 struct btrfs_ioctl_qgroup_limit_args args = {
668 referenced_max == (uint64_t) -1 ? 0 :
669 referenced_max == 0 ? 1 : referenced_max,
670 .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
675 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args) < 0)
/* Path-based front end for btrfs_quota_limit_fd(): opens `path` read-only
 * with O_NOFOLLOW into a _cleanup_close_ descriptor and forwards
 * `referenced_max`. */
681 int btrfs_quota_limit(const char *path, uint64_t referenced_max) {
682 _cleanup_close_ int fd = -1;
684 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
688 return btrfs_quota_limit_fd(fd, referenced_max);
/* Resizes a btrfs file system that lives on a loopback device, together with
 * the regular file backing that device, to `new_size` bytes (raised to 16 MiB
 * if smaller).
 *
 * Visible steps: the block device of `fd` is looked up, the backing file path
 * is read from /sys/dev/block/MAJ:MIN/loop/backing_file, the backing file and
 * the loop device node are opened, and then BTRFS_IOC_RESIZE, ftruncate() and
 * LOOP_SET_CAPACITY are applied in an order that depends on whether the file
 * system shrinks or grows. Return values, and the effect of `grow_only` when
 * the new size is smaller, are on lines not visible in this listing. */
691 int btrfs_resize_loopback_fd(int fd, uint64_t new_size, bool grow_only) {
692 struct btrfs_ioctl_vol_args args = {};
693 _cleanup_free_ char *p = NULL, *loop = NULL, *backing = NULL;
694 _cleanup_close_ int loop_fd = -1, backing_fd = -1;
699 /* btrfs cannot handle file systems < 16M, hence use this as minimum */
700 if (new_size < 16*1024*1024)
701 new_size = 16*1024*1024;
703 r = btrfs_get_block_device_fd(fd, &dev);
709 if (asprintf(&p, "/sys/dev/block/%u:%u/loop/backing_file", major(dev), minor(dev)) < 0)
711 r = read_one_line_file(p, &backing);
/* The backing file path is tested for being non-empty and absolute. */
716 if (isempty(backing) || !path_is_absolute(backing))
719 backing_fd = open(backing, O_RDWR|O_CLOEXEC|O_NOCTTY);
723 if (fstat(backing_fd, &st) < 0)
725 if (!S_ISREG(st.st_mode))
/* The requested size is compared with the current size of the backing
 * file. */
728 if (new_size == (uint64_t) st.st_size)
731 if (grow_only && new_size < (uint64_t) st.st_size)
734 if (asprintf(&loop, "/dev/block/%u:%u", major(dev), minor(dev)) < 0)
736 loop_fd = open(loop, O_RDWR|O_CLOEXEC|O_NOCTTY);
/* The new size is formatted as a decimal string into args.name; the check
 * detects output that did not fit into the buffer. */
740 if (snprintf(args.name, sizeof(args.name), "%" PRIu64, new_size) >= (int) sizeof(args.name))
743 if (new_size < (uint64_t) st.st_size) {
744 /* Decrease size: first decrease btrfs size, then shorten loopback */
745 if (ioctl(fd, BTRFS_IOC_RESIZE, &args) < 0)
/* Resize the backing file, then issue LOOP_SET_CAPACITY on the loop
 * device. */
749 if (ftruncate(backing_fd, new_size) < 0)
752 if (ioctl(loop_fd, LOOP_SET_CAPACITY, 0) < 0)
755 if (new_size > (uint64_t) st.st_size) {
756 /* Increase size: first enlarge loopback, then increase btrfs size */
757 if (ioctl(fd, BTRFS_IOC_RESIZE, &args) < 0)
761 /* Make sure the free disk space is correctly updated for both file systems */
/* Best effort: the result of fsync() is deliberately discarded. */
763 (void) fsync(backing_fd);
/* Path-based front end for btrfs_resize_loopback_fd(): opens `p` read-only
 * into a _cleanup_close_ descriptor and forwards `new_size` and
 * `grow_only`. */
768 int btrfs_resize_loopback(const char *p, uint64_t new_size, bool grow_only) {
769 _cleanup_close_ int fd = -1;
771 fd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC);
775 return btrfs_resize_loopback_fd(fd, new_size, grow_only);
/* Removes the subvolume named `subvolume` inside the directory open as `fd`.
 *
 * A plain BTRFS_IOC_SNAP_DESTROY is attempted first. If `recursive` is true
 * and that attempt failed with ENOTEMPTY, the root tree is searched for
 * BTRFS_ROOT_BACKREF_KEY items whose offset equals this subvolume's ID (its
 * child subvolumes), each child is removed by a recursive call, and the
 * destroy ioctl is then issued a second time.
 *
 * `subvol_id` may be passed as 0, in which case it is looked up from the
 * opened subvolume. Return values are on lines not visible in this
 * listing. */
778 static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, bool recursive) {
779 struct btrfs_ioctl_search_args args = {
780 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
782 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
783 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
785 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
786 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
788 .key.min_transid = 0,
789 .key.max_transid = (uint64_t) -1,
792 struct btrfs_ioctl_vol_args vol_args = {};
793 _cleanup_close_ int subvol_fd = -1;
799 /* First, try to remove the subvolume. If it happens to be
800 * already empty, this will just work. */
801 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
802 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0)
/* Execution continues past this check only for a recursive request that
 * failed with ENOTEMPTY. */
804 if (!recursive || errno != ENOTEMPTY)
807 /* OK, the subvolume is not empty, let's look for child
808 * subvolumes, and remove them, first */
809 subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
/* An ID of 0 means "not known yet": look it up from the opened subvolume. */
813 if (subvol_id == 0) {
814 r = btrfs_subvol_get_id_fd(subvol_fd, &subvol_id);
/* Back references carry the parent subvolume's ID in the key offset. */
819 args.key.min_offset = args.key.max_offset = subvol_id;
821 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
822 const struct btrfs_ioctl_search_header *sh;
825 args.key.nr_items = 256;
826 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
829 if (args.key.nr_items <= 0)
832 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
833 _cleanup_free_ char *p = NULL;
834 const struct btrfs_root_ref *ref;
835 struct btrfs_ioctl_ino_lookup_args ino_args;
/* Make sure the next search starts at least from this entry. */
837 btrfs_ioctl_search_args_set(&args, sh);
839 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
841 if (sh->offset != subvol_id)
844 ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
/* The child's name is stored directly behind the btrfs_root_ref struct.
 * NOTE(review): name_len is converted with le64toh(); verify this matches
 * the width of the field in struct btrfs_root_ref (defined outside this
 * file). */
846 p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
/* Resolve the directory that contains the child, relative to this
 * subvolume.
 * NOTE(review): ino_args is not zero-initialized at its declaration, and
 * ref->dirid is used without an endianness conversion, unlike name_len
 * above -- confirm both are intended. */
851 ino_args.treeid = subvol_id;
852 ino_args.objectid = ref->dirid;
854 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
857 if (isempty(ino_args.name))
858 /* Subvolume is in the top-level
859 * directory of the subvolume. */
860 r = subvol_remove_children(subvol_fd, p, sh->objectid, recursive);
862 _cleanup_close_ int child_fd = -1;
864 /* Subvolume is somewhere further down,
865 * hence we need to open the
866 * containing directory first */
868 child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
872 r = subvol_remove_children(child_fd, p, sh->objectid, recursive);
878 /* Increase search key by one, to read the next item, if we can. */
879 if (!btrfs_ioctl_search_args_inc(&args))
883 /* OK, the child subvolumes should all be gone now, let's try
884 * again to remove the subvolume */
885 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0)
/* Removes the subvolume at `path`: the subvolume name is taken from `path`
 * via extract_subvolume_name(), the parent directory is opened with
 * open_parent(), and the work is delegated to subvol_remove_children() with
 * a subvolume ID of 0 (i.e. to be looked up there). */
891 int btrfs_subvol_remove(const char *path, bool recursive) {
892 _cleanup_close_ int fd = -1;
893 const char *subvolume;
898 r = extract_subvolume_name(path, &subvolume);
902 fd = open_parent(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
906 return subvol_remove_children(fd, subvolume, 0, recursive);
/* Removes the subvolume named `subvolume` inside the already opened
 * directory `fd`. Thin wrapper around subvol_remove_children() that passes
 * 0 as the subvolume ID. */
909 int btrfs_subvol_remove_fd(int fd, const char *subvolume, bool recursive) {
910 return subvol_remove_children(fd, subvolume, 0, recursive);