chiark / gitweb /
pty: minor modernization
[elogind.git] / src / shared / machine-image.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/statfs.h>
23 #include <linux/fs.h>
24 #include <fcntl.h>
25
26 #include "strv.h"
27 #include "utf8.h"
28 #include "btrfs-util.h"
29 #include "path-util.h"
30 #include "copy.h"
31 #include "machine-image.h"
32
/* NUL-separated list of directories that are scanned for images. Both
 * image_find() and image_discover() walk it with NULSTR_FOREACH() in the
 * order given here. image_make() treats anything whose path starts with
 * "/usr" as read-only. */
static const char image_search_path[] =
        "/var/lib/machines\0"
        "/var/lib/container\0"
        "/usr/local/lib/machines\0"
        "/usr/lib/machines\0";
38
39 Image *image_unref(Image *i) {
40         if (!i)
41                 return NULL;
42
43         free(i->name);
44         free(i->path);
45         free(i);
46         return NULL;
47 }
48
49 static int image_new(
50                 ImageType t,
51                 const char *pretty,
52                 const char *path,
53                 const char *filename,
54                 bool read_only,
55                 usec_t crtime,
56                 usec_t mtime,
57                 Image **ret) {
58
59         _cleanup_(image_unrefp) Image *i = NULL;
60
61         assert(t >= 0);
62         assert(t < _IMAGE_TYPE_MAX);
63         assert(pretty);
64         assert(filename);
65         assert(ret);
66
67         i = new0(Image, 1);
68         if (!i)
69                 return -ENOMEM;
70
71         i->type = t;
72         i->read_only = read_only;
73         i->crtime = crtime;
74         i->mtime = mtime;
75         i->size = i->size_exclusive = (uint64_t) -1;
76         i->limit = i->limit_exclusive = (uint64_t) -1;
77
78         i->name = strdup(pretty);
79         if (!i->name)
80                 return -ENOMEM;
81
82         if (path)
83                 i->path = strjoin(path, "/", filename, NULL);
84         else
85                 i->path = strdup(filename);
86
87         if (!i->path)
88                 return -ENOMEM;
89
90         path_kill_slashes(i->path);
91
92         *ret = i;
93         i = NULL;
94
95         return 0;
96 }
97
98 static int image_make(
99                 const char *pretty,
100                 int dfd,
101                 const char *path,
102                 const char *filename,
103                 Image **ret) {
104
105         struct stat st;
106         bool read_only;
107         int r;
108
109         assert(filename);
110
111         /* We explicitly *do* follow symlinks here, since we want to
112          * allow symlinking trees into /var/lib/container/, and treat
113          * them normally. */
114
115         if (fstatat(dfd, filename, &st, 0) < 0)
116                 return -errno;
117
118         read_only =
119                 (path && path_startswith(path, "/usr")) ||
120                 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
121
122         if (S_ISDIR(st.st_mode)) {
123                 _cleanup_close_ int fd = -1;
124                 unsigned file_attr = 0;
125
126                 if (!ret)
127                         return 1;
128
129                 if (!pretty)
130                         pretty = filename;
131
132                 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
133                 if (fd < 0)
134                         return -errno;
135
136                 /* btrfs subvolumes have inode 256 */
137                 if (st.st_ino == 256) {
138                         struct statfs sfs;
139
140                         if (fstatfs(fd, &sfs) < 0)
141                                 return -errno;
142
143                         if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC)) {
144                                 BtrfsSubvolInfo info;
145                                 BtrfsQuotaInfo quota;
146
147                                 /* It's a btrfs subvolume */
148
149                                 r = btrfs_subvol_get_info_fd(fd, &info);
150                                 if (r < 0)
151                                         return r;
152
153                                 r = image_new(IMAGE_SUBVOLUME,
154                                               pretty,
155                                               path,
156                                               filename,
157                                               info.read_only || read_only,
158                                               info.otime,
159                                               0,
160                                               ret);
161                                 if (r < 0)
162                                         return r;
163
164                                 r = btrfs_subvol_get_quota_fd(fd, &quota);
165                                 if (r >= 0) {
166                                         (*ret)->size = quota.referred;
167                                         (*ret)->size_exclusive = quota.exclusive;
168
169                                         (*ret)->limit = quota.referred_max;
170                                         (*ret)->limit_exclusive = quota.exclusive_max;
171                                 }
172
173                                 return 1;
174                         }
175                 }
176
177                 /* If the IMMUTABLE bit is set, we consider the
178                  * directory read-only. Since the ioctl is not
179                  * supported everywhere we ignore failures. */
180                 (void) read_attr_fd(fd, &file_attr);
181
182                 /* It's just a normal directory. */
183                 r = image_new(IMAGE_DIRECTORY,
184                               pretty,
185                               path,
186                               filename,
187                               read_only || (file_attr & FS_IMMUTABLE_FL),
188                               0,
189                               0,
190                               ret);
191                 if (r < 0)
192                         return r;
193
194                 return 1;
195
196         } else if (S_ISREG(st.st_mode) && endswith(filename, ".gpt")) {
197                 usec_t crtime = 0;
198
199                 /* It's a GPT block device */
200
201                 if (!ret)
202                         return 1;
203
204                 fd_getcrtime_at(dfd, filename, &crtime, 0);
205
206                 if (!pretty)
207                         pretty = strndupa(filename, strlen(filename) - 4);
208
209                 r = image_new(IMAGE_GPT,
210                               pretty,
211                               path,
212                               filename,
213                               !(st.st_mode & 0222) || read_only,
214                               crtime,
215                               timespec_load(&st.st_mtim),
216                               ret);
217                 if (r < 0)
218                         return r;
219
220                 (*ret)->size = (*ret)->size_exclusive = st.st_blocks * 512;
221                 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
222
223                 return 1;
224         }
225
226         return 0;
227 }
228
229 int image_find(const char *name, Image **ret) {
230         const char *path;
231         int r;
232
233         assert(name);
234
235         /* There are no images with invalid names */
236         if (!image_name_is_valid(name))
237                 return 0;
238
239         NULSTR_FOREACH(path, image_search_path) {
240                 _cleanup_closedir_ DIR *d = NULL;
241
242                 d = opendir(path);
243                 if (!d) {
244                         if (errno == ENOENT)
245                                 continue;
246
247                         return -errno;
248                 }
249
250                 r = image_make(NULL, dirfd(d), path, name, ret);
251                 if (r == 0 || r == -ENOENT) {
252                         _cleanup_free_ char *gpt = NULL;
253
254                         gpt = strappend(name, ".gpt");
255                         if (!gpt)
256                                 return -ENOMEM;
257
258                         r = image_make(NULL, dirfd(d), path, gpt, ret);
259                         if (r == 0 || r == -ENOENT)
260                                 continue;
261                 }
262                 if (r < 0)
263                         return r;
264
265                 return 1;
266         }
267
268         if (streq(name, ".host"))
269                 return image_make(".host", AT_FDCWD, NULL, "/", ret);
270
271         return 0;
272 };
273
274 int image_discover(Hashmap *h) {
275         const char *path;
276         int r;
277
278         assert(h);
279
280         NULSTR_FOREACH(path, image_search_path) {
281                 _cleanup_closedir_ DIR *d = NULL;
282                 struct dirent *de;
283
284                 d = opendir(path);
285                 if (!d) {
286                         if (errno == ENOENT)
287                                 continue;
288
289                         return -errno;
290                 }
291
292                 FOREACH_DIRENT_ALL(de, d, return -errno) {
293                         _cleanup_(image_unrefp) Image *image = NULL;
294
295                         if (!image_name_is_valid(de->d_name))
296                                 continue;
297
298                         if (hashmap_contains(h, de->d_name))
299                                 continue;
300
301                         r = image_make(NULL, dirfd(d), path, de->d_name, &image);
302                         if (r == 0 || r == -ENOENT)
303                                 continue;
304                         if (r < 0)
305                                 return r;
306
307                         r = hashmap_put(h, image->name, image);
308                         if (r < 0)
309                                 return r;
310
311                         image = NULL;
312                 }
313         }
314
315         if (!hashmap_contains(h, ".host")) {
316                 _cleanup_(image_unrefp) Image *image = NULL;
317
318                 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
319                 if (r < 0)
320                         return r;
321
322                 r = hashmap_put(h, image->name, image);
323                 if (r < 0)
324                         return r;
325
326                 image = NULL;
327
328         }
329
330         return 0;
331 }
332
333 void image_hashmap_free(Hashmap *map) {
334         Image *i;
335
336         while ((i = hashmap_steal_first(map)))
337                 image_unref(i);
338
339         hashmap_free(map);
340 }
341
342 int image_remove(Image *i) {
343         assert(i);
344
345         if (path_equal(i->path, "/") ||
346             path_startswith(i->path, "/usr"))
347                 return -EROFS;
348
349         switch (i->type) {
350
351         case IMAGE_SUBVOLUME:
352                 return btrfs_subvol_remove(i->path);
353
354         case IMAGE_DIRECTORY:
355                 /* Allow deletion of read-only directories */
356                 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
357
358                 /* fall through */
359
360         case IMAGE_GPT:
361                 return rm_rf_dangerous(i->path, false, true, false);
362
363         default:
364                 return -ENOTSUP;
365         }
366 }
367
368 int image_rename(Image *i, const char *new_name) {
369         _cleanup_free_ char *new_path = NULL, *nn = NULL;
370         unsigned file_attr = 0;
371         int r;
372
373         assert(i);
374
375         if (!image_name_is_valid(new_name))
376                 return -EINVAL;
377
378         if (path_equal(i->path, "/") ||
379             path_startswith(i->path, "/usr"))
380                 return -EROFS;
381
382         r = image_find(new_name, NULL);
383         if (r < 0)
384                 return r;
385         if (r > 0)
386                 return -EEXIST;
387
388         switch (i->type) {
389
390         case IMAGE_DIRECTORY:
391                 /* Turn of the immutable bit while we rename the image, so that we can rename it */
392                 (void) read_attr_path(i->path, &file_attr);
393
394                 if (file_attr & FS_IMMUTABLE_FL)
395                         (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
396
397                 /* fall through */
398
399         case IMAGE_SUBVOLUME:
400                 new_path = file_in_same_dir(i->path, new_name);
401                 break;
402
403         case IMAGE_GPT: {
404                 const char *fn;
405
406                 fn = strappenda(new_name, ".gpt");
407                 new_path = file_in_same_dir(i->path, fn);
408                 break;
409         }
410
411         default:
412                 return -ENOTSUP;
413         }
414
415         if (!new_path)
416                 return -ENOMEM;
417
418         nn = strdup(new_name);
419         if (!nn)
420                 return -ENOMEM;
421
422         if (renameat2(AT_FDCWD, i->path, AT_FDCWD, new_path, RENAME_NOREPLACE) < 0)
423                 return -errno;
424
425         /* Restore the immutable bit, if it was set before */
426         if (file_attr & FS_IMMUTABLE_FL)
427                 (void) chattr_path(new_path, true, FS_IMMUTABLE_FL);
428
429         free(i->path);
430         i->path = new_path;
431         new_path = NULL;
432
433         free(i->name);
434         i->name = nn;
435         nn = NULL;
436
437         return 0;
438 }
439
440 int image_clone(Image *i, const char *new_name, bool read_only) {
441         const char *new_path;
442         int r;
443
444         assert(i);
445
446         if (!image_name_is_valid(new_name))
447                 return -EINVAL;
448
449         r = image_find(new_name, NULL);
450         if (r < 0)
451                 return r;
452         if (r > 0)
453                 return -EEXIST;
454
455         switch (i->type) {
456
457         case IMAGE_SUBVOLUME:
458         case IMAGE_DIRECTORY:
459                 new_path = strappenda("/var/lib/container/", new_name);
460
461                 r = btrfs_subvol_snapshot(i->path, new_path, read_only, true);
462                 break;
463
464         case IMAGE_GPT:
465                 new_path = strappenda("/var/lib/container/", new_name, ".gpt");
466
467                 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
468                 break;
469
470         default:
471                 return -ENOTSUP;
472         }
473
474         if (r < 0)
475                 return r;
476
477         return 0;
478 }
479
480 int image_read_only(Image *i, bool b) {
481         int r;
482         assert(i);
483
484         if (path_equal(i->path, "/") ||
485             path_startswith(i->path, "/usr"))
486                 return -EROFS;
487
488         switch (i->type) {
489
490         case IMAGE_SUBVOLUME:
491                 r = btrfs_subvol_set_read_only(i->path, b);
492                 if (r < 0)
493                         return r;
494
495                 break;
496
497         case IMAGE_DIRECTORY:
498                 /* For simple directory trees we cannot use the access
499                    mode of the top-level directory, since it has an
500                    effect on the container itself.  However, we can
501                    use the "immutable" flag, to at least make the
502                    top-level directory read-only. It's not as good as
503                    a read-only subvolume, but at least something, and
504                    we can read the value back.*/
505
506                 r = chattr_path(i->path, b, FS_IMMUTABLE_FL);
507                 if (r < 0)
508                         return r;
509
510                 break;
511
512         case IMAGE_GPT: {
513                 struct stat st;
514
515                 if (stat(i->path, &st) < 0)
516                         return -errno;
517
518                 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
519                         return -errno;
520
521                 /* If the images is now read-only, it's a good time to
522                  * defrag it, given that no write patterns will
523                  * fragment it again. */
524                 if (b)
525                         (void) btrfs_defrag(i->path);
526                 break;
527         }
528
529         default:
530                 return -ENOTSUP;
531         }
532
533         return 0;
534 }
535
536 static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
537         [IMAGE_DIRECTORY] = "directory",
538         [IMAGE_SUBVOLUME] = "subvolume",
539         [IMAGE_GPT] = "gpt",
540 };
541
542 DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);