chiark / gitweb /
shared: add minimal firewall manipulation helpers for establishing NAT rules, using...
[elogind.git] / src / shared / machine-image.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/statfs.h>
23 #include <linux/fs.h>
24 #include <fcntl.h>
25
26 #include "strv.h"
27 #include "utf8.h"
28 #include "btrfs-util.h"
29 #include "path-util.h"
30 #include "copy.h"
31 #include "machine-image.h"
32
/* Directories that are scanned for images, stored back to back as
 * NUL-terminated strings (walked with NULSTR_FOREACH() further down).
 * Entries are visited in the order listed here. */
static const char image_search_path[] =
        "/var/lib/machines\0"
        "/var/lib/container\0"
        "/usr/local/lib/machines\0"
        "/usr/lib/machines\0";
38
39 Image *image_unref(Image *i) {
40         if (!i)
41                 return NULL;
42
43         free(i->name);
44         free(i->path);
45         free(i);
46         return NULL;
47 }
48
49 static int image_new(
50                 ImageType t,
51                 const char *pretty,
52                 const char *path,
53                 const char *filename,
54                 bool read_only,
55                 usec_t crtime,
56                 usec_t mtime,
57                 Image **ret) {
58
59         _cleanup_(image_unrefp) Image *i = NULL;
60
61         assert(t >= 0);
62         assert(t < _IMAGE_TYPE_MAX);
63         assert(pretty);
64         assert(filename);
65         assert(ret);
66
67         i = new0(Image, 1);
68         if (!i)
69                 return -ENOMEM;
70
71         i->type = t;
72         i->read_only = read_only;
73         i->crtime = crtime;
74         i->mtime = mtime;
75         i->size = i->size_exclusive = (uint64_t) -1;
76         i->limit = i->limit_exclusive = (uint64_t) -1;
77
78         i->name = strdup(pretty);
79         if (!i->name)
80                 return -ENOMEM;
81
82         if (path)
83                 i->path = strjoin(path, "/", filename, NULL);
84         else
85                 i->path = strdup(filename);
86
87         if (!i->path)
88                 return -ENOMEM;
89
90         path_kill_slashes(i->path);
91
92         *ret = i;
93         i = NULL;
94
95         return 0;
96 }
97
98 static int image_make(
99                 const char *pretty,
100                 int dfd,
101                 const char *path,
102                 const char *filename,
103                 Image **ret) {
104
105         struct stat st;
106         bool read_only;
107         int r;
108
109         assert(filename);
110
111         /* We explicitly *do* follow symlinks here, since we want to
112          * allow symlinking trees into /var/lib/container/, and treat
113          * them normally. */
114
115         if (fstatat(dfd, filename, &st, 0) < 0)
116                 return -errno;
117
118         read_only =
119                 (path && path_startswith(path, "/usr")) ||
120                 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
121
122         if (S_ISDIR(st.st_mode)) {
123
124                 if (!ret)
125                         return 1;
126
127                 if (!pretty)
128                         pretty = filename;
129
130                 /* btrfs subvolumes have inode 256 */
131                 if (st.st_ino == 256) {
132                         _cleanup_close_ int fd = -1;
133                         struct statfs sfs;
134
135                         fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
136                         if (fd < 0)
137                                 return -errno;
138
139                         if (fstatfs(fd, &sfs) < 0)
140                                 return -errno;
141
142                         if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC)) {
143                                 BtrfsSubvolInfo info;
144                                 BtrfsQuotaInfo quota;
145
146                                 /* It's a btrfs subvolume */
147
148                                 r = btrfs_subvol_get_info_fd(fd, &info);
149                                 if (r < 0)
150                                         return r;
151
152                                 r = image_new(IMAGE_SUBVOLUME,
153                                               pretty,
154                                               path,
155                                               filename,
156                                               info.read_only || read_only,
157                                               info.otime,
158                                               0,
159                                               ret);
160                                 if (r < 0)
161                                         return r;
162
163                                 r = btrfs_subvol_get_quota_fd(fd, &quota);
164                                 if (r >= 0) {
165                                         (*ret)->size = quota.referred;
166                                         (*ret)->size_exclusive = quota.exclusive;
167
168                                         (*ret)->limit = quota.referred_max;
169                                         (*ret)->limit_exclusive = quota.exclusive_max;
170                                 }
171
172                                 return 1;
173                         }
174                 }
175
176                 /* It's just a normal directory. */
177
178                 r = image_new(IMAGE_DIRECTORY,
179                               pretty,
180                               path,
181                               filename,
182                               read_only,
183                               0,
184                               0,
185                               ret);
186                 if (r < 0)
187                         return r;
188
189                 return 1;
190
191         } else if (S_ISREG(st.st_mode) && endswith(filename, ".gpt")) {
192                 usec_t crtime = 0;
193
194                 /* It's a GPT block device */
195
196                 if (!ret)
197                         return 1;
198
199                 fd_getcrtime_at(dfd, filename, &crtime, 0);
200
201                 if (!pretty)
202                         pretty = strndupa(filename, strlen(filename) - 4);
203
204                 r = image_new(IMAGE_GPT,
205                               pretty,
206                               path,
207                               filename,
208                               !(st.st_mode & 0222) || read_only,
209                               crtime,
210                               timespec_load(&st.st_mtim),
211                               ret);
212                 if (r < 0)
213                         return r;
214
215                 (*ret)->size = (*ret)->size_exclusive = st.st_blocks * 512;
216                 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
217
218                 return 1;
219         }
220
221         return 0;
222 }
223
224 int image_find(const char *name, Image **ret) {
225         const char *path;
226         int r;
227
228         assert(name);
229
230         /* There are no images with invalid names */
231         if (!image_name_is_valid(name))
232                 return 0;
233
234         NULSTR_FOREACH(path, image_search_path) {
235                 _cleanup_closedir_ DIR *d = NULL;
236
237                 d = opendir(path);
238                 if (!d) {
239                         if (errno == ENOENT)
240                                 continue;
241
242                         return -errno;
243                 }
244
245                 r = image_make(NULL, dirfd(d), path, name, ret);
246                 if (r == 0 || r == -ENOENT) {
247                         _cleanup_free_ char *gpt = NULL;
248
249                         gpt = strappend(name, ".gpt");
250                         if (!gpt)
251                                 return -ENOMEM;
252
253                         r = image_make(NULL, dirfd(d), path, gpt, ret);
254                         if (r == 0 || r == -ENOENT)
255                                 continue;
256                 }
257                 if (r < 0)
258                         return r;
259
260                 return 1;
261         }
262
263         if (streq(name, ".host"))
264                 return image_make(".host", AT_FDCWD, NULL, "/", ret);
265
266         return 0;
267 };
268
269 int image_discover(Hashmap *h) {
270         const char *path;
271         int r;
272
273         assert(h);
274
275         NULSTR_FOREACH(path, image_search_path) {
276                 _cleanup_closedir_ DIR *d = NULL;
277                 struct dirent *de;
278
279                 d = opendir(path);
280                 if (!d) {
281                         if (errno == ENOENT)
282                                 continue;
283
284                         return -errno;
285                 }
286
287                 FOREACH_DIRENT_ALL(de, d, return -errno) {
288                         _cleanup_(image_unrefp) Image *image = NULL;
289
290                         if (!image_name_is_valid(de->d_name))
291                                 continue;
292
293                         if (hashmap_contains(h, de->d_name))
294                                 continue;
295
296                         r = image_make(NULL, dirfd(d), path, de->d_name, &image);
297                         if (r == 0 || r == -ENOENT)
298                                 continue;
299                         if (r < 0)
300                                 return r;
301
302                         r = hashmap_put(h, image->name, image);
303                         if (r < 0)
304                                 return r;
305
306                         image = NULL;
307                 }
308         }
309
310         if (!hashmap_contains(h, ".host")) {
311                 _cleanup_(image_unrefp) Image *image = NULL;
312
313                 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
314                 if (r < 0)
315                         return r;
316
317                 r = hashmap_put(h, image->name, image);
318                 if (r < 0)
319                         return r;
320
321                 image = NULL;
322
323         }
324
325         return 0;
326 }
327
328 void image_hashmap_free(Hashmap *map) {
329         Image *i;
330
331         while ((i = hashmap_steal_first(map)))
332                 image_unref(i);
333
334         hashmap_free(map);
335 }
336
337 int image_remove(Image *i) {
338         assert(i);
339
340         if (path_equal(i->path, "/") ||
341             path_startswith(i->path, "/usr"))
342                 return -EROFS;
343
344         switch (i->type) {
345
346         case IMAGE_SUBVOLUME:
347                 return btrfs_subvol_remove(i->path);
348
349         case IMAGE_DIRECTORY:
350         case IMAGE_GPT:
351                 return rm_rf_dangerous(i->path, false, true, false);
352
353         default:
354                 return -ENOTSUP;
355         }
356 }
357
358 int image_rename(Image *i, const char *new_name) {
359         _cleanup_free_ char *new_path = NULL, *nn = NULL;
360         int r;
361
362         assert(i);
363
364         if (!image_name_is_valid(new_name))
365                 return -EINVAL;
366
367         if (path_equal(i->path, "/") ||
368             path_startswith(i->path, "/usr"))
369                 return -EROFS;
370
371         r = image_find(new_name, NULL);
372         if (r < 0)
373                 return r;
374         if (r > 0)
375                 return -EEXIST;
376
377         switch (i->type) {
378
379         case IMAGE_SUBVOLUME:
380         case IMAGE_DIRECTORY:
381                 new_path = file_in_same_dir(i->path, new_name);
382                 break;
383
384         case IMAGE_GPT: {
385                 const char *fn;
386
387                 fn = strappenda(new_name, ".gpt");
388                 new_path = file_in_same_dir(i->path, fn);
389                 break;
390         }
391
392         default:
393                 return -ENOTSUP;
394         }
395
396         if (!new_path)
397                 return -ENOMEM;
398
399         nn = strdup(new_name);
400         if (!nn)
401                 return -ENOMEM;
402
403         if (renameat2(AT_FDCWD, i->path, AT_FDCWD, new_path, RENAME_NOREPLACE) < 0)
404                 return -errno;
405
406         free(i->path);
407         i->path = new_path;
408         new_path = NULL;
409
410         free(i->name);
411         i->name = nn;
412         nn = NULL;
413
414         return 0;
415 }
416
417 int image_clone(Image *i, const char *new_name, bool read_only) {
418         const char *new_path;
419         int r;
420
421         assert(i);
422
423         if (!image_name_is_valid(new_name))
424                 return -EINVAL;
425
426         r = image_find(new_name, NULL);
427         if (r < 0)
428                 return r;
429         if (r > 0)
430                 return -EEXIST;
431
432         switch (i->type) {
433
434         case IMAGE_SUBVOLUME:
435         case IMAGE_DIRECTORY:
436                 new_path = strappenda("/var/lib/container/", new_name);
437
438                 r = btrfs_subvol_snapshot(i->path, new_path, read_only, true);
439                 break;
440
441         case IMAGE_GPT:
442                 new_path = strappenda("/var/lib/container/", new_name, ".gpt");
443
444                 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
445                 break;
446
447         default:
448                 return -ENOTSUP;
449         }
450
451         if (r < 0)
452                 return r;
453
454         return 0;
455 }
456
457 int image_read_only(Image *i, bool b) {
458         int r;
459         assert(i);
460
461         if (path_equal(i->path, "/") ||
462             path_startswith(i->path, "/usr"))
463                 return -EROFS;
464
465         switch (i->type) {
466
467         case IMAGE_SUBVOLUME:
468                 r = btrfs_subvol_set_read_only(i->path, b);
469                 if (r < 0)
470                         return r;
471                 break;
472
473         case IMAGE_GPT: {
474                 struct stat st;
475
476                 if (stat(i->path, &st) < 0)
477                         return -errno;
478
479                 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
480                         return -errno;
481
482                 /* If the images is now read-only, it's a good time to
483                  * defrag it, given that no write patterns will
484                  * fragment it again. */
485                 if (b)
486                         (void) btrfs_defrag(i->path);
487                 break;
488         }
489
490         case IMAGE_DIRECTORY:
491         default:
492                 return -ENOTSUP;
493         }
494
495         return 0;
496 }
497
498 static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
499         [IMAGE_DIRECTORY] = "directory",
500         [IMAGE_SUBVOLUME] = "subvolume",
501         [IMAGE_GPT] = "gpt",
502 };
503
504 DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);