chiark / gitweb /
bus: add minimal locking around the memfd cache
[elogind.git] / src / libsystemd-bus / bus-kernel.c
index 3aa408414ee3f79029d5e090e4f17bb4d2021b91..699d24185ea1e1c521fd87c009a58c66030f01bc 100644 (file)
@@ -45,8 +45,6 @@
 #define KDBUS_ITEM_HEADER_SIZE offsetof(struct kdbus_item, data)
 #define KDBUS_ITEM_SIZE(s) ALIGN8((s) + KDBUS_ITEM_HEADER_SIZE)
 
-#define KDBUS_POOL_SIZE (4*1024*1024)
-
 static int parse_unique_name(const char *s, uint64_t *id) {
         int r;
 
@@ -65,19 +63,36 @@ static int parse_unique_name(const char *s, uint64_t *id) {
 
 static void append_payload_vec(struct kdbus_item **d, const void *p, size_t sz) {
         assert(d);
-        assert(p);
         assert(sz > 0);
 
         *d = ALIGN8_PTR(*d);
 
+        /* Note that p can be NULL, which encodes a region full of
+         * zeroes, which is useful to optimize certain padding
+         * conditions */
+
         (*d)->size = offsetof(struct kdbus_item, vec) + sizeof(struct kdbus_vec);
         (*d)->type = KDBUS_MSG_PAYLOAD_VEC;
-        (*d)->vec.address = (uint64_t) p;
+        (*d)->vec.address = PTR_TO_UINT64(p);
         (*d)->vec.size = sz;
 
         *d = (struct kdbus_item *) ((uint8_t*) *d + (*d)->size);
 }
 
+static void append_payload_memfd(struct kdbus_item **d, int memfd, size_t sz) {
+        assert(d);
+        assert(memfd >= 0);
+        assert(sz > 0);
+
+        *d = ALIGN8_PTR(*d);
+        (*d)->size = offsetof(struct kdbus_item, memfd) + sizeof(struct kdbus_memfd);
+        (*d)->type = KDBUS_MSG_PAYLOAD_MEMFD;
+        (*d)->memfd.fd = memfd;
+        (*d)->memfd.size = sz;
+
+        *d = (struct kdbus_item *) ((uint8_t*) *d + (*d)->size);
+}
+
 static void append_destination(struct kdbus_item **d, const char *s, size_t length) {
         assert(d);
         assert(s);
@@ -210,8 +225,11 @@ static int bus_message_setup_kmsg(sd_bus *b, sd_bus_message *m) {
 
         sz = offsetof(struct kdbus_msg, items);
 
+        assert_cc(ALIGN8(offsetof(struct kdbus_item, vec) + sizeof(struct kdbus_vec)) ==
+                  ALIGN8(offsetof(struct kdbus_item, memfd) + sizeof(struct kdbus_memfd)));
+
         /* Add in fixed header, fields header and payload */
-        sz += (1 + !!m->fields + m->n_body_parts) *
+        sz += (1 + m->n_body_parts) *
                 ALIGN8(offsetof(struct kdbus_item, vec) + sizeof(struct kdbus_vec));
 
         /* Add space for bloom filter */
@@ -228,9 +246,12 @@ static int bus_message_setup_kmsg(sd_bus *b, sd_bus_message *m) {
                 sz += ALIGN8(offsetof(struct kdbus_item, fds) + sizeof(int)*m->n_fds);
 
         m->kdbus = memalign(8, sz);
-        if (!m->kdbus)
-                return -ENOMEM;
+        if (!m->kdbus) {
+                r = -ENOMEM;
+                goto fail;
+        }
 
+        m->free_kdbus = true;
         memset(m->kdbus, 0, sz);
 
         m->kdbus->flags =
@@ -249,24 +270,43 @@ static int bus_message_setup_kmsg(sd_bus *b, sd_bus_message *m) {
         if (well_known)
                 append_destination(&d, m->destination, dl);
 
-        append_payload_vec(&d, m->header, sizeof(*m->header));
+        append_payload_vec(&d, m->header, BUS_MESSAGE_BODY_BEGIN(m));
+
+        MESSAGE_FOREACH_PART(part, i, m) {
+                if (part->is_zero) {
+                        /* If this is padding then simply send a
+                         * vector with a NULL data pointer which the
+                         * kernel will just pass through. This is the
+                         * most efficient way to encode zeroes */
+
+                        append_payload_vec(&d, NULL, part->size);
+                        continue;
+                }
+
+                if (part->memfd >= 0 && part->sealed && m->destination) {
+                        /* Try to send a memfd, if the part is sealed and this
+                         * is not a broadcast (i.e. a destination is set). */
+
+                        append_payload_memfd(&d, part->memfd, part->size);
+                        continue;
+                }
 
-        if (m->fields)
-                append_payload_vec(&d, m->fields, ALIGN8(m->header->fields_size));
+                /* Otherwise let's send a vector to the actual data,
+                 * for that we need to map it first. */
+                r = bus_body_part_map(part);
+                if (r < 0)
+                        goto fail;
 
-        MESSAGE_FOREACH_PART(part, i, m)
                 append_payload_vec(&d, part->data, part->size);
+        }
 
         if (m->kdbus->dst_id == KDBUS_DST_ID_BROADCAST) {
                 void *p;
 
                 p = append_bloom(&d, BLOOM_SIZE);
                 r = bus_message_setup_bloom(m, p);
-                if (r < 0) {
-                        free(m->kdbus);
-                        m->kdbus = NULL;
-                        return -r;
-                }
+                if (r < 0)
+                        goto fail;
         }
 
         if (m->n_fds > 0)
@@ -275,9 +315,11 @@ static int bus_message_setup_kmsg(sd_bus *b, sd_bus_message *m) {
         m->kdbus->size = (uint8_t*) d - (uint8_t*) m->kdbus;
         assert(m->kdbus->size <= sz);
 
-        m->free_kdbus = true;
-
         return 0;
+
+fail:
+        m->poisoned = true;
+        return r;
 }
 
 int bus_kernel_take_fd(sd_bus *b) {
@@ -391,33 +433,17 @@ static void close_kdbus_msg(sd_bus *bus, struct kdbus_msg *k) {
 
         KDBUS_ITEM_FOREACH(d, k) {
 
-                if (d->type != KDBUS_MSG_FDS)
-                        continue;
-
-                close_many(d->fds, (d->size - offsetof(struct kdbus_item, fds)) / sizeof(int));
+                if (d->type == KDBUS_MSG_FDS)
+                        close_many(d->fds, (d->size - offsetof(struct kdbus_item, fds)) / sizeof(int));
+                else if (d->type == KDBUS_MSG_PAYLOAD_MEMFD)
+                        close_nointr_nofail(d->memfd.fd);
         }
 }
 
-static bool range_contains(
-                size_t astart, size_t asize,
-                size_t bstart, size_t bsize,
-                void *a, void **b) {
-
-        if (bstart < astart)
-                return false;
-
-        if (bstart + bsize > astart + asize)
-                return false;
-
-        *b = (uint8_t*) a + (bstart - astart);
-
-        return true;
-}
-
 static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k, sd_bus_message **ret) {
         sd_bus_message *m = NULL;
         struct kdbus_item *d;
-        unsigned n_payload = 0, n_fds = 0;
+        unsigned n_fds = 0;
         _cleanup_free_ int *fds = NULL;
         struct bus_header *h = NULL;
         size_t total, n_bytes = 0, idx = 0;
@@ -439,15 +465,21 @@ static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k, sd_bus_mess
                 if (d->type == KDBUS_MSG_PAYLOAD_VEC) {
 
                         if (!h) {
-                                if (d->vec.size < sizeof(struct bus_header))
-                                        return -EBADMSG;
-
                                 h = UINT64_TO_PTR(d->vec.address);
+
+                                if (!bus_header_is_complete(h, d->vec.size))
+                                        return -EBADMSG;
                         }
 
-                        n_payload++;
                         n_bytes += d->vec.size;
 
+                } else if (d->type == KDBUS_MSG_PAYLOAD_MEMFD) {
+
+                        if (!h)
+                                return -EBADMSG;
+
+                        n_bytes += d->memfd.size;
+
                 } else if (d->type == KDBUS_MSG_FDS) {
                         int *f;
                         unsigned j;
@@ -461,16 +493,14 @@ static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k, sd_bus_mess
                         memcpy(fds + n_fds, d->fds, sizeof(int) * j);
                         n_fds += j;
 
-                } else if (d->type == KDBUS_MSG_DST_NAME)
-                        destination = d->str;
-                else if (d->type == KDBUS_MSG_SRC_SECLABEL)
+                } else if (d->type == KDBUS_MSG_SRC_SECLABEL)
                         seclabel = d->str;
         }
 
         if (!h)
                 return -EBADMSG;
 
-        r = bus_header_size(h, &total);
+        r = bus_header_message_size(h, &total);
         if (r < 0)
                 return r;
 
@@ -489,11 +519,7 @@ static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k, sd_bus_mess
                 if (d->type == KDBUS_MSG_PAYLOAD_VEC) {
                         size_t begin_body;
 
-                        /* Fill in fields material */
-                        range_contains(idx, d->vec.size, ALIGN8(sizeof(struct bus_header)), BUS_MESSAGE_FIELDS_SIZE(m),
-                                       UINT64_TO_PTR(d->vec.address), &m->fields);
-
-                        begin_body = ALIGN8(sizeof(struct bus_header)) + ALIGN8(BUS_MESSAGE_FIELDS_SIZE(m));
+                        begin_body = BUS_MESSAGE_BODY_BEGIN(m);
 
                         if (idx + d->vec.size > begin_body) {
                                 struct bus_body_part *part;
@@ -502,22 +528,42 @@ static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k, sd_bus_mess
 
                                 part = message_append_part(m);
                                 if (!part) {
-                                        sd_bus_message_unref(m);
-                                        return -ENOMEM;
+                                        r = -ENOMEM;
+                                        goto fail;
                                 }
 
                                 if (idx >= begin_body) {
-                                        part->data = (void*) d->vec.address;
+                                        part->data = UINT64_TO_PTR(d->vec.address);
                                         part->size = d->vec.size;
                                 } else {
-                                        part->data = (uint8_t*) (uintptr_t) d->vec.address + (begin_body - idx);
+                                        part->data = d->vec.address != 0 ? (uint8_t*) UINT64_TO_PTR(d->vec.address) + (begin_body - idx) : NULL;
                                         part->size = d->vec.size - (begin_body - idx);
                                 }
 
+                                part->is_zero = d->vec.address == 0;
                                 part->sealed = true;
                         }
 
                         idx += d->vec.size;
+                } else if (d->type == KDBUS_MSG_PAYLOAD_MEMFD) {
+                        struct bus_body_part *part;
+
+                        if (idx < BUS_MESSAGE_BODY_BEGIN(m)) {
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        part = message_append_part(m);
+                        if (!part) {
+                                r = -ENOMEM;
+                                goto fail;
+                        }
+
+                        part->memfd = d->memfd.fd;
+                        part->size = d->memfd.size;
+                        part->sealed = true;
+
+                        idx += d->memfd.size;
 
                 } else if (d->type == KDBUS_MSG_SRC_CREDS) {
                         m->pid_starttime = d->creds.starttime / NSEC_PER_USEC;
@@ -545,22 +591,16 @@ static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k, sd_bus_mess
                 else if (d->type == KDBUS_MSG_SRC_CAPS) {
                         m->capability = d->data;
                         m->capability_size = l;
-                } else if (d->type != KDBUS_MSG_FDS &&
-                           d->type != KDBUS_MSG_DST_NAME &&
+                } else if (d->type == KDBUS_MSG_DST_NAME)
+                        destination = d->str;
+                else if (d->type != KDBUS_MSG_FDS &&
                            d->type != KDBUS_MSG_SRC_SECLABEL)
                         log_debug("Got unknown field from kernel %llu", d->type);
         }
 
-        if ((BUS_MESSAGE_FIELDS_SIZE(m) > 0 && !m->fields)) {
-                sd_bus_message_unref(m);
-                return -EBADMSG;
-        }
-
         r = bus_message_parse_fields(m);
-        if (r < 0) {
-                sd_bus_message_unref(m);
-                return r;
-        }
+        if (r < 0)
+                goto fail;
 
         if (k->src_id == KDBUS_SRC_ID_KERNEL)
                 m->sender = "org.freedesktop.DBus";
@@ -589,6 +629,21 @@ static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k, sd_bus_mess
 
         *ret = m;
         return 1;
+
+fail:
+        if (m) {
+                struct bus_body_part *part;
+                unsigned i;
+
+                /* Make sure the memfds are not closed twice */
+                MESSAGE_FOREACH_PART(part, i, m)
+                        if (part->memfd >= 0)
+                                part->memfd = -1;
+
+                sd_bus_message_unref(m);
+        }
+
+        return r;
 }
 
 int bus_kernel_read_message(sd_bus *bus, sd_bus_message **m) {
@@ -606,15 +661,6 @@ int bus_kernel_read_message(sd_bus *bus, sd_bus_message **m) {
                 return -errno;
         }
 
-
-/*                 /\* Let's tell valgrind that there's really no need to */
-/*                  * initialize this fully. This should be removed again */
-/*                  * when valgrind learned the kdbus ioctls natively. *\/ */
-/* #ifdef HAVE_VALGRIND_MEMCHECK_H */
-/*                 VALGRIND_MAKE_MEM_DEFINED(k, sz); */
-/* #endif */
-
-
         r = bus_kernel_make_message(bus, k, m);
         if (r <= 0)
                 close_kdbus_msg(bus, k);
@@ -675,6 +721,7 @@ int bus_kernel_create(const char *name, char **s) {
 
 int bus_kernel_pop_memfd(sd_bus *bus, void **address, size_t *size) {
         struct memfd_cache *c;
+        int fd;
 
         assert(address);
         assert(size);
@@ -682,8 +729,12 @@ int bus_kernel_pop_memfd(sd_bus *bus, void **address, size_t *size) {
         if (!bus || !bus->is_kernel)
                 return -ENOTSUP;
 
+        assert_se(pthread_mutex_lock(&bus->memfd_cache_mutex) == 0);
+
         if (bus->n_memfd_cache <= 0) {
-                int fd, r;
+                int r;
+
+                assert_se(pthread_mutex_unlock(&bus->memfd_cache_mutex) == 0);
 
                 r = ioctl(bus->input_fd, KDBUS_CMD_MEMFD_NEW, &fd);
                 if (r < 0)
@@ -701,8 +752,18 @@ int bus_kernel_pop_memfd(sd_bus *bus, void **address, size_t *size) {
 
         *address = c->address;
         *size = c->size;
+        fd = c->fd;
+
+        assert_se(pthread_mutex_unlock(&bus->memfd_cache_mutex) == 0);
+
+        return fd;
+}
+
+static void close_and_munmap(int fd, void *address, size_t size) {
+        if (size > 0)
+                assert_se(munmap(address, PAGE_ALIGN(size)) == 0);
 
-        return c->fd;
+        close_nointr_nofail(fd);
 }
 
 void bus_kernel_push_memfd(sd_bus *bus, int fd, void *address, size_t size) {
@@ -711,13 +772,17 @@ void bus_kernel_push_memfd(sd_bus *bus, int fd, void *address, size_t size) {
         assert(fd >= 0);
         assert(size == 0 || address);
 
-        if (!bus || !bus->is_kernel ||
-            bus->n_memfd_cache >= ELEMENTSOF(bus->memfd_cache)) {
+        if (!bus || !bus->is_kernel) {
+                close_and_munmap(fd, address, size);
+                return;
+        }
 
-                if (size > 0)
-                        assert_se(munmap(address, PAGE_ALIGN(size)) == 0);
+        assert_se(pthread_mutex_lock(&bus->memfd_cache_mutex) == 0);
 
-                close_nointr_nofail(fd);
+        if (bus->n_memfd_cache >= ELEMENTSOF(bus->memfd_cache)) {
+                assert_se(pthread_mutex_unlock(&bus->memfd_cache_mutex) == 0);
+
+                close_and_munmap(fd, address, size);
                 return;
         }
 
@@ -734,6 +799,8 @@ void bus_kernel_push_memfd(sd_bus *bus, int fd, void *address, size_t size) {
                 c->size = MEMFD_CACHE_ITEM_SIZE_MAX;
         } else
                 c->size = size;
+
+        assert_se(pthread_mutex_unlock(&bus->memfd_cache_mutex) == 0);
 }
 
 void bus_kernel_flush_memfd(sd_bus *b) {