X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=blobdiff_plain;f=src%2Flibsystemd-bus%2Fbus-kernel.c;h=ffa843d5d1090b5fdf8e73029d5fc6d0113aa0bf;hb=602c0e740f8290cc9c4f13f2eb4b23fbbd7a8d2b;hp=f7759b6fb498ea9f8c9926504124b86137139b80;hpb=1307c3ff9aa9d96fff6f9f42bb760887fa9aa240;p=elogind.git diff --git a/src/libsystemd-bus/bus-kernel.c b/src/libsystemd-bus/bus-kernel.c index f7759b6fb..ffa843d5d 100644 --- a/src/libsystemd-bus/bus-kernel.c +++ b/src/libsystemd-bus/bus-kernel.c @@ -34,20 +34,7 @@ #include "bus-kernel.h" #include "bus-bloom.h" -#define KDBUS_ITEM_NEXT(item) \ - (typeof(item))(((uint8_t *)item) + ALIGN8((item)->size)) - -#define KDBUS_ITEM_FOREACH(item, head) \ - for (item = (head)->items; \ - (uint8_t *)(item) < (uint8_t *)(head) + (head)->size; \ - item = KDBUS_ITEM_NEXT(item)) - -#define KDBUS_ITEM_HEADER_SIZE offsetof(struct kdbus_item, data) -#define KDBUS_ITEM_SIZE(s) ALIGN8((s) + KDBUS_ITEM_HEADER_SIZE) - -#define KDBUS_POOL_SIZE (4*1024*1024) - -static int parse_unique_name(const char *s, uint64_t *id) { +int bus_kernel_parse_unique_name(const char *s, uint64_t *id) { int r; assert(s); @@ -69,6 +56,10 @@ static void append_payload_vec(struct kdbus_item **d, const void *p, size_t sz) *d = ALIGN8_PTR(*d); + /* Note that p can be NULL, which encodes a region full of + * zeroes, which is useful to optimize certain padding + * conditions */ + (*d)->size = offsetof(struct kdbus_item, vec) + sizeof(struct kdbus_vec); (*d)->type = KDBUS_MSG_PAYLOAD_VEC; (*d)->vec.address = PTR_TO_UINT64(p); @@ -150,6 +141,7 @@ static int bus_message_setup_bloom(sd_bus_message *m, void *bloom) { bloom_add_pair(bloom, "member", m->member); if (m->path) { bloom_add_pair(bloom, "path", m->path); + bloom_add_pair(bloom, "path-slash-prefix", m->path); bloom_add_prefixes(bloom, "path-slash-prefix", m->path, '/'); } @@ -213,7 +205,7 @@ static int bus_message_setup_kmsg(sd_bus *b, sd_bus_message *m) { return 0; if (m->destination) { - r = parse_unique_name(m->destination, &unique); + r = bus_kernel_parse_unique_name(m->destination, &unique); if (r < 0) return r; @@ -244,9 +236,12 @@ static int bus_message_setup_kmsg(sd_bus *b, sd_bus_message *m) { sz += ALIGN8(offsetof(struct kdbus_item, fds) + sizeof(int)*m->n_fds); m->kdbus = memalign(8, sz); - if (!m->kdbus) - return -ENOMEM; + if (!m->kdbus) { + r = -ENOMEM; + goto fail; + } + m->free_kdbus = true; memset(m->kdbus, 0, sz); m->kdbus->flags = @@ -269,24 +264,28 @@ static int bus_message_setup_kmsg(sd_bus *b, sd_bus_message *m) { MESSAGE_FOREACH_PART(part, i, m) { if (part->is_zero) { + /* If this is padding then simply send a + * vector with a NULL data pointer which the + * kernel will just pass through. This is the + * most efficient way to encode zeroes */ + append_payload_vec(&d, NULL, part->size); continue; } - if (part->memfd >= 0 && part->sealed) { - bus_body_part_unmap(part); + if (part->memfd >= 0 && part->sealed && m->destination) { + /* Try to send a memfd, if the part is + * sealed and this is not a broadcast. Since we can only */ - if (!part->data) { - append_payload_memfd(&d, part->memfd, part->size); - continue; - } + append_payload_memfd(&d, part->memfd, part->size); + continue; } - if (part->memfd >= 0) { - r = bus_body_part_map(part); - if (r < 0) - goto fail; - } + /* Otherwise let's send a vector to the actual data, + * for that we need to map it first. */ + r = bus_body_part_map(part); + if (r < 0) + goto fail; append_payload_vec(&d, part->data, part->size); } @@ -306,23 +305,15 @@ static int bus_message_setup_kmsg(sd_bus *b, sd_bus_message *m) { m->kdbus->size = (uint8_t*) d - (uint8_t*) m->kdbus; assert(m->kdbus->size <= sz); - m->free_kdbus = true; - return 0; fail: - free(m->kdbus); - m->kdbus = NULL; + m->poisoned = true; return r; } int bus_kernel_take_fd(sd_bus *b) { - uint8_t h[ALIGN8(sizeof(struct kdbus_cmd_hello)) + - ALIGN8(KDBUS_ITEM_HEADER_SIZE) + - ALIGN8(sizeof(struct kdbus_vec))] = {}; - - struct kdbus_cmd_hello *hello = (struct kdbus_cmd_hello*) h; - + struct kdbus_cmd_hello hello; int r; assert(b); @@ -330,49 +321,40 @@ int bus_kernel_take_fd(sd_bus *b) { if (b->is_server) return -EINVAL; + b->use_memfd = 1; + + zero(hello); + hello.size = sizeof(hello); + hello.conn_flags = b->hello_flags; + hello.pool_size = KDBUS_POOL_SIZE; + + r = ioctl(b->input_fd, KDBUS_CMD_HELLO, &hello); + if (r < 0) + return -errno; + if (!b->kdbus_buffer) { - b->kdbus_buffer = mmap(NULL, KDBUS_POOL_SIZE, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + b->kdbus_buffer = mmap(NULL, KDBUS_POOL_SIZE, PROT_READ, MAP_SHARED, b->input_fd, 0); if (b->kdbus_buffer == MAP_FAILED) { b->kdbus_buffer = NULL; return -errno; } } - hello->size = sizeof(h); - hello->conn_flags = - KDBUS_HELLO_ACCEPT_FD| - KDBUS_HELLO_ATTACH_COMM| - KDBUS_HELLO_ATTACH_EXE| - KDBUS_HELLO_ATTACH_CMDLINE| - KDBUS_HELLO_ATTACH_CGROUP| - KDBUS_HELLO_ATTACH_CAPS| - KDBUS_HELLO_ATTACH_SECLABEL| - KDBUS_HELLO_ATTACH_AUDIT; - - hello->items[0].type = KDBUS_HELLO_POOL; - hello->items[0].size = KDBUS_ITEM_HEADER_SIZE + sizeof(struct kdbus_vec); - hello->items[0].vec.address = (uint64_t) b->kdbus_buffer; - hello->items[0].vec.size = KDBUS_POOL_SIZE; - - r = ioctl(b->input_fd, KDBUS_CMD_HELLO, hello); - if (r < 0) - return -errno; - /* The higher 32bit of both flags fields are considered * 'incompatible flags'. Refuse them all for now. */ - if (hello->bus_flags > 0xFFFFFFFFULL || - hello->conn_flags > 0xFFFFFFFFULL) + if (hello.bus_flags > 0xFFFFFFFFULL || + hello.conn_flags > 0xFFFFFFFFULL) return -ENOTSUP; - if (hello->bloom_size != BLOOM_SIZE) + if (hello.bloom_size != BLOOM_SIZE) return -ENOTSUP; - if (asprintf(&b->unique_name, ":1.%llu", (unsigned long long) hello->id) < 0) + if (asprintf(&b->unique_name, ":1.%llu", (unsigned long long) hello.id) < 0) return -ENOMEM; b->is_kernel = true; b->bus_client = true; - b->can_fds = true; + b->can_fds = !!(hello.conn_flags & KDBUS_HELLO_ACCEPT_FD); r = bus_start_running(b); if (r < 0) @@ -418,12 +400,14 @@ int bus_kernel_write_message(sd_bus *bus, sd_bus_message *m) { } static void close_kdbus_msg(sd_bus *bus, struct kdbus_msg *k) { + uint64_t off; struct kdbus_item *d; assert(bus); assert(k); - ioctl(bus->input_fd, KDBUS_CMD_MSG_RELEASE, k); + off = (uint8_t *)k - (uint8_t *)bus->kdbus_buffer; + ioctl(bus->input_fd, KDBUS_CMD_MSG_RELEASE, &off); KDBUS_ITEM_FOREACH(d, k) { @@ -456,10 +440,10 @@ static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k, sd_bus_mess l = d->size - offsetof(struct kdbus_item, data); - if (d->type == KDBUS_MSG_PAYLOAD_VEC) { + if (d->type == KDBUS_MSG_PAYLOAD_OFF) { if (!h) { - h = UINT64_TO_PTR(d->vec.address); + h = (struct bus_header *)((uint8_t *)bus->kdbus_buffer + d->vec.offset); if (!bus_header_is_complete(h, d->vec.size)) return -EBADMSG; @@ -510,7 +494,7 @@ static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k, sd_bus_mess l = d->size - offsetof(struct kdbus_item, data); - if (d->type == KDBUS_MSG_PAYLOAD_VEC) { + if (d->type == KDBUS_MSG_PAYLOAD_OFF) { size_t begin_body; begin_body = BUS_MESSAGE_BODY_BEGIN(m); @@ -526,15 +510,19 @@ static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k, sd_bus_mess goto fail; } + /* A -1 offset is NUL padding. */ + part->is_zero = d->vec.offset == ~0ULL; + if (idx >= begin_body) { - part->data = UINT64_TO_PTR(d->vec.address); + if (!part->is_zero) + part->data = (uint8_t *)bus->kdbus_buffer + d->vec.offset; part->size = d->vec.size; } else { - part->data = d->vec.address != 0 ? (uint8_t*) UINT64_TO_PTR(d->vec.address) + (begin_body - idx) : NULL; + if (!part->is_zero) + part->data = (uint8_t *)bus->kdbus_buffer + d->vec.offset + (begin_body - idx); part->size = d->vec.size - (begin_body - idx); } - part->is_zero = d->vec.address == 0; part->sealed = true; } @@ -641,19 +629,21 @@ fail: } int bus_kernel_read_message(sd_bus *bus, sd_bus_message **m) { + uint64_t off; struct kdbus_msg *k; int r; assert(bus); assert(m); - r = ioctl(bus->input_fd, KDBUS_CMD_MSG_RECV, &k); + r = ioctl(bus->input_fd, KDBUS_CMD_MSG_RECV, &off); if (r < 0) { if (errno == EAGAIN) return 0; return -errno; } + k = (struct kdbus_msg *)((uint8_t *)bus->kdbus_buffer + off); r = bus_kernel_make_message(bus, k, m); if (r <= 0) @@ -715,6 +705,7 @@ int bus_kernel_create(const char *name, char **s) { int bus_kernel_pop_memfd(sd_bus *bus, void **address, size_t *size) { struct memfd_cache *c; + int fd; assert(address); assert(size); @@ -722,8 +713,12 @@ int bus_kernel_pop_memfd(sd_bus *bus, void **address, size_t *size) { if (!bus || !bus->is_kernel) return -ENOTSUP; + assert_se(pthread_mutex_lock(&bus->memfd_cache_mutex) >= 0); + if (bus->n_memfd_cache <= 0) { - int fd, r; + int r; + + assert_se(pthread_mutex_unlock(&bus->memfd_cache_mutex) >= 0); r = ioctl(bus->input_fd, KDBUS_CMD_MEMFD_NEW, &fd); if (r < 0) @@ -734,30 +729,45 @@ int bus_kernel_pop_memfd(sd_bus *bus, void **address, size_t *size) { return fd; } - c = &bus->memfd_cache[-- bus->n_memfd_cache]; + c = &bus->memfd_cache[--bus->n_memfd_cache]; assert(c->fd >= 0); assert(c->size == 0 || c->address); *address = c->address; *size = c->size; + fd = c->fd; - return c->fd; + assert_se(pthread_mutex_unlock(&bus->memfd_cache_mutex) >= 0); + + return fd; +} + +static void close_and_munmap(int fd, void *address, size_t size) { + if (size > 0) + assert_se(munmap(address, PAGE_ALIGN(size)) >= 0); + + close_nointr_nofail(fd); } void bus_kernel_push_memfd(sd_bus *bus, int fd, void *address, size_t size) { struct memfd_cache *c; + uint64_t max_sz = PAGE_ALIGN(MEMFD_CACHE_ITEM_SIZE_MAX); assert(fd >= 0); assert(size == 0 || address); - if (!bus || !bus->is_kernel || - bus->n_memfd_cache >= ELEMENTSOF(bus->memfd_cache)) { + if (!bus || !bus->is_kernel) { + close_and_munmap(fd, address, size); + return; + } + + assert_se(pthread_mutex_lock(&bus->memfd_cache_mutex) >= 0); - if (size > 0) - assert_se(munmap(address, PAGE_ALIGN(size)) == 0); + if (bus->n_memfd_cache >= ELEMENTSOF(bus->memfd_cache)) { + assert_se(pthread_mutex_unlock(&bus->memfd_cache_mutex) >= 0); - close_nointr_nofail(fd); + close_and_munmap(fd, address, size); return; } @@ -766,14 +776,14 @@ void bus_kernel_push_memfd(sd_bus *bus, int fd, void *address, size_t size) { c->address = address; /* If overly long, let's return a bit to the OS */ - if (size > MEMFD_CACHE_ITEM_SIZE_MAX) { - uint64_t sz = MEMFD_CACHE_ITEM_SIZE_MAX; - - ioctl(bus->input_fd, KDBUS_CMD_MEMFD_SIZE_SET, &sz); - - c->size = MEMFD_CACHE_ITEM_SIZE_MAX; + if (size > max_sz) { + assert_se(ioctl(fd, KDBUS_CMD_MEMFD_SIZE_SET, &max_sz) >= 0); + assert_se(munmap((uint8_t*) address + max_sz, PAGE_ALIGN(size - max_sz)) >= 0); + c->size = max_sz; } else c->size = size; + + assert_se(pthread_mutex_unlock(&bus->memfd_cache_mutex) >= 0); } void bus_kernel_flush_memfd(sd_bus *b) { @@ -781,10 +791,6 @@ void bus_kernel_flush_memfd(sd_bus *b) { assert(b); - for (i = 0; i < b->n_memfd_cache; i++) { - if (b->memfd_cache[i].size > 0) - assert_se(munmap(b->memfd_cache[i].address, PAGE_ALIGN(b->memfd_cache[i].size)) == 0); - - close_nointr_nofail(b->memfd_cache[i].fd); - } + for (i = 0; i < b->n_memfd_cache; i++) + close_and_munmap(b->memfd_cache[i].fd, b->memfd_cache[i].address, b->memfd_cache[i].size); }