X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=blobdiff_plain;f=src%2Flibsystemd-bus%2Fbus-kernel.c;h=f7759b6fb498ea9f8c9926504124b86137139b80;hp=2bb1b9a19a70993ebbcedf7891cd89f7e0d02f17;hb=1307c3ff9aa9d96fff6f9f42bb760887fa9aa240;hpb=c556fe792d4075a365b276e51666a8009d7dca19 diff --git a/src/libsystemd-bus/bus-kernel.c b/src/libsystemd-bus/bus-kernel.c index 2bb1b9a19..f7759b6fb 100644 --- a/src/libsystemd-bus/bus-kernel.c +++ b/src/libsystemd-bus/bus-kernel.c @@ -25,6 +25,7 @@ #include #include +#include #include "util.h" @@ -35,11 +36,17 @@ #define KDBUS_ITEM_NEXT(item) \ (typeof(item))(((uint8_t *)item) + ALIGN8((item)->size)) + #define KDBUS_ITEM_FOREACH(item, head) \ for (item = (head)->items; \ (uint8_t *)(item) < (uint8_t *)(head) + (head)->size; \ item = KDBUS_ITEM_NEXT(item)) +#define KDBUS_ITEM_HEADER_SIZE offsetof(struct kdbus_item, data) +#define KDBUS_ITEM_SIZE(s) ALIGN8((s) + KDBUS_ITEM_HEADER_SIZE) + +#define KDBUS_POOL_SIZE (4*1024*1024) + static int parse_unique_name(const char *s, uint64_t *id) { int r; @@ -56,50 +63,76 @@ static int parse_unique_name(const char *s, uint64_t *id) { return 1; } -static void append_payload_vec(struct kdbus_msg_item **d, const void *p, size_t sz) { +static void append_payload_vec(struct kdbus_item **d, const void *p, size_t sz) { assert(d); - assert(p); assert(sz > 0); *d = ALIGN8_PTR(*d); - (*d)->size = offsetof(struct kdbus_msg_item, vec) + sizeof(struct kdbus_vec); + (*d)->size = offsetof(struct kdbus_item, vec) + sizeof(struct kdbus_vec); (*d)->type = KDBUS_MSG_PAYLOAD_VEC; - (*d)->vec.address = (uint64_t) p; + (*d)->vec.address = PTR_TO_UINT64(p); (*d)->vec.size = sz; - *d = (struct kdbus_msg_item *) ((uint8_t*) *d + (*d)->size); + *d = (struct kdbus_item *) ((uint8_t*) *d + (*d)->size); } -static void append_destination(struct kdbus_msg_item **d, const char *s, size_t length) { +static void append_payload_memfd(struct kdbus_item **d, int memfd, size_t sz) { + assert(d); + assert(memfd >= 0); + assert(sz > 0); + + *d = ALIGN8_PTR(*d); + (*d)->size = offsetof(struct kdbus_item, memfd) + sizeof(struct kdbus_memfd); + (*d)->type = KDBUS_MSG_PAYLOAD_MEMFD; + (*d)->memfd.fd = memfd; + (*d)->memfd.size = sz; + + *d = (struct kdbus_item *) ((uint8_t*) *d + (*d)->size); +} + +static void append_destination(struct kdbus_item **d, const char *s, size_t length) { assert(d); assert(s); *d = ALIGN8_PTR(*d); - (*d)->size = offsetof(struct kdbus_msg_item, str) + length + 1; + (*d)->size = offsetof(struct kdbus_item, str) + length + 1; (*d)->type = KDBUS_MSG_DST_NAME; memcpy((*d)->str, s, length + 1); - *d = (struct kdbus_msg_item *) ((uint8_t*) *d + (*d)->size); + *d = (struct kdbus_item *) ((uint8_t*) *d + (*d)->size); } -static void* append_bloom(struct kdbus_msg_item **d, size_t length) { +static void* append_bloom(struct kdbus_item **d, size_t length) { void *r; assert(d); *d = ALIGN8_PTR(*d); - (*d)->size = offsetof(struct kdbus_msg_item, data) + length; + (*d)->size = offsetof(struct kdbus_item, data) + length; (*d)->type = KDBUS_MSG_BLOOM; r = (*d)->data; - *d = (struct kdbus_msg_item *) ((uint8_t*) *d + (*d)->size); + *d = (struct kdbus_item *) ((uint8_t*) *d + (*d)->size); return r; } +static void append_fds(struct kdbus_item **d, const int fds[], unsigned n_fds) { + assert(d); + assert(fds); + assert(n_fds > 0); + + *d = ALIGN8_PTR(*d); + (*d)->size = offsetof(struct kdbus_item, fds) + sizeof(int) * n_fds; + (*d)->type = KDBUS_MSG_FDS; + memcpy((*d)->fds, fds, sizeof(int) * n_fds); + + *d = (struct kdbus_item *) ((uint8_t*) *d + (*d)->size); +} + static int bus_message_setup_bloom(sd_bus_message *m, void *bloom) { unsigned i; int r; @@ -164,10 +197,12 @@ static int bus_message_setup_bloom(sd_bus_message *m, void *bloom) { } static int bus_message_setup_kmsg(sd_bus *b, sd_bus_message *m) { - struct kdbus_msg_item *d; + struct bus_body_part *part; + struct kdbus_item *d; bool well_known; uint64_t unique; size_t sz, dl; + unsigned i; int r; assert(b); @@ -188,18 +223,26 @@ static int bus_message_setup_kmsg(sd_bus *b, sd_bus_message *m) { sz = offsetof(struct kdbus_msg, items); + assert_cc(ALIGN8(offsetof(struct kdbus_item, vec) + sizeof(struct kdbus_vec)) == + ALIGN8(offsetof(struct kdbus_item, memfd) + sizeof(struct kdbus_memfd))); + /* Add in fixed header, fields header and payload */ - sz += 3 * ALIGN8(offsetof(struct kdbus_msg_item, vec) + sizeof(struct kdbus_vec)); + sz += (1 + m->n_body_parts) * + ALIGN8(offsetof(struct kdbus_item, vec) + sizeof(struct kdbus_vec)); /* Add space for bloom filter */ - sz += ALIGN8(offsetof(struct kdbus_msg_item, data) + BLOOM_SIZE); + sz += ALIGN8(offsetof(struct kdbus_item, data) + BLOOM_SIZE); /* Add in well-known destination header */ if (well_known) { dl = strlen(m->destination); - sz += ALIGN8(offsetof(struct kdbus_msg_item, str) + dl + 1); + sz += ALIGN8(offsetof(struct kdbus_item, str) + dl + 1); } + /* Add space for unix fds */ + if (m->n_fds > 0) + sz += ALIGN8(offsetof(struct kdbus_item, fds) + sizeof(int)*m->n_fds); + m->kdbus = memalign(8, sz); if (!m->kdbus) return -ENOMEM; @@ -222,48 +265,64 @@ static int bus_message_setup_kmsg(sd_bus *b, sd_bus_message *m) { if (well_known) append_destination(&d, m->destination, dl); - append_payload_vec(&d, m->header, sizeof(*m->header)); + append_payload_vec(&d, m->header, BUS_MESSAGE_BODY_BEGIN(m)); + + MESSAGE_FOREACH_PART(part, i, m) { + if (part->is_zero) { + append_payload_vec(&d, NULL, part->size); + continue; + } + + if (part->memfd >= 0 && part->sealed) { + bus_body_part_unmap(part); + + if (!part->data) { + append_payload_memfd(&d, part->memfd, part->size); + continue; + } + } - if (m->fields) - append_payload_vec(&d, m->fields, ALIGN8(m->header->fields_size)); + if (part->memfd >= 0) { + r = bus_body_part_map(part); + if (r < 0) + goto fail; + } - if (m->body) - append_payload_vec(&d, m->body, m->header->body_size); + append_payload_vec(&d, part->data, part->size); + } if (m->kdbus->dst_id == KDBUS_DST_ID_BROADCAST) { void *p; - /* For now, let's add a mask all bloom filter */ p = append_bloom(&d, BLOOM_SIZE); r = bus_message_setup_bloom(m, p); - if (r < 0) { - free(m->kdbus); - m->kdbus = NULL; - return -r; - } + if (r < 0) + goto fail; } + if (m->n_fds > 0) + append_fds(&d, m->fds, m->n_fds); + m->kdbus->size = (uint8_t*) d - (uint8_t*) m->kdbus; assert(m->kdbus->size <= sz); m->free_kdbus = true; return 0; + +fail: + free(m->kdbus); + m->kdbus = NULL; + return r; } int bus_kernel_take_fd(sd_bus *b) { - struct kdbus_cmd_hello hello = { - .conn_flags = - KDBUS_CMD_HELLO_ACCEPT_FD| - KDBUS_CMD_HELLO_ACCEPT_MMAP| - KDBUS_CMD_HELLO_ATTACH_COMM| - KDBUS_CMD_HELLO_ATTACH_EXE| - KDBUS_CMD_HELLO_ATTACH_CMDLINE| - KDBUS_CMD_HELLO_ATTACH_CGROUP| - KDBUS_CMD_HELLO_ATTACH_CAPS| - KDBUS_CMD_HELLO_ATTACH_SECLABEL| - KDBUS_CMD_HELLO_ATTACH_AUDIT - }; + uint8_t h[ALIGN8(sizeof(struct kdbus_cmd_hello)) + + ALIGN8(KDBUS_ITEM_HEADER_SIZE) + + ALIGN8(sizeof(struct kdbus_vec))] = {}; + + struct kdbus_cmd_hello *hello = (struct kdbus_cmd_hello*) h; + int r; assert(b); @@ -271,24 +330,49 @@ int bus_kernel_take_fd(sd_bus *b) { if (b->is_server) return -EINVAL; - r = ioctl(b->input_fd, KDBUS_CMD_HELLO, &hello); + if (!b->kdbus_buffer) { + b->kdbus_buffer = mmap(NULL, KDBUS_POOL_SIZE, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if (b->kdbus_buffer == MAP_FAILED) { + b->kdbus_buffer = NULL; + return -errno; + } + } + + hello->size = sizeof(h); + hello->conn_flags = + KDBUS_HELLO_ACCEPT_FD| + KDBUS_HELLO_ATTACH_COMM| + KDBUS_HELLO_ATTACH_EXE| + KDBUS_HELLO_ATTACH_CMDLINE| + KDBUS_HELLO_ATTACH_CGROUP| + KDBUS_HELLO_ATTACH_CAPS| + KDBUS_HELLO_ATTACH_SECLABEL| + KDBUS_HELLO_ATTACH_AUDIT; + + hello->items[0].type = KDBUS_HELLO_POOL; + hello->items[0].size = KDBUS_ITEM_HEADER_SIZE + sizeof(struct kdbus_vec); + hello->items[0].vec.address = (uint64_t) b->kdbus_buffer; + hello->items[0].vec.size = KDBUS_POOL_SIZE; + + r = ioctl(b->input_fd, KDBUS_CMD_HELLO, hello); if (r < 0) return -errno; /* The higher 32bit of both flags fields are considered * 'incompatible flags'. Refuse them all for now. */ - if (hello.bus_flags > 0xFFFFFFFFULL || - hello.conn_flags > 0xFFFFFFFFULL) + if (hello->bus_flags > 0xFFFFFFFFULL || + hello->conn_flags > 0xFFFFFFFFULL) return -ENOTSUP; - if (hello.bloom_size != BLOOM_SIZE) + if (hello->bloom_size != BLOOM_SIZE) return -ENOTSUP; - if (asprintf(&b->unique_name, ":1.%llu", (unsigned long long) hello.id) < 0) + if (asprintf(&b->unique_name, ":1.%llu", (unsigned long long) hello->id) < 0) return -ENOMEM; b->is_kernel = true; b->bus_client = true; + b->can_fds = true; r = bus_start_running(b); if (r < 0) @@ -333,22 +417,27 @@ int bus_kernel_write_message(sd_bus *bus, sd_bus_message *m) { return 1; } -static void close_kdbus_msg(struct kdbus_msg *k) { - struct kdbus_msg_item *d; +static void close_kdbus_msg(sd_bus *bus, struct kdbus_msg *k) { + struct kdbus_item *d; - KDBUS_ITEM_FOREACH(d, k) { + assert(bus); + assert(k); - if (d->type != KDBUS_MSG_UNIX_FDS) - continue; + ioctl(bus->input_fd, KDBUS_CMD_MSG_RELEASE, k); + + KDBUS_ITEM_FOREACH(d, k) { - close_many(d->fds, (d->size - offsetof(struct kdbus_msg_item, fds)) / sizeof(int)); + if (d->type == KDBUS_MSG_FDS) + close_many(d->fds, (d->size - offsetof(struct kdbus_item, fds)) / sizeof(int)); + else if (d->type == KDBUS_MSG_PAYLOAD_MEMFD) + close_nointr_nofail(d->memfd.fd); } } static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k, sd_bus_message **ret) { sd_bus_message *m = NULL; - struct kdbus_msg_item *d; - unsigned n_payload = 0, n_fds = 0; + struct kdbus_item *d; + unsigned n_fds = 0; _cleanup_free_ int *fds = NULL; struct bus_header *h = NULL; size_t total, n_bytes = 0, idx = 0; @@ -365,21 +454,27 @@ static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k, sd_bus_mess KDBUS_ITEM_FOREACH(d, k) { size_t l; - l = d->size - offsetof(struct kdbus_msg_item, data); + l = d->size - offsetof(struct kdbus_item, data); - if (d->type == KDBUS_MSG_PAYLOAD) { + if (d->type == KDBUS_MSG_PAYLOAD_VEC) { if (!h) { - if (l < sizeof(struct bus_header)) - return -EBADMSG; + h = UINT64_TO_PTR(d->vec.address); - h = (struct bus_header*) d->data; + if (!bus_header_is_complete(h, d->vec.size)) + return -EBADMSG; } - n_payload++; - n_bytes += l; + n_bytes += d->vec.size; + + } else if (d->type == KDBUS_MSG_PAYLOAD_MEMFD) { + + if (!h) + return -EBADMSG; + + n_bytes += d->memfd.size; - } else if (d->type == KDBUS_MSG_UNIX_FDS) { + } else if (d->type == KDBUS_MSG_FDS) { int *f; unsigned j; @@ -389,19 +484,17 @@ static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k, sd_bus_mess return -ENOMEM; fds = f; - memcpy(fds + n_fds, d->fds, j); + memcpy(fds + n_fds, d->fds, sizeof(int) * j); n_fds += j; - } else if (d->type == KDBUS_MSG_DST_NAME) - destination = d->str; - else if (d->type == KDBUS_MSG_SRC_SECLABEL) + } else if (d->type == KDBUS_MSG_SRC_SECLABEL) seclabel = d->str; } if (!h) return -EBADMSG; - r = bus_header_size(h, &total); + r = bus_header_message_size(h, &total); if (r < 0) return r; @@ -415,22 +508,57 @@ static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k, sd_bus_mess KDBUS_ITEM_FOREACH(d, k) { size_t l; - l = d->size - offsetof(struct kdbus_msg_item, data); + l = d->size - offsetof(struct kdbus_item, data); - if (d->type == KDBUS_MSG_PAYLOAD) { + if (d->type == KDBUS_MSG_PAYLOAD_VEC) { + size_t begin_body; - if (idx == sizeof(struct bus_header) && - l == ALIGN8(BUS_MESSAGE_FIELDS_SIZE(m))) - m->fields = d->data; - else if (idx == sizeof(struct bus_header) + ALIGN8(BUS_MESSAGE_FIELDS_SIZE(m)) && - l == BUS_MESSAGE_BODY_SIZE(m)) - m->body = d->data; - else if (!(idx == 0 && l == sizeof(struct bus_header))) { - sd_bus_message_unref(m); - return -EBADMSG; + begin_body = BUS_MESSAGE_BODY_BEGIN(m); + + if (idx + d->vec.size > begin_body) { + struct bus_body_part *part; + + /* Contains body material */ + + part = message_append_part(m); + if (!part) { + r = -ENOMEM; + goto fail; + } + + if (idx >= begin_body) { + part->data = UINT64_TO_PTR(d->vec.address); + part->size = d->vec.size; + } else { + part->data = d->vec.address != 0 ? (uint8_t*) UINT64_TO_PTR(d->vec.address) + (begin_body - idx) : NULL; + part->size = d->vec.size - (begin_body - idx); + } + + part->is_zero = d->vec.address == 0; + part->sealed = true; + } + + idx += d->vec.size; + } else if (d->type == KDBUS_MSG_PAYLOAD_MEMFD) { + struct bus_body_part *part; + + if (idx < BUS_MESSAGE_BODY_BEGIN(m)) { + r = -EBADMSG; + goto fail; + } + + part = message_append_part(m); + if (!part) { + r = -ENOMEM; + goto fail; } - idx += l; + part->memfd = d->memfd.fd; + part->size = d->memfd.size; + part->sealed = true; + + idx += d->memfd.size; + } else if (d->type == KDBUS_MSG_SRC_CREDS) { m->pid_starttime = d->creds.starttime / NSEC_PER_USEC; m->uid = d->creds.uid; @@ -452,15 +580,21 @@ static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k, sd_bus_mess m->cmdline_length = l; } else if (d->type == KDBUS_MSG_SRC_CGROUP) m->cgroup = d->str; - else + else if (d->type == KDBUS_MSG_SRC_AUDIT) + m->audit = &d->audit; + else if (d->type == KDBUS_MSG_SRC_CAPS) { + m->capability = d->data; + m->capability_size = l; + } else if (d->type == KDBUS_MSG_DST_NAME) + destination = d->str; + else if (d->type != KDBUS_MSG_FDS && + d->type != KDBUS_MSG_SRC_SECLABEL) log_debug("Got unknown field from kernel %llu", d->type); } r = bus_message_parse_fields(m); - if (r < 0) { - sd_bus_message_unref(m); - return r; - } + if (r < 0) + goto fail; if (k->src_id == KDBUS_SRC_ID_KERNEL) m->sender = "org.freedesktop.DBus"; @@ -481,65 +615,56 @@ static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k, sd_bus_mess /* We take possession of the kmsg struct now */ m->kdbus = k; - m->free_kdbus = true; + m->bus = sd_bus_ref(bus); + m->release_kdbus = true; m->free_fds = true; fds = NULL; *ret = m; return 1; + +fail: + if (m) { + struct bus_body_part *part; + unsigned i; + + /* Make sure the memfds are not freed twice */ + MESSAGE_FOREACH_PART(part, i, m) + if (part->memfd >= 0) + part->memfd = -1; + + sd_bus_message_unref(m); + } + + return r; } int bus_kernel_read_message(sd_bus *bus, sd_bus_message **m) { struct kdbus_msg *k; - size_t sz = 1024; int r; assert(bus); assert(m); - for (;;) { - void *q; - - q = memalign(8, sz); - if (!q) - return -errno; - - free(bus->rbuffer); - k = bus->rbuffer = q; - k->size = sz; - - /* Let's tell valgrind that there's really no need to - * initialize this fully. This should be removed again - * when valgrind learned the kdbus ioctls natively. */ -#ifdef HAVE_VALGRIND_MEMCHECK_H - VALGRIND_MAKE_MEM_DEFINED(k, sz); -#endif - - r = ioctl(bus->input_fd, KDBUS_CMD_MSG_RECV, bus->rbuffer); - if (r >= 0) - break; - + r = ioctl(bus->input_fd, KDBUS_CMD_MSG_RECV, &k); + if (r < 0) { if (errno == EAGAIN) return 0; - if (errno != ENOBUFS) - return -errno; - - sz *= 2; + return -errno; } r = bus_kernel_make_message(bus, k, m); - if (r > 0) - bus->rbuffer = NULL; - else - close_kdbus_msg(k); + if (r <= 0) + close_kdbus_msg(bus, k); return r < 0 ? r : 1; } int bus_kernel_create(const char *name, char **s) { struct kdbus_cmd_bus_make *make; + struct kdbus_item *n, *cg; size_t l; int fd; char *p; @@ -552,16 +677,27 @@ int bus_kernel_create(const char *name, char **s) { return -errno; l = strlen(name); - make = alloca0(offsetof(struct kdbus_cmd_bus_make, name) + DECIMAL_STR_MAX(uid_t) + 1 + l + 1); - sprintf(make->name, "%lu-%s", (unsigned long) getuid(), name); - make->size = offsetof(struct kdbus_cmd_bus_make, name) + strlen(make->name) + 1; - make->flags = KDBUS_ACCESS_WORLD | KDBUS_POLICY_OPEN; + make = alloca0(offsetof(struct kdbus_cmd_bus_make, items) + + KDBUS_ITEM_HEADER_SIZE + sizeof(uint64_t) + + KDBUS_ITEM_HEADER_SIZE + DECIMAL_STR_MAX(uid_t) + 1 + l + 1); + + cg = make->items; + cg->type = KDBUS_MAKE_CGROUP; + cg->data64[0] = 1; + cg->size = KDBUS_ITEM_HEADER_SIZE + sizeof(uint64_t); + + n = KDBUS_ITEM_NEXT(cg); + n->type = KDBUS_MAKE_NAME; + sprintf(n->str, "%lu-%s", (unsigned long) getuid(), name); + n->size = KDBUS_ITEM_HEADER_SIZE + strlen(n->str) + 1; + + make->size = offsetof(struct kdbus_cmd_bus_make, items) + cg->size + n->size; + make->flags = KDBUS_MAKE_POLICY_OPEN; make->bus_flags = 0; make->bloom_size = BLOOM_SIZE; - make->cgroup_id = 1; assert_cc(BLOOM_SIZE % 8 == 0); - p = strjoin("/dev/kdbus/", make->name, "/bus", NULL); + p = strjoin("/dev/kdbus/", n->str, "/bus", NULL); if (!p) return -ENOMEM; @@ -576,3 +712,79 @@ int bus_kernel_create(const char *name, char **s) { return fd; } + +int bus_kernel_pop_memfd(sd_bus *bus, void **address, size_t *size) { + struct memfd_cache *c; + + assert(address); + assert(size); + + if (!bus || !bus->is_kernel) + return -ENOTSUP; + + if (bus->n_memfd_cache <= 0) { + int fd, r; + + r = ioctl(bus->input_fd, KDBUS_CMD_MEMFD_NEW, &fd); + if (r < 0) + return -errno; + + *address = NULL; + *size = 0; + return fd; + } + + c = &bus->memfd_cache[-- bus->n_memfd_cache]; + + assert(c->fd >= 0); + assert(c->size == 0 || c->address); + + *address = c->address; + *size = c->size; + + return c->fd; +} + +void bus_kernel_push_memfd(sd_bus *bus, int fd, void *address, size_t size) { + struct memfd_cache *c; + + assert(fd >= 0); + assert(size == 0 || address); + + if (!bus || !bus->is_kernel || + bus->n_memfd_cache >= ELEMENTSOF(bus->memfd_cache)) { + + if (size > 0) + assert_se(munmap(address, PAGE_ALIGN(size)) == 0); + + close_nointr_nofail(fd); + return; + } + + c = &bus->memfd_cache[bus->n_memfd_cache++]; + c->fd = fd; + c->address = address; + + /* If overly long, let's return a bit to the OS */ + if (size > MEMFD_CACHE_ITEM_SIZE_MAX) { + uint64_t sz = MEMFD_CACHE_ITEM_SIZE_MAX; + + ioctl(bus->input_fd, KDBUS_CMD_MEMFD_SIZE_SET, &sz); + + c->size = MEMFD_CACHE_ITEM_SIZE_MAX; + } else + c->size = size; +} + +void bus_kernel_flush_memfd(sd_bus *b) { + unsigned i; + + assert(b); + + for (i = 0; i < b->n_memfd_cache; i++) { + if (b->memfd_cache[i].size > 0) + assert_se(munmap(b->memfd_cache[i].address, PAGE_ALIGN(b->memfd_cache[i].size)) == 0); + + close_nointr_nofail(b->memfd_cache[i].fd); + } +}