X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=blobdiff_plain;f=src%2Fjournal%2Fmmap-cache.c;h=4c940aaa24afb9a3a7f6371f8c5ce5fb146eeea2;hp=77710ff9bf11d90cbc68bf2c982d2023d1051f59;hb=69adae5168da231c6cf319f708860954701b25ed;hpb=beec00856158b703f2125a3d936080346a8a8de1 diff --git a/src/journal/mmap-cache.c b/src/journal/mmap-cache.c index 77710ff9b..4c940aaa2 100644 --- a/src/journal/mmap-cache.c +++ b/src/journal/mmap-cache.c @@ -19,310 +19,440 @@ along with systemd; If not, see . ***/ -#include -#include #include #include +#include #include +#include "hashmap.h" +#include "list.h" +#include "log.h" #include "util.h" - +#include "macro.h" #include "mmap-cache.h" -#define WINDOW_SIZE (8ULL*1024ULL*1024ULL) -#define WINDOWS_MAX 32 +typedef struct Window Window; +typedef struct Context Context; +typedef struct FileDescriptor FileDescriptor; -typedef struct Window { - int fd; +struct Window { + MMapCache *cache; + + bool keep_always; + bool in_unused; + + int prot; void *ptr; uint64_t offset; - uint64_t size; + size_t size; + + FileDescriptor *fd; - unsigned n_ref; - unsigned lru_prev; - unsigned lru_next; + LIST_FIELDS(Window, by_fd); + LIST_FIELDS(Window, unused); - unsigned by_fd_prev; - unsigned by_fd_next; -} Window; + LIST_HEAD(Context, contexts); +}; + +struct Context { + MMapCache *cache; + unsigned id; + Window *window; + + LIST_FIELDS(Context, by_window); +}; -typedef struct FileDescriptor { +struct FileDescriptor { + MMapCache *cache; int fd; - unsigned windows; -} FileDescriptor; + LIST_HEAD(Window, windows); +}; struct MMapCache { - unsigned n_ref; + int n_ref; + unsigned n_windows; - unsigned contexts_max; - unsigned windows_max; - unsigned fds_max; + unsigned n_hit, n_missed; - unsigned n_windows; - unsigned n_fds; - unsigned lru_first, lru_last; + Hashmap *fds; + Context *contexts[MMAP_CACHE_MAX_CONTEXTS]; - Window *windows; - unsigned *by_context; - FileDescriptor *by_fd; + LIST_HEAD(Window, unused); + Window 
*last_unused; }; -static void mmap_cache_window_unmap(MMapCache *m, unsigned w) { - Window *v; +#define WINDOWS_MIN 64 - assert(m); - assert(w < m->n_windows); +#ifdef ENABLE_DEBUG_MMAP_CACHE +/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */ +# define WINDOW_SIZE (page_size()) +#else +# define WINDOW_SIZE (8ULL*1024ULL*1024ULL) +#endif - v = m->windows + w; - if (!v->ptr) - return; +MMapCache* mmap_cache_new(void) { + MMapCache *m; - munmap(v->ptr, v->size); - v->ptr = NULL; -} + m = new0(MMapCache, 1); + if (!m) + return NULL; -static void mmap_cache_window_add_lru(MMapCache *m, unsigned w) { - Window *v; + m->n_ref = 1; + return m; +} +MMapCache* mmap_cache_ref(MMapCache *m) { assert(m); - assert(w < m->n_windows); - - v = m->windows + w; - v->lru_prev = m->lru_last; - v->lru_next = (unsigned) -1; + assert(m->n_ref > 0); - m->lru_last = w; - if (m->lru_first == (unsigned) -1) - m->lru_first = w; + m->n_ref ++; + return m; } -static void mmap_cache_window_remove_lru(MMapCache *m, unsigned w) { - Window *v; +static void window_unlink(Window *w) { + Context *c; - assert(m); - assert(w < m->n_windows); + assert(w); - v = m->windows + w; + if (w->ptr) + munmap(w->ptr, w->size); - if (v->lru_prev == (unsigned) -1) - m->lru_first = v->lru_next; - else - m->windows[v->lru_prev].lru_next = v->lru_next; + if (w->fd) + LIST_REMOVE(by_fd, w->fd->windows, w); - if (v->lru_next == (unsigned) -1) - m->lru_last = v->lru_prev; - else - m->windows[v->lru_next].lru_prev = v->lru_prev; + if (w->in_unused) { + if (w->cache->last_unused == w) + w->cache->last_unused = w->unused_prev; + + LIST_REMOVE(unused, w->cache->unused, w); + } + + LIST_FOREACH(by_window, c, w->contexts) { + assert(c->window == w); + c->window = NULL; + } } -static void mmap_cache_fd_add(MMapCache *m, unsigned fd_index, unsigned w) { - Window *v; +static void window_free(Window *w) { + assert(w); - assert(m); - assert(fd_index < m->n_fds); + window_unlink(w); + 
w->cache->n_windows--; + free(w); +} - v = m->windows + w; - v->by_fd_next = m->by_fd[fd_index].windows; - v->by_fd_prev = (unsigned) -1; +_pure_ static bool window_matches(Window *w, int fd, int prot, uint64_t offset, size_t size) { + assert(w); + assert(fd >= 0); + assert(size > 0); - m->by_fd[fd_index].windows = w; + return + w->fd && + fd == w->fd->fd && + prot == w->prot && + offset >= w->offset && + offset + size <= w->offset + w->size; } -static void mmap_cache_fd_remove(MMapCache *m, unsigned fd_index, unsigned w) { - Window *v; +static Window *window_add(MMapCache *m) { + Window *w; assert(m); - assert(fd_index < m->n_fds); - v = m->windows + w; - if (v->by_fd_prev == (unsigned) -1) - m->by_fd[fd_index].windows = v->by_fd_next; - else - m->windows[v->by_fd_prev].by_fd_next = v->by_fd_next; + if (!m->last_unused || m->n_windows <= WINDOWS_MIN) { - if (v->by_fd_next != (unsigned) -1) - m->windows[v->by_fd_next].by_fd_prev = v->by_fd_prev; -} + /* Allocate a new window */ + w = new0(Window, 1); + if (!w) + return NULL; + m->n_windows++; + } else { -static void mmap_cache_context_unset(MMapCache *m, unsigned c) { - Window *v; - unsigned w; + /* Reuse an existing one */ + w = m->last_unused; + window_unlink(w); + zero(*w); + } - assert(m); - assert(c < m->contexts_max); + w->cache = m; + return w; +} - if (m->by_context[c] == (unsigned) -1) - return; +static void context_detach_window(Context *c) { + Window *w; - w = m->by_context[c]; - m->by_context[c] = (unsigned) -1; + assert(c); - v = m->windows + w; - assert(v->n_ref > 0); - v->n_ref --; + if (!c->window) + return; - if (v->n_ref == 0) - mmap_cache_window_add_lru(m, w); + w = c->window; + c->window = NULL; + LIST_REMOVE(by_window, w->contexts, c); + + if (!w->contexts && !w->keep_always) { + /* Not used anymore? */ +#ifdef ENABLE_DEBUG_MMAP_CACHE + /* Unmap unused windows immediately to expose use-after-unmap + * by SIGSEGV. 
*/ + window_free(w); +#else + LIST_PREPEND(unused, c->cache->unused, w); + if (!c->cache->last_unused) + c->cache->last_unused = w; + + w->in_unused = true; +#endif + } } -static void mmap_cache_context_set(MMapCache *m, unsigned c, unsigned w) { - Window *v; - - assert(m); - assert(c < m->contexts_max); - assert(w < m->n_windows); +static void context_attach_window(Context *c, Window *w) { + assert(c); + assert(w); - if (m->by_context[c] == w) + if (c->window == w) return; - mmap_cache_context_unset(m, c); + context_detach_window(c); + + if (w->in_unused) { + /* Used again? */ + LIST_REMOVE(unused, c->cache->unused, w); + if (c->cache->last_unused == w) + c->cache->last_unused = w->unused_prev; - m->by_context[c] = w; + w->in_unused = false; + } - v = m->windows + w; - v->n_ref ++; - if (v->n_ref == 1) - mmap_cache_window_remove_lru(m, w); + c->window = w; + LIST_PREPEND(by_window, w->contexts, c); } -static void mmap_cache_free(MMapCache *m) { +static Context *context_add(MMapCache *m, unsigned id) { + Context *c; assert(m); - if (m->windows) { - unsigned w; + c = m->contexts[id]; + if (c) + return c; + + c = new0(Context, 1); + if (!c) + return NULL; + + c->cache = m; + c->id = id; - for (w = 0; w < m->n_windows; w++) - mmap_cache_window_unmap(m, w); + assert(!m->contexts[id]); + m->contexts[id] = c; - free(m->windows); + return c; +} + +static void context_free(Context *c) { + assert(c); + + context_detach_window(c); + + if (c->cache) { + assert(c->cache->contexts[c->id] == c); + c->cache->contexts[c->id] = NULL; } - free(m->by_context); - free(m->by_fd); - free(m); + free(c); } -MMapCache* mmap_cache_new(unsigned contexts_max, unsigned fds_max) { - MMapCache *m; +static void fd_free(FileDescriptor *f) { + assert(f); - assert(contexts_max > 0); - assert(fds_max > 0); + while (f->windows) + window_free(f->windows); - m = new0(MMapCache, 1); - if (!m) - return NULL; + if (f->cache) + assert_se(hashmap_remove(f->cache->fds, INT_TO_PTR(f->fd + 1))); - 
m->contexts_max = contexts_max; - m->fds_max = fds_max; - m->windows_max = MAX(m->contexts_max, WINDOWS_MAX); - m->n_ref = 1; - m->lru_first = (unsigned) -1; - m->lru_last = (unsigned) -1; + free(f); +} - m->windows = new(Window, m->windows_max); - if (!m->windows) { - mmap_cache_free(m); +static FileDescriptor* fd_add(MMapCache *m, int fd) { + FileDescriptor *f; + int r; + + assert(m); + assert(fd >= 0); + + f = hashmap_get(m->fds, INT_TO_PTR(fd + 1)); + if (f) + return f; + + r = hashmap_ensure_allocated(&m->fds, NULL); + if (r < 0) return NULL; - } - m->by_context = new(unsigned, m->contexts_max); - if (!m->by_context) { - mmap_cache_free(m); + f = new0(FileDescriptor, 1); + if (!f) return NULL; - } - memset(m->by_context, -1, m->contexts_max * sizeof(unsigned)); - m->by_fd = new(FileDescriptor, m->fds_max); - if (!m->by_fd) { - mmap_cache_free(m); + f->cache = m; + f->fd = fd; + + r = hashmap_put(m->fds, UINT_TO_PTR(fd + 1), f); + if (r < 0) { + free(f); return NULL; } - return m; + return f; } -MMapCache* mmap_cache_ref(MMapCache *m) { +static void mmap_cache_free(MMapCache *m) { + FileDescriptor *f; + int i; + assert(m); - assert(m->n_ref > 0); - m->n_ref++; - return m; + for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++) + if (m->contexts[i]) + context_free(m->contexts[i]); + + while ((f = hashmap_first(m->fds))) + fd_free(f); + + hashmap_free(m->fds); + + while (m->unused) + window_free(m->unused); + + free(m); } MMapCache* mmap_cache_unref(MMapCache *m) { assert(m); assert(m->n_ref > 0); - if (m->n_ref == 1) + m->n_ref --; + if (m->n_ref == 0) mmap_cache_free(m); - else - m->n_ref--; return NULL; } -static int mmap_cache_allocate_window(MMapCache *m, unsigned *w) { +static int make_room(MMapCache *m) { assert(m); - assert(w); - if (m->n_windows < m->windows_max) { - *w = m->n_windows ++; + if (!m->last_unused) + return 0; + + window_free(m->last_unused); + return 1; +} + +static int try_context( + MMapCache *m, + int fd, + int prot, + unsigned context, + bool 
keep_always, + uint64_t offset, + size_t size, + void **ret) { + + Context *c; + + assert(m); + assert(m->n_ref > 0); + assert(fd >= 0); + assert(size > 0); + assert(ret); + + c = m->contexts[context]; + if (!c) + return 0; + + assert(c->id == context); + + if (!c->window) return 0; - } - if (m->lru_first == (unsigned) -1) - return -E2BIG; + if (!window_matches(c->window, fd, prot, offset, size)) { + + /* Drop the reference to the window, since it's unnecessary now */ + context_detach_window(c); + return 0; + } - *w = m->lru_first; - mmap_cache_window_unmap(m, *w); - mmap_cache_window_remove_lru(m, *w); + c->window->keep_always |= keep_always; - return 0; + *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset); + return 1; } -static int mmap_cache_make_room(MMapCache *m) { - unsigned w; +static int find_mmap( + MMapCache *m, + int fd, + int prot, + unsigned context, + bool keep_always, + uint64_t offset, + size_t size, + void **ret) { + + FileDescriptor *f; + Window *w; + Context *c; assert(m); + assert(m->n_ref > 0); + assert(fd >= 0); + assert(size > 0); - w = m->lru_first; - while (w != (unsigned) -1) { - Window *v; + f = hashmap_get(m->fds, INT_TO_PTR(fd + 1)); + if (!f) + return 0; - v = m->windows + w; + assert(f->fd == fd); - if (v->ptr) { - mmap_cache_window_unmap(m, w); - return 1; - } + LIST_FOREACH(by_fd, w, f->windows) + if (window_matches(w, fd, prot, offset, size)) + break; - w = v->lru_next; - } + if (!w) + return 0; + + c = context_add(m, context); + if (!c) + return -ENOMEM; - return 0; + context_attach_window(c, w); + w->keep_always += keep_always; + + *ret = (uint8_t*) w->ptr + (offset - w->offset); + return 1; } -static int mmap_cache_put( +static int add_mmap( MMapCache *m, int fd, - unsigned fd_index, int prot, unsigned context, + bool keep_always, uint64_t offset, - uint64_t size, + size_t size, + struct stat *st, void **ret) { - unsigned w; - Window *v; - void *d; uint64_t woffset, wsize; + Context *c; + FileDescriptor *f; + Window 
*w; + void *d; int r; assert(m); + assert(m->n_ref > 0); assert(fd >= 0); - assert(context < m->contexts_max); assert(size > 0); assert(ret); @@ -343,6 +473,18 @@ static int mmap_cache_put( wsize = WINDOW_SIZE; } + if (st) { + /* Memory maps that are larger than the files + underneath have undefined behavior. Hence, clamp + things to the file size if we know it */ + + if (woffset >= (uint64_t) st->st_size) + return -EADDRNOTAVAIL; + + if (woffset + wsize > (uint64_t) st->st_size) + wsize = PAGE_ALIGN(st->st_size - woffset); + } + for (;;) { d = mmap(NULL, wsize, prot, MAP_SHARED, fd, woffset); if (d != MAP_FAILED) @@ -350,153 +492,44 @@ static int mmap_cache_put( if (errno != ENOMEM) return -errno; - r = mmap_cache_make_room(m); + r = make_room(m); if (r < 0) return r; if (r == 0) return -ENOMEM; } - r = mmap_cache_allocate_window(m, &w); - if (r < 0) { - munmap(d, wsize); - return r; - } - - v = m->windows + w; - v->fd = fd; - v->ptr = d; - v->offset = woffset; - v->size = wsize; - - v->n_ref = 0; - v->lru_prev = v->lru_next = (unsigned) -1; - - mmap_cache_fd_add(m, fd_index, w); - mmap_cache_context_set(m, context, w); - - *ret = (uint8_t*) d + (offset - woffset); - return 1; -} - -static int fd_cmp(const void *_a, const void *_b) { - const FileDescriptor *a = _a, *b = _b; - - if (a->fd < b->fd) - return -1; - if (a->fd > b->fd) - return 1; - - return 0; -} - -static int mmap_cache_get_fd_index(MMapCache *m, int fd, unsigned *fd_index) { - FileDescriptor *j; - - assert(m); - assert(fd >= 0); - assert(fd_index); - - j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp); - if (!j) { - if (m->n_fds >= m->fds_max) - return -E2BIG; - - j = m->by_fd + m->n_fds ++; - j->fd = fd; - j->windows = (unsigned) -1; - - qsort(m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp); - j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp); - } - - *fd_index = (unsigned) (j - m->by_fd); - return 0; -} - -static bool mmap_cache_test_window( - MMapCache *m, - 
unsigned w, - uint64_t offset, - uint64_t size) { - Window *v; - - assert(m); - assert(w < m->n_windows); - assert(size > 0); - - v = m->windows + w; - - return offset >= v->offset && - offset + size <= v->offset + v->size; -} - -static int mmap_cache_current( - MMapCache *m, - int fd, - unsigned context, - uint64_t offset, - uint64_t size, - void **ret) { + c = context_add(m, context); + if (!c) + goto outofmem; - Window *v; - unsigned w; + f = fd_add(m, fd); + if (!f) + goto outofmem; - assert(m); - assert(fd >= 0); - assert(context < m->contexts_max); - assert(size > 0); - assert(ret); + w = window_add(m); + if (!w) + goto outofmem; - if (m->by_context[context] == (unsigned) -1) - return 0; - - w = m->by_context[context]; - v = m->windows + w; + w->keep_always = keep_always; + w->ptr = d; + w->offset = woffset; + w->prot = prot; + w->size = wsize; + w->fd = f; - if (v->fd != fd) - return 0; + LIST_PREPEND(by_fd, f->windows, w); - if (!mmap_cache_test_window(m, w, offset, size)) - return 0; + context_detach_window(c); + c->window = w; + LIST_PREPEND(by_window, w->contexts, c); - *ret = (uint8_t*) v->ptr + (offset - v->offset); + *ret = (uint8_t*) w->ptr + (offset - w->offset); return 1; -} - -static int mmap_cache_find( - MMapCache *m, - unsigned fd_index, - unsigned context, - uint64_t offset, - uint64_t size, - void **ret) { - Window *v = NULL; - unsigned w; - - assert(m); - assert(fd_index < m->n_fds); - assert(context < m->contexts_max); - assert(size > 0); - assert(ret); - - w = m->by_fd[fd_index].windows; - while (w != (unsigned) -1) { - if (mmap_cache_test_window(m, w, offset, size)) - break; - - w = m->windows[w].by_fd_next; - } - - if (w == (unsigned) -1) - return 0; - - mmap_cache_context_set(m, context, w); - - v = m->windows + w; - *ret = (uint8_t*) v->ptr + (offset - v->offset); - return 1; +outofmem: + munmap(d, wsize); + return -ENOMEM; } int mmap_cache_get( @@ -504,73 +537,62 @@ int mmap_cache_get( int fd, int prot, unsigned context, + bool 
keep_always, uint64_t offset, - uint64_t size, + size_t size, + struct stat *st, void **ret) { - unsigned fd_index; int r; assert(m); + assert(m->n_ref > 0); assert(fd >= 0); - assert(context < m->contexts_max); assert(size > 0); assert(ret); + assert(context < MMAP_CACHE_MAX_CONTEXTS); - /* Maybe the current pointer for this context is already the - * right one? */ - r = mmap_cache_current(m, fd, context, offset, size, ret); - if (r != 0) + /* Check whether the current context is the right one already */ + r = try_context(m, fd, prot, context, keep_always, offset, size, ret); + if (r != 0) { + m->n_hit ++; return r; + } - /* OK, let's find the chain for this FD */ - r = mmap_cache_get_fd_index(m, fd, &fd_index); - if (r < 0) + /* Search for a matching mmap */ + r = find_mmap(m, fd, prot, context, keep_always, offset, size, ret); + if (r != 0) { + m->n_hit ++; return r; + } - /* And let's look through the available mmaps */ - r = mmap_cache_find(m, fd_index, context, offset, size, ret); - if (r != 0) - return r; + m->n_missed++; - /* Not found? 
Then, let's add it */ - return mmap_cache_put(m, fd, fd_index, prot, context, offset, size, ret); + /* Create a new mmap */ + return add_mmap(m, fd, prot, context, keep_always, offset, size, st, ret); } void mmap_cache_close_fd(MMapCache *m, int fd) { - FileDescriptor *j; - unsigned fd_index, c, w; + FileDescriptor *f; assert(m); - assert(fd > 0); + assert(fd >= 0); - j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp); - if (!j) + f = hashmap_get(m->fds, INT_TO_PTR(fd + 1)); + if (!f) return; - fd_index = (unsigned) (j - m->by_fd); - - for (c = 0; c < m->contexts_max; c++) { - w = m->by_context[c]; - if (w == (unsigned) -1) - continue; - - if (m->windows[w].fd == fd) - mmap_cache_context_unset(m, c); - } - - w = m->by_fd[fd_index].windows; - while (w != (unsigned) -1) { - mmap_cache_fd_remove(m, fd_index, w); - mmap_cache_window_unmap(m, w); + fd_free(f); +} - w = m->by_fd[fd_index].windows; - } +unsigned mmap_cache_get_hit(MMapCache *m) { + assert(m); - memmove(m->by_fd + fd_index, m->by_fd + fd_index + 1, (m->n_fds - (fd_index + 1)) * sizeof(FileDescriptor)); - m->n_fds --; + return m->n_hit; } -void mmap_cache_close_context(MMapCache *m, unsigned context) { - mmap_cache_context_unset(m, context); +unsigned mmap_cache_get_missed(MMapCache *m) { + assert(m); + + return m->n_missed; }