1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2012 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
30 #include "mmap-cache.h"
32 #define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
33 #define WINDOWS_MAX 32
35 typedef struct Window {
49 typedef struct FileDescriptor {
57 unsigned contexts_max;
64 unsigned lru_first, lru_last;
68 FileDescriptor *by_fd;
71 static int mmap_cache_peek_fd_index(MMapCache *m, int fd, unsigned *fd_index);
73 static void mmap_cache_window_unmap(MMapCache *m, unsigned w) {
77 assert(w < m->n_windows);
83 munmap(v->ptr, v->size);
87 static void mmap_cache_window_add_lru(MMapCache *m, unsigned w) {
91 assert(w < m->n_windows);
94 assert(v->n_ref == 0);
96 if (m->lru_last != (unsigned) -1) {
97 assert(m->windows[m->lru_last].lru_next == (unsigned) -1);
98 m->windows[m->lru_last].lru_next = w;
101 v->lru_prev = m->lru_last;
102 v->lru_next = (unsigned) -1;
105 if (m->lru_first == (unsigned) -1)
109 static void mmap_cache_window_remove_lru(MMapCache *m, unsigned w) {
113 assert(w < m->n_windows);
117 if (v->lru_prev == (unsigned) -1) {
118 assert(m->lru_first == w);
119 m->lru_first = v->lru_next;
121 assert(m->windows[v->lru_prev].lru_next == w);
122 m->windows[v->lru_prev].lru_next = v->lru_next;
125 if (v->lru_next == (unsigned) -1) {
126 assert(m->lru_last == w);
127 m->lru_last = v->lru_prev;
129 assert(m->windows[v->lru_next].lru_prev == w);
130 m->windows[v->lru_next].lru_prev = v->lru_prev;
134 static void mmap_cache_fd_add(MMapCache *m, unsigned fd_index, unsigned w) {
138 assert(fd_index < m->n_fds);
141 assert(m->by_fd[fd_index].fd == v->fd);
143 if (m->by_fd[fd_index].windows != (unsigned) -1) {
144 assert(m->windows[m->by_fd[fd_index].windows].by_fd_prev == (unsigned) -1);
145 m->windows[m->by_fd[fd_index].windows].by_fd_prev = w;
148 v->by_fd_next = m->by_fd[fd_index].windows;
149 v->by_fd_prev = (unsigned) -1;
151 m->by_fd[fd_index].windows = w;
154 static void mmap_cache_fd_remove(MMapCache *m, unsigned fd_index, unsigned w) {
158 assert(fd_index < m->n_fds);
161 assert(m->by_fd[fd_index].fd == v->fd);
162 assert(v->by_fd_next == (unsigned) -1 || m->windows[v->by_fd_next].fd == v->fd);
163 assert(v->by_fd_prev == (unsigned) -1 || m->windows[v->by_fd_prev].fd == v->fd);
165 if (v->by_fd_prev == (unsigned) -1) {
166 assert(m->by_fd[fd_index].windows == w);
167 m->by_fd[fd_index].windows = v->by_fd_next;
169 assert(m->windows[v->by_fd_prev].by_fd_next == w);
170 m->windows[v->by_fd_prev].by_fd_next = v->by_fd_next;
173 if (v->by_fd_next != (unsigned) -1) {
174 assert(m->windows[v->by_fd_next].by_fd_prev == w);
175 m->windows[v->by_fd_next].by_fd_prev = v->by_fd_prev;
179 static void mmap_cache_context_unset(MMapCache *m, unsigned c) {
184 assert(c < m->contexts_max);
186 if (m->by_context[c] == (unsigned) -1)
189 w = m->by_context[c];
190 m->by_context[c] = (unsigned) -1;
193 assert(v->n_ref > 0);
197 mmap_cache_window_add_lru(m, w);
200 static void mmap_cache_context_set(MMapCache *m, unsigned c, unsigned w) {
204 assert(c < m->contexts_max);
205 assert(w < m->n_windows);
207 if (m->by_context[c] == w)
210 mmap_cache_context_unset(m, c);
212 m->by_context[c] = w;
218 mmap_cache_window_remove_lru(m, w);
221 static void mmap_cache_free(MMapCache *m) {
228 for (w = 0; w < m->n_windows; w++)
229 mmap_cache_window_unmap(m, w);
239 MMapCache* mmap_cache_new(unsigned contexts_max, unsigned fds_max) {
242 assert(contexts_max > 0);
245 m = new0(MMapCache, 1);
249 m->contexts_max = contexts_max;
250 m->fds_max = fds_max;
251 m->windows_max = MAX(m->contexts_max, WINDOWS_MAX);
253 m->lru_first = (unsigned) -1;
254 m->lru_last = (unsigned) -1;
256 m->windows = new(Window, m->windows_max);
262 m->by_context = new(unsigned, m->contexts_max);
263 if (!m->by_context) {
267 memset(m->by_context, -1, m->contexts_max * sizeof(unsigned));
269 m->by_fd = new(FileDescriptor, m->fds_max);
278 MMapCache* mmap_cache_ref(MMapCache *m) {
280 assert(m->n_ref > 0);
286 MMapCache* mmap_cache_unref(MMapCache *m) {
288 assert(m->n_ref > 0);
298 static int mmap_cache_allocate_window(MMapCache *m, unsigned *w) {
305 if (m->n_windows < m->windows_max) {
306 *w = m->n_windows ++;
310 if (m->lru_first == (unsigned) -1)
315 assert(v->n_ref == 0);
317 mmap_cache_window_unmap(m, *w);
320 assert_se(mmap_cache_peek_fd_index(m, v->fd, &fd_index) > 0);
321 mmap_cache_fd_remove(m, fd_index, *w);
324 mmap_cache_window_remove_lru(m, *w);
329 static int mmap_cache_make_room(MMapCache *m) {
335 while (w != (unsigned) -1) {
341 mmap_cache_window_unmap(m, w);
351 static int mmap_cache_put(
364 uint64_t woffset, wsize;
369 assert(context < m->contexts_max);
373 woffset = offset & ~((uint64_t) page_size() - 1ULL);
374 wsize = size + (offset - woffset);
375 wsize = PAGE_ALIGN(wsize);
377 if (wsize < WINDOW_SIZE) {
380 delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
391 d = mmap(NULL, wsize, prot, MAP_SHARED, fd, woffset);
397 r = mmap_cache_make_room(m);
404 r = mmap_cache_allocate_window(m, &w);
417 mmap_cache_window_add_lru(m, w);
418 mmap_cache_fd_add(m, fd_index, w);
419 mmap_cache_context_set(m, context, w);
421 *ret = (uint8_t*) d + (offset - woffset);
425 static int fd_cmp(const void *_a, const void *_b) {
426 const FileDescriptor *a = _a, *b = _b;
436 static int mmap_cache_peek_fd_index(MMapCache *m, int fd, unsigned *fd_index) {
444 for (r = 0; r < m->n_fds; r++)
445 assert(m->by_fd[r].windows == (unsigned) -1 ||
446 m->windows[m->by_fd[r].windows].fd == m->by_fd[r].fd);
448 j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(FileDescriptor), fd_cmp);
452 *fd_index = (unsigned) (j - m->by_fd);
456 static int mmap_cache_get_fd_index(MMapCache *m, int fd, unsigned *fd_index) {
464 r = mmap_cache_peek_fd_index(m, fd, fd_index);
468 if (m->n_fds >= m->fds_max)
471 j = m->by_fd + m->n_fds ++;
473 j->windows = (unsigned) -1;
475 qsort(m->by_fd, m->n_fds, sizeof(FileDescriptor), fd_cmp);
477 return mmap_cache_peek_fd_index(m, fd, fd_index);
480 static bool mmap_cache_test_window(
488 assert(w < m->n_windows);
493 return offset >= v->offset &&
494 offset + size <= v->offset + v->size;
497 static int mmap_cache_current(
510 assert(context < m->contexts_max);
514 if (m->by_context[context] == (unsigned) -1)
517 w = m->by_context[context];
523 if (!mmap_cache_test_window(m, w, offset, size))
526 *ret = (uint8_t*) v->ptr + (offset - v->offset);
530 static int mmap_cache_find(
544 assert(fd_index < m->n_fds);
545 assert(context < m->contexts_max);
549 w = m->by_fd[fd_index].windows;
550 while (w != (unsigned) -1) {
554 if (mmap_cache_test_window(m, w, offset, size))
560 if (w == (unsigned) -1)
563 mmap_cache_context_set(m, context, w);
565 *ret = (uint8_t*) v->ptr + (offset - v->offset);
583 assert(context < m->contexts_max);
587 /* Maybe the current pointer for this context is already the
589 r = mmap_cache_current(m, fd, context, offset, size, ret);
593 /* Hmm, drop the reference to the current one, since it wasn't
595 mmap_cache_context_unset(m, context);
597 /* OK, let's find the chain for this FD */
598 r = mmap_cache_get_fd_index(m, fd, &fd_index);
602 /* And let's look through the available mmaps */
603 r = mmap_cache_find(m, fd, fd_index, context, offset, size, ret);
607 /* Not found? Then, let's add it */
608 return mmap_cache_put(m, fd, fd_index, prot, context, offset, size, ret);
611 void mmap_cache_close_fd(MMapCache *m, int fd) {
612 unsigned fd_index, c, w;
618 r = mmap_cache_peek_fd_index(m, fd, &fd_index);
622 for (c = 0; c < m->contexts_max; c++) {
623 w = m->by_context[c];
624 if (w == (unsigned) -1)
627 if (m->windows[w].fd == fd)
628 mmap_cache_context_unset(m, c);
631 w = m->by_fd[fd_index].windows;
632 while (w != (unsigned) -1) {
638 mmap_cache_window_unmap(m, w);
639 mmap_cache_fd_remove(m, fd_index, w);
642 w = m->by_fd[fd_index].windows;
645 memmove(m->by_fd + fd_index, m->by_fd + fd_index + 1, (m->n_fds - (fd_index + 1)) * sizeof(FileDescriptor));
649 void mmap_cache_close_fd_range(MMapCache *m, int fd, uint64_t p) {
650 unsigned fd_index, c, w;
656 /* This drops all windows that include space right of the
657 * specified offset. This is useful to ensure that after the
658 * file size is extended we drop our mappings of the end and
659 * create it anew, since otherwise it is undefined whether
660 * mapping will continue to work as intended. */
662 r = mmap_cache_peek_fd_index(m, fd, &fd_index);
666 for (c = 0; c < m->contexts_max; c++) {
667 w = m->by_context[c];
669 if (w != (unsigned) -1 && m->windows[w].fd == fd)
670 mmap_cache_context_unset(m, c);
673 w = m->by_fd[fd_index].windows;
674 while (w != (unsigned) -1) {
679 assert(v->by_fd_next == (unsigned) -1 ||
680 m->windows[v->by_fd_next].fd == fd);
682 if (v->offset + v->size > p) {
684 mmap_cache_window_unmap(m, w);
685 mmap_cache_fd_remove(m, fd_index, w);
688 w = m->by_fd[fd_index].windows;
694 void mmap_cache_close_context(MMapCache *m, unsigned context) {
695 mmap_cache_context_unset(m, context);