From 955bd501c20ad0bc64104ed706225a824dafc375 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Mon, 22 Oct 2012 16:27:00 +0200 Subject: [PATCH] shared: strbuf - add string de-duplication facility --- Makefile.am | 2 + src/shared/strbuf.c | 155 ++++++++++++++++++++++++++++++++++++++++++++ src/shared/strbuf.h | 56 ++++++++++++++++ 3 files changed, 213 insertions(+) create mode 100644 src/shared/strbuf.c create mode 100644 src/shared/strbuf.h diff --git a/Makefile.am b/Makefile.am index d39e3b62f..17b28cc79 100644 --- a/Makefile.am +++ b/Makefile.am @@ -758,6 +758,8 @@ libsystemd_shared_la_SOURCES = \ src/shared/set.h \ src/shared/strv.c \ src/shared/strv.h \ + src/shared/strbuf.c \ + src/shared/strbuf.h \ src/shared/conf-parser.c \ src/shared/conf-parser.h \ src/shared/log.c \ diff --git a/src/shared/strbuf.c b/src/shared/strbuf.c new file mode 100644 index 000000000..5c858cb68 --- /dev/null +++ b/src/shared/strbuf.c @@ -0,0 +1,155 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2012 Kay Sievers + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +#include +#include + +#include "util.h" +#include "strbuf.h" + +struct strbuf *strbuf_new(void) { + struct strbuf *str; + + str = new0(struct strbuf, 1); + if (!str) + return NULL; + + str->buf = new0(char, 1); + if (!str->buf) + goto err; + str->len = 1; + + str->root = new0(struct strbuf_node, 1); + if (!str->root) + goto err; + str->nodes_count = 1; + return str; +err: + free(str->buf); + free(str->root); + free(str); + return NULL; +} + +static void strbuf_node_cleanup(struct strbuf_node *node) { + size_t i; + + for (i = 0; i < node->children_count; i++) + strbuf_node_cleanup(node->children[i].child); + free(node->children); + free(node); +} + +void strbuf_complete(struct strbuf *str) { + if (!str) + return; + if (str->root) + strbuf_node_cleanup(str->root); + str->root = NULL; +} + +void strbuf_cleanup(struct strbuf *str) { + if (!str) + return; + if (str->root) + strbuf_node_cleanup(str->root); + free(str->buf); + free(str); +} + +static int strbuf_children_cmp(const void *v1, const void *v2) { + const struct strbuf_child_entry *n1 = v1; + const struct strbuf_child_entry *n2 = v2; + + return n1->c - n2->c; +} + +ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) { + uint8_t c; + struct strbuf_node *node; + size_t depth; + char *buf_new; + struct strbuf_child_entry *child; + struct strbuf_node *node_child; + ssize_t off; + + if (!str->root) + return -EINVAL; + + /* search string; start from last character to find possibly matching tails */ + if (len == 0) + return 0; + str->in_count++; + str->in_len += len; + + node = str->root; + c = s[len-1]; + for (depth = 0; depth <= len; depth++) { + struct strbuf_child_entry search; + + /* match against current node */ + off = node->value_off + node->value_len - len; + if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) { + str->dedup_len += len; + str->dedup_count++; + return off; + } + + /* lookup child node */ + c = s[len - 1 - depth]; + search.c = c; + child = bsearch(&search, node->children, node->children_count, sizeof(struct strbuf_child_entry), + strbuf_children_cmp); + if (!child) + break; + node = child->child; + } + + /* add new string */ + buf_new = realloc(str->buf, str->len + len+1); + if (!buf_new) + return -ENOMEM; + str->buf = buf_new; + off = str->len; + memcpy(str->buf + off, s, len); + str->len += len; + str->buf[str->len++] = '\0'; + + /* new node */ + node_child = new0(struct strbuf_node, 1); + if (!node_child) + return -ENOMEM; + str->nodes_count++; + node_child->value_off = off; + node_child->value_len = len; + + /* extend array, add new entry, sort for bisection */ + child = realloc(node->children, (node->children_count + 1) * sizeof(struct strbuf_child_entry)); + if (!child) + return -ENOMEM; + node->children = child; + node->children[node->children_count].c = c; + node->children[node->children_count].child = node_child; + node->children_count++; + qsort(node->children, node->children_count, sizeof(struct strbuf_child_entry), strbuf_children_cmp); + + return off; +} diff --git a/src/shared/strbuf.h b/src/shared/strbuf.h new file mode 100644 index 000000000..35f232ddb --- /dev/null +++ b/src/shared/strbuf.h @@ -0,0 +1,56 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#pragma once + +/*** + This file is part of systemd. + + Copyright 2012 Kay Sievers + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +#include +#include +#include + +struct strbuf { + char *buf; + size_t len; + struct strbuf_node *root; + + size_t nodes_count; + size_t in_count; + size_t in_len; + size_t dedup_len; + size_t dedup_count; +}; + +struct strbuf_node { + size_t value_off; + size_t value_len; + + struct strbuf_child_entry *children; + uint8_t children_count; +}; + +struct strbuf_child_entry { + uint8_t c; + struct strbuf_node *child; +}; + +struct strbuf *strbuf_new(void); +ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len); +void strbuf_complete(struct strbuf *str); +void strbuf_cleanup(struct strbuf *str); -- 2.30.2