/***
This file is part of systemd.
- Copyright 2012 Kay Sievers <kay.sievers@vrfy.org>
+ Copyright 2012 Kay Sievers <kay@vrfy.org>
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
#include "util.h"
#include "strbuf.h"
+/*
+ * Strbuf stores given strings in a single continuous allocated memory
+ * area. Identical strings are de-duplicated and return the same offset
+ * as the first string stored. If the tail of a string already exists
+ * in the buffer, the tail is returned.
+ *
+ * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the
+ * information about the stored strings.
+ *
+ * Example of udev rules:
+ * $ ./udevadm test .
+ * ...
+ * read rules file: /usr/lib/udev/rules.d/99-systemd.rules
+ * rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings
+ * 23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used
+ * ...
+ */
+
struct strbuf *strbuf_new(void) {
struct strbuf *str;
free(node);
}
+/* clean up trie data, leave only the string buffer */
void strbuf_complete(struct strbuf *str) {
if (!str)
return;
str->root = NULL;
}
+/* clean up everything */
void strbuf_cleanup(struct strbuf *str) {
if (!str)
return;
free(str);
}
-static int strbuf_children_cmp(const void *v1, const void *v2) {
- const struct strbuf_child_entry *n1 = v1;
- const struct strbuf_child_entry *n2 = v2;
-
+static int strbuf_children_cmp(const struct strbuf_child_entry *n1,
+ const struct strbuf_child_entry *n2) {
return n1->c - n2->c;
}
+static void bubbleinsert(struct strbuf_node *node,
+ uint8_t c,
+ struct strbuf_node *node_child) {
+
+ struct strbuf_child_entry new = {
+ .c = c,
+ .child = node_child,
+ };
+ int left = 0, right = node->children_count;
+
+ while (right > left) {
+ int middle = (right + left) / 2 ;
+ if (strbuf_children_cmp(&node->children[middle], &new) <= 0)
+ left = middle + 1;
+ else
+ right = middle;
+ }
+
+ memmove(node->children + left + 1, node->children + left,
+ sizeof(struct strbuf_child_entry) * (node->children_count - left));
+ node->children[left] = new;
+
+ node->children_count ++;
+}
+
+/* add string, return the index/offset into the buffer */
ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) {
uint8_t c;
struct strbuf_node *node;
/* lookup child node */
c = s[len - 1 - depth];
search.c = c;
- child = bsearch(&search, node->children, node->children_count, sizeof(struct strbuf_child_entry),
- strbuf_children_cmp);
+ child = bsearch(&search, node->children, node->children_count,
+ sizeof(struct strbuf_child_entry),
+ (__compar_fn_t) strbuf_children_cmp);
if (!child)
break;
node = child->child;
node_child = new0(struct strbuf_node, 1);
if (!node_child)
return -ENOMEM;
- str->nodes_count++;
node_child->value_off = off;
node_child->value_len = len;
/* extend array, add new entry, sort for bisection */
child = realloc(node->children, (node->children_count + 1) * sizeof(struct strbuf_child_entry));
- if (!child)
+ if (!child) {
+ free(node_child);
return -ENOMEM;
+ }
+
+ str->nodes_count++;
+
node->children = child;
- node->children[node->children_count].c = c;
- node->children[node->children_count].child = node_child;
- node->children_count++;
- qsort(node->children, node->children_count, sizeof(struct strbuf_child_entry), strbuf_children_cmp);
+ bubbleinsert(node, c, node_child);
return off;
}