1 /* $Id: tst.c 6083 2002-12-27 07:24:36Z rra $
3 ** Ternary search trie implementation.
5 ** A ternary search trie stores key/value pairs where the key is a
6 ** nul-terminated string and the value is an arbitrary pointer. It uses a
7 ** data structure designed for fast lookups, where each level of the trie
8 ** represents a character in the string being searched for.
10 ** This implementation is based on the implementation by Peter A. Friend
11 ** (version 1.3), but has been assimilated into INN and modified to use INN
12 ** formatting conventions. If new versions are released, examine the
13 ** differences between that version and version 1.3 (which was checked into
14 ** INN as individual files in case it's no longer available) and then apply
15 ** the changes to this file.
17 ** Copyright (c) 2002, Peter A. Friend
18 ** All rights reserved.
20 ** Redistribution and use in source and binary forms, with or without
21 ** modification, are permitted provided that the following conditions are
24 ** Redistributions of source code must retain the above copyright notice,
25 ** this list of conditions and the following disclaimer.
27 ** Redistributions in binary form must reproduce the above copyright notice,
28 ** this list of conditions and the following disclaimer in the documentation
29 ** and/or other materials provided with the distribution.
31 ** Neither the name of Peter A. Friend nor the names of his contributors may
32 ** be used to endorse or promote products derived from this software without
33 ** specific prior written permission.
35 ** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
36 ** IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
37 ** THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 ** PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
39 ** CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
40 ** EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
41 ** PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
42 ** PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
43 ** LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
44 ** NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
45 ** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
55 /* A single node in the ternary search trie. Stores a character, which is
56 part of the string formed by walking the tree from its root down to the
57 node, and left, right, and middle pointers to child nodes. If value is
58 non-zero (not a nul), middle is the pointer to follow if the desired
59 string's character matches value. left is used if it's less than value and
60 right is used if it's greater than value. If value is zero, this is a
61 terminal node, and middle holds a pointer to the data associated with the
62 string that ends at this point. */
70 /* The search trie structure. node_line_width is the number of nodes that are
71 allocated at a time, and node_lines holds a linked list of groups of nodes.
72 The free_list is a linked list (through the middle pointers) of available
73 nodes to use, and head holds pointers to the first nodes for each possible
74 first letter of the string. */
77 struct node_lines *node_lines;
78 struct node *free_list;
79 struct node *head[256];
82 /* A simple linked list structure used to hold all the groups of nodes. */
84 struct node *node_line;
85 struct node_lines *next;
90 ** Given a node and a character, decide whether a new node for that character
91 ** should be placed as the left child of that node. (If false, it should be
92 ** placed as the right child.)
94 #define LEFTP(n, c) (((n)->value == 0) ? ((c) < 64) : ((c) < (n)->value))
98 ** Allocate new nodes for the free list, called when the free list is empty.
101 tst_grow_node_free_list(struct tst *tst)
103 struct node *current_node;
104 struct node_lines *new_line;
107 new_line = xmalloc(sizeof(struct node_lines));
108 new_line->node_line = xcalloc(tst->node_line_width, sizeof(struct node));
109 new_line->next = tst->node_lines;
110 tst->node_lines = new_line;
112 current_node = tst->node_lines->node_line;
113 tst->free_list = current_node;
114 for (i = 1; i < tst->node_line_width; i++) {
115 current_node->middle = &(tst->node_lines->node_line[i]);
116 current_node = current_node->middle;
118 current_node->middle = NULL;
123 ** Grab a node from the free list and initialize it with the given value.
126 tst_get_free_node(struct tst *tst, unsigned char value)
128 struct node *free_node;
130 if (tst->free_list == NULL)
131 tst_grow_node_free_list(tst);
132 free_node = tst->free_list;
133 tst->free_list = tst->free_list->middle;
134 free_node->middle = NULL;
135 free_node->value = value;
141 ** tst_init allocates memory for members of struct tst, and allocates the
142 ** first node_line_width nodes. The value for width must be chosen very
143 ** carefully. One node is required for every character in the tree. If you
144 ** choose a value that is too small, your application will spend too much
145 ** time calling malloc and your node space will be too spread out. Too large
146 ** a value is just a waste of space.
153 tst = xcalloc(1, sizeof(struct tst));
154 tst->node_lines = NULL;
155 tst->node_line_width = width;
156 tst_grow_node_free_list(tst);
162 ** tst_insert inserts the string key into the tree. Behavior when a
163 ** duplicate key is inserted is controlled by option. If key is already in
164 ** the tree then TST_DUPLICATE_KEY is returned, and the data pointer for the
165 ** existing key is placed in exist_ptr. If option is set to TST_REPLACE then
166 ** the existing data pointer for the existing key is replaced by data. The
167 ** old data pointer will still be placed in exist_ptr.
169 ** If a duplicate key is encountered and option is not set to TST_REPLACE
170 ** then TST_DUPLICATE_KEY is returned. If key is zero-length, then
171 ** TST_NULL_KEY is returned. A successful insert or replace returns TST_OK.
173 ** The data argument may not be NULL; if it is, TST_NULL_DATA is returned.
174 ** If you just want a simple existence tree, use the tst pointer as the data
178 tst_insert(struct tst *tst, const unsigned char *key, void *data, int option,
181 struct node *current_node = NULL;
182 struct node **root_node = NULL;
186 return TST_NULL_DATA;
188 if (key == NULL || *key == '\0')
192 if (tst->head[*key] == NULL)
193 root_node = &tst->head[*key];
195 current_node = tst->head[*key];
197 while (root_node == NULL) {
198 if (key[key_index] == current_node->value) {
199 if (key[key_index] == '\0') {
200 if (exist_ptr != NULL)
201 *exist_ptr = current_node->middle;
202 if (option == TST_REPLACE) {
203 current_node->middle = data;
206 return TST_DUPLICATE_KEY;
208 if (current_node->middle == NULL)
209 root_node = ¤t_node->middle;
211 current_node = current_node->middle;
214 } else if (LEFTP(current_node, key[key_index])) {
215 if (current_node->left == NULL)
216 root_node = ¤t_node->left;
218 current_node = current_node->left;
220 if (current_node->right == NULL)
221 root_node = ¤t_node->right;
223 current_node = current_node->right;
228 *root_node = tst_get_free_node(tst, key[key_index]);
229 current_node = *root_node;
231 while (key[key_index] != '\0') {
233 current_node->middle = tst_get_free_node(tst, key[key_index]);
234 current_node = current_node->middle;
237 current_node->middle = data;
243 ** tst_search finds the string key in the tree if it exists and returns the
244 ** data pointer associated with that key or NULL if it's not found.
247 tst_search(struct tst *tst, const unsigned char *key)
249 struct node *current_node;
252 if (key == NULL || *key == '\0')
255 if (tst->head[*key] == NULL)
258 current_node = tst->head[*key];
260 while (current_node != NULL) {
261 if (key[key_index] == current_node->value) {
262 if (current_node->value == '\0')
263 return current_node->middle;
265 current_node = current_node->middle;
269 } else if (LEFTP(current_node, key[key_index]))
270 current_node = current_node->left;
272 current_node = current_node->right;
279 ** tst_delete deletes the string key from the tree if it exists and returns
280 ** the data pointer assocaited with that key, or NULL if it wasn't found.
283 tst_delete(struct tst *tst, const unsigned char *key)
285 struct node *current_node;
286 struct node *current_node_parent;
287 struct node *last_branch;
288 struct node *last_branch_parent;
289 struct node *next_node;
290 struct node *last_branch_replacement;
291 struct node *last_branch_dangling_child;
294 if (key == NULL || *key == '\0')
297 if (tst->head[*key] == NULL)
301 last_branch_parent = NULL;
302 current_node = tst->head[*key];
303 current_node_parent = NULL;
305 while (current_node != NULL) {
306 if (key[key_index] == current_node->value) {
307 if (current_node->left != NULL || current_node->right != NULL) {
308 last_branch = current_node;
309 last_branch_parent = current_node_parent;
311 if (key[key_index] == '\0')
314 current_node_parent = current_node;
315 current_node = current_node->middle;
318 } else if (LEFTP(current_node, key[key_index])) {
319 last_branch_parent = current_node;
320 current_node_parent = current_node;
321 current_node = current_node->left;
322 last_branch = current_node;
324 last_branch_parent = current_node;
325 current_node_parent = current_node;
326 current_node = current_node->right;
327 last_branch = current_node;
330 if (current_node == NULL)
333 if (last_branch == NULL) {
334 next_node = tst->head[*key];
335 tst->head[*key] = NULL;
336 } else if (last_branch->left == NULL && last_branch->right == NULL) {
337 if (last_branch_parent->left == last_branch)
338 last_branch_parent->left = NULL;
340 last_branch_parent->right = NULL;
341 next_node = last_branch;
343 if (last_branch->left != NULL && last_branch->right != NULL) {
344 last_branch_replacement = last_branch->right;
345 last_branch_dangling_child = last_branch->left;
346 } else if (last_branch->right != NULL) {
347 last_branch_replacement = last_branch->right;
348 last_branch_dangling_child = NULL;
350 last_branch_replacement = last_branch->left;
351 last_branch_dangling_child = NULL;
354 if (last_branch_parent == NULL)
355 tst->head[*key] = last_branch_replacement;
357 if (last_branch_parent->left == last_branch)
358 last_branch_parent->left = last_branch_replacement;
359 else if (last_branch_parent->right == last_branch)
360 last_branch_parent->right = last_branch_replacement;
362 last_branch_parent->middle = last_branch_replacement;
365 if (last_branch_dangling_child != NULL) {
366 current_node = last_branch_replacement;
367 while (current_node->left != NULL)
368 current_node = current_node->left;
369 current_node->left = last_branch_dangling_child;
372 next_node = last_branch;
376 current_node = next_node;
377 next_node = current_node->middle;
379 current_node->left = NULL;
380 current_node->right = NULL;
381 current_node->middle = tst->free_list;
382 tst->free_list = current_node;
383 } while (current_node->value != 0);
390 ** tst_cleanup frees all memory allocated to nodes, internal structures,
391 ** as well as tst itself.
394 tst_cleanup(struct tst *tst)
396 struct node_lines *current_line;
397 struct node_lines *next_line;
399 next_line = tst->node_lines;
401 current_line = next_line;
402 next_line = current_line->next;
403 free(current_line->node_line);
405 } while (next_line != NULL);