2 * tree234.c: reasonably generic counted 2-3-4 tree routines.
4 * This file is copyright 1999-2001 Simon Tatham.
6 * Permission is hereby granted, free of charge, to any person
7 * obtaining a copy of this software and associated documentation
8 * files (the "Software"), to deal in the Software without
9 * restriction, including without limitation the rights to use,
10 * copy, modify, merge, publish, distribute, sublicense, and/or
11 * sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following
15 * The above copyright notice and this permission notice shall be
16 * included in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
23 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include "puzzles.h" /* for smalloc/sfree */
37 #define LOG(x) (printf x)
42 typedef struct node234_Tag node234;
57 * Create a 2-3-4 tree.
59 tree234 *newtree234(cmpfn234 cmp) {
60 tree234 *ret = snew(tree234);
61 LOG(("created tree %p\n", ret));
68 * Free a 2-3-4 tree (not including freeing the elements).
70 static void freenode234(node234 *n) {
73 freenode234(n->kids[0]);
74 freenode234(n->kids[1]);
75 freenode234(n->kids[2]);
76 freenode234(n->kids[3]);
79 void freetree234(tree234 *t) {
85 * Internal function to count a node.
87 static int countnode234(node234 *n) {
92 for (i = 0; i < 4; i++)
93 count += n->counts[i];
94 for (i = 0; i < 3; i++)
101 * Count the elements in a tree.
103 int count234(tree234 *t) {
105 return countnode234(t->root);
111 * Propagate a node overflow up a tree until it stops. Returns 0 or
112 * 1, depending on whether the root had to be split or not.
114 static int add234_insert(node234 *left, void *e, node234 *right,
115 node234 **root, node234 *n, int ki) {
118 * We need to insert the new left/element/right set in n at
121 lcount = countnode234(left);
122 rcount = countnode234(right);
124 LOG((" at %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
126 n->kids[0], n->counts[0], n->elems[0],
127 n->kids[1], n->counts[1], n->elems[1],
128 n->kids[2], n->counts[2], n->elems[2],
129 n->kids[3], n->counts[3]));
130 LOG((" need to insert %p/%d \"%s\" %p/%d at position %d\n",
131 left, lcount, e, right, rcount, ki));
132 if (n->elems[1] == NULL) {
134 * Insert in a 2-node; simple.
137 LOG((" inserting on left of 2-node\n"));
138 n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1];
139 n->elems[1] = n->elems[0];
140 n->kids[1] = right; n->counts[1] = rcount;
142 n->kids[0] = left; n->counts[0] = lcount;
143 } else { /* ki == 1 */
144 LOG((" inserting on right of 2-node\n"));
145 n->kids[2] = right; n->counts[2] = rcount;
147 n->kids[1] = left; n->counts[1] = lcount;
149 if (n->kids[0]) n->kids[0]->parent = n;
150 if (n->kids[1]) n->kids[1]->parent = n;
151 if (n->kids[2]) n->kids[2]->parent = n;
154 } else if (n->elems[2] == NULL) {
156 * Insert in a 3-node; simple.
159 LOG((" inserting on left of 3-node\n"));
160 n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2];
161 n->elems[2] = n->elems[1];
162 n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1];
163 n->elems[1] = n->elems[0];
164 n->kids[1] = right; n->counts[1] = rcount;
166 n->kids[0] = left; n->counts[0] = lcount;
167 } else if (ki == 1) {
168 LOG((" inserting in middle of 3-node\n"));
169 n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2];
170 n->elems[2] = n->elems[1];
171 n->kids[2] = right; n->counts[2] = rcount;
173 n->kids[1] = left; n->counts[1] = lcount;
174 } else { /* ki == 2 */
175 LOG((" inserting on right of 3-node\n"));
176 n->kids[3] = right; n->counts[3] = rcount;
178 n->kids[2] = left; n->counts[2] = lcount;
180 if (n->kids[0]) n->kids[0]->parent = n;
181 if (n->kids[1]) n->kids[1]->parent = n;
182 if (n->kids[2]) n->kids[2]->parent = n;
183 if (n->kids[3]) n->kids[3]->parent = n;
187 node234 *m = snew(node234);
188 m->parent = n->parent;
189 LOG((" splitting a 4-node; created new node %p\n", m));
191 * Insert in a 4-node; split into a 2-node and a
192 * 3-node, and move focus up a level.
194 * I don't think it matters which way round we put the
195 * 2 and the 3. For simplicity, we'll put the 3 first
199 m->kids[0] = left; m->counts[0] = lcount;
201 m->kids[1] = right; m->counts[1] = rcount;
202 m->elems[1] = n->elems[0];
203 m->kids[2] = n->kids[1]; m->counts[2] = n->counts[1];
205 n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2];
206 n->elems[0] = n->elems[2];
207 n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
208 } else if (ki == 1) {
209 m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
210 m->elems[0] = n->elems[0];
211 m->kids[1] = left; m->counts[1] = lcount;
213 m->kids[2] = right; m->counts[2] = rcount;
215 n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2];
216 n->elems[0] = n->elems[2];
217 n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
218 } else if (ki == 2) {
219 m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
220 m->elems[0] = n->elems[0];
221 m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1];
222 m->elems[1] = n->elems[1];
223 m->kids[2] = left; m->counts[2] = lcount;
225 n->kids[0] = right; n->counts[0] = rcount;
226 n->elems[0] = n->elems[2];
227 n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
228 } else { /* ki == 3 */
229 m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
230 m->elems[0] = n->elems[0];
231 m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1];
232 m->elems[1] = n->elems[1];
233 m->kids[2] = n->kids[2]; m->counts[2] = n->counts[2];
234 n->kids[0] = left; n->counts[0] = lcount;
236 n->kids[1] = right; n->counts[1] = rcount;
239 m->kids[3] = n->kids[3] = n->kids[2] = NULL;
240 m->counts[3] = n->counts[3] = n->counts[2] = 0;
241 m->elems[2] = n->elems[2] = n->elems[1] = NULL;
242 if (m->kids[0]) m->kids[0]->parent = m;
243 if (m->kids[1]) m->kids[1]->parent = m;
244 if (m->kids[2]) m->kids[2]->parent = m;
245 if (n->kids[0]) n->kids[0]->parent = n;
246 if (n->kids[1]) n->kids[1]->parent = n;
247 LOG((" left (%p): %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", m,
248 m->kids[0], m->counts[0], m->elems[0],
249 m->kids[1], m->counts[1], m->elems[1],
250 m->kids[2], m->counts[2]));
251 LOG((" right (%p): %p/%d \"%s\" %p/%d\n", n,
252 n->kids[0], n->counts[0], n->elems[0],
253 n->kids[1], n->counts[1]));
254 left = m; lcount = countnode234(left);
255 right = n; rcount = countnode234(right);
258 ki = (n->parent->kids[0] == n ? 0 :
259 n->parent->kids[1] == n ? 1 :
260 n->parent->kids[2] == n ? 2 : 3);
265 * If we've come out of here by `break', n will still be
266 * non-NULL and all we need to do is go back up the tree
267 * updating counts. If we've come here because n is NULL, we
268 * need to create a new root for the tree because the old one
269 * has just split into two. */
272 int count = countnode234(n);
274 childnum = (n->parent->kids[0] == n ? 0 :
275 n->parent->kids[1] == n ? 1 :
276 n->parent->kids[2] == n ? 2 : 3);
277 n->parent->counts[childnum] = count;
280 return 0; /* root unchanged */
282 LOG((" root is overloaded, split into two\n"));
283 (*root) = snew(node234);
284 (*root)->kids[0] = left; (*root)->counts[0] = lcount;
285 (*root)->elems[0] = e;
286 (*root)->kids[1] = right; (*root)->counts[1] = rcount;
287 (*root)->elems[1] = NULL;
288 (*root)->kids[2] = NULL; (*root)->counts[2] = 0;
289 (*root)->elems[2] = NULL;
290 (*root)->kids[3] = NULL; (*root)->counts[3] = 0;
291 (*root)->parent = NULL;
292 if ((*root)->kids[0]) (*root)->kids[0]->parent = (*root);
293 if ((*root)->kids[1]) (*root)->kids[1]->parent = (*root);
294 LOG((" new root is %p/%d \"%s\" %p/%d\n",
295 (*root)->kids[0], (*root)->counts[0],
297 (*root)->kids[1], (*root)->counts[1]));
298 return 1; /* root moved */
303 * Add an element e to a 2-3-4 tree t. Returns e on success, or if
304 * an existing element compares equal, returns that.
306 static void *add234_internal(tree234 *t, void *e, int index) {
312 LOG(("adding element \"%s\" to tree %p\n", e, t));
313 if (t->root == NULL) {
314 t->root = snew(node234);
315 t->root->elems[1] = t->root->elems[2] = NULL;
316 t->root->kids[0] = t->root->kids[1] = NULL;
317 t->root->kids[2] = t->root->kids[3] = NULL;
318 t->root->counts[0] = t->root->counts[1] = 0;
319 t->root->counts[2] = t->root->counts[3] = 0;
320 t->root->parent = NULL;
321 t->root->elems[0] = e;
322 LOG((" created root %p\n", t->root));
328 LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
330 n->kids[0], n->counts[0], n->elems[0],
331 n->kids[1], n->counts[1], n->elems[1],
332 n->kids[2], n->counts[2], n->elems[2],
333 n->kids[3], n->counts[3]));
337 * Leaf node. We want to insert at kid position
338 * equal to the index:
345 * Internal node. We always descend through it (add
346 * always starts at the bottom, never in the
349 if (index <= n->counts[0]) {
351 } else if (index -= n->counts[0] + 1, index <= n->counts[1]) {
353 } else if (index -= n->counts[1] + 1, index <= n->counts[2]) {
355 } else if (index -= n->counts[2] + 1, index <= n->counts[3]) {
358 return NULL; /* error: index out of range */
361 if ((c = t->cmp(e, n->elems[0])) < 0)
364 return n->elems[0]; /* already exists */
365 else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0)
368 return n->elems[1]; /* already exists */
369 else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0)
372 return n->elems[2]; /* already exists */
376 LOG((" moving to child %d (%p)\n", ki, n->kids[ki]));
382 add234_insert(NULL, e, NULL, &t->root, n, ki);
387 void *add234(tree234 *t, void *e) {
388 if (!t->cmp) /* tree is unsorted */
391 return add234_internal(t, e, -1);
393 void *addpos234(tree234 *t, void *e, int index) {
394 if (index < 0 || /* index out of range */
395 t->cmp) /* tree is sorted */
396 return NULL; /* return failure */
398 return add234_internal(t, e, index); /* this checks the upper bound */
402 * Look up the element at a given numeric index in a 2-3-4 tree.
403 * Returns NULL if the index is out of range.
405 void *index234(tree234 *t, int index) {
409 return NULL; /* tree is empty */
411 if (index < 0 || index >= countnode234(t->root))
412 return NULL; /* out of range */
417 if (index < n->counts[0])
419 else if (index -= n->counts[0] + 1, index < 0)
421 else if (index < n->counts[1])
423 else if (index -= n->counts[1] + 1, index < 0)
425 else if (index < n->counts[2])
427 else if (index -= n->counts[2] + 1, index < 0)
433 /* We shouldn't ever get here. I wonder how we did. */
438 * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
439 * found. e is always passed as the first argument to cmp, so cmp
440 * can be an asymmetric function if desired. cmp can also be passed
441 * as NULL, in which case the compare function from the tree proper
444 void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp,
445 int relation, int *index) {
449 int idx, ecount, kcount, cmpret;
459 * Attempt to find the element itself.
464 * Prepare a fake `cmp' result if e is NULL.
468 assert(relation == REL234_LT || relation == REL234_GT);
469 if (relation == REL234_LT)
470 cmpret = +1; /* e is a max: always greater */
471 else if (relation == REL234_GT)
472 cmpret = -1; /* e is a min: always smaller */
475 for (kcount = 0; kcount < 4; kcount++) {
476 if (kcount >= 3 || n->elems[kcount] == NULL ||
477 (c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) {
480 if (n->kids[kcount]) idx += n->counts[kcount];
497 * We have found the element we're looking for. It's
498 * n->elems[ecount], at tree index idx. If our search
499 * relation is EQ, LE or GE we can now go home.
501 if (relation != REL234_LT && relation != REL234_GT) {
502 if (index) *index = idx;
503 return n->elems[ecount];
507 * Otherwise, we'll do an indexed lookup for the previous
508 * or next element. (It would be perfectly possible to
509 * implement these search types in a non-counted tree by
510 * going back up from where we are, but far more fiddly.)
512 if (relation == REL234_LT)
518 * We've found our way to the bottom of the tree and we
519 * know where we would insert this node if we wanted to:
520 * we'd put it in in place of the (empty) subtree
521 * n->kids[kcount], and it would have index idx
523 * But the actual element isn't there. So if our search
524 * relation is EQ, we're doomed.
526 if (relation == REL234_EQ)
530 * Otherwise, we must do an index lookup for index idx-1
531 * (if we're going left - LE or LT) or index idx (if we're
532 * going right - GE or GT).
534 if (relation == REL234_LT || relation == REL234_LE) {
540 * We know the index of the element we want; just call index234
541 * to do the rest. This will return NULL if the index is out of
542 * bounds, which is exactly what we want.
544 ret = index234(t, idx);
545 if (ret && index) *index = idx;
548 void *find234(tree234 *t, void *e, cmpfn234 cmp) {
549 return findrelpos234(t, e, cmp, REL234_EQ, NULL);
551 void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation) {
552 return findrelpos234(t, e, cmp, relation, NULL);
554 void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index) {
555 return findrelpos234(t, e, cmp, REL234_EQ, index);
559 * Tree transformation used in delete and split: move a subtree
560 * right, from child ki of a node to the next child. Update k and
561 * index so that they still point to the same place in the
562 * transformed tree. Assumes the destination child is not full, and
563 * that the source child does have a subtree to spare. Can cope if
564 * the destination child is undersized.
568 * [more] a A b B c d D e [more] a A b c C d D e
572 * [more] a A b B c d [more] a A b c C d
574 static void trans234_subtree_right(node234 *n, int ki, int *k, int *index) {
576 int i, srclen, adjust;
579 dest = n->kids[ki+1];
581 LOG((" trans234_subtree_right(%p, %d):\n", n, ki));
582 LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
584 n->kids[0], n->counts[0], n->elems[0],
585 n->kids[1], n->counts[1], n->elems[1],
586 n->kids[2], n->counts[2], n->elems[2],
587 n->kids[3], n->counts[3]));
588 LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
590 src->kids[0], src->counts[0], src->elems[0],
591 src->kids[1], src->counts[1], src->elems[1],
592 src->kids[2], src->counts[2], src->elems[2],
593 src->kids[3], src->counts[3]));
594 LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
596 dest->kids[0], dest->counts[0], dest->elems[0],
597 dest->kids[1], dest->counts[1], dest->elems[1],
598 dest->kids[2], dest->counts[2], dest->elems[2],
599 dest->kids[3], dest->counts[3]));
601 * Move over the rest of the destination node to make space.
603 dest->kids[3] = dest->kids[2]; dest->counts[3] = dest->counts[2];
604 dest->elems[2] = dest->elems[1];
605 dest->kids[2] = dest->kids[1]; dest->counts[2] = dest->counts[1];
606 dest->elems[1] = dest->elems[0];
607 dest->kids[1] = dest->kids[0]; dest->counts[1] = dest->counts[0];
609 /* which element to move over */
610 i = (src->elems[2] ? 2 : src->elems[1] ? 1 : 0);
612 dest->elems[0] = n->elems[ki];
613 n->elems[ki] = src->elems[i];
614 src->elems[i] = NULL;
616 dest->kids[0] = src->kids[i+1]; dest->counts[0] = src->counts[i+1];
617 src->kids[i+1] = NULL; src->counts[i+1] = 0;
619 if (dest->kids[0]) dest->kids[0]->parent = dest;
621 adjust = dest->counts[0] + 1;
623 n->counts[ki] -= adjust;
624 n->counts[ki+1] += adjust;
626 srclen = n->counts[ki];
629 LOG((" before: k,index = %d,%d\n", (*k), (*index)));
630 if ((*k) == ki && (*index) > srclen) {
631 (*index) -= srclen + 1;
633 } else if ((*k) == ki+1) {
636 LOG((" after: k,index = %d,%d\n", (*k), (*index)));
639 LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
641 n->kids[0], n->counts[0], n->elems[0],
642 n->kids[1], n->counts[1], n->elems[1],
643 n->kids[2], n->counts[2], n->elems[2],
644 n->kids[3], n->counts[3]));
645 LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
647 src->kids[0], src->counts[0], src->elems[0],
648 src->kids[1], src->counts[1], src->elems[1],
649 src->kids[2], src->counts[2], src->elems[2],
650 src->kids[3], src->counts[3]));
651 LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
653 dest->kids[0], dest->counts[0], dest->elems[0],
654 dest->kids[1], dest->counts[1], dest->elems[1],
655 dest->kids[2], dest->counts[2], dest->elems[2],
656 dest->kids[3], dest->counts[3]));
660 * Tree transformation used in delete and split: move a subtree
661 * left, from child ki of a node to the previous child. Update k
662 * and index so that they still point to the same place in the
663 * transformed tree. Assumes the destination child is not full, and
664 * that the source child does have a subtree to spare. Can cope if
665 * the destination child is undersized.
669 * a A b c C d D e [more] a A b B c d D e [more]
673 * a b B c C d [more] a A b c C d [more]
675 static void trans234_subtree_left(node234 *n, int ki, int *k, int *index) {
680 dest = n->kids[ki-1];
682 LOG((" trans234_subtree_left(%p, %d):\n", n, ki));
683 LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
685 n->kids[0], n->counts[0], n->elems[0],
686 n->kids[1], n->counts[1], n->elems[1],
687 n->kids[2], n->counts[2], n->elems[2],
688 n->kids[3], n->counts[3]));
689 LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
691 dest->kids[0], dest->counts[0], dest->elems[0],
692 dest->kids[1], dest->counts[1], dest->elems[1],
693 dest->kids[2], dest->counts[2], dest->elems[2],
694 dest->kids[3], dest->counts[3]));
695 LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
697 src->kids[0], src->counts[0], src->elems[0],
698 src->kids[1], src->counts[1], src->elems[1],
699 src->kids[2], src->counts[2], src->elems[2],
700 src->kids[3], src->counts[3]));
702 /* where in dest to put it */
703 i = (dest->elems[1] ? 2 : dest->elems[0] ? 1 : 0);
704 dest->elems[i] = n->elems[ki-1];
705 n->elems[ki-1] = src->elems[0];
707 dest->kids[i+1] = src->kids[0]; dest->counts[i+1] = src->counts[0];
709 if (dest->kids[i+1]) dest->kids[i+1]->parent = dest;
712 * Move over the rest of the source node.
714 src->kids[0] = src->kids[1]; src->counts[0] = src->counts[1];
715 src->elems[0] = src->elems[1];
716 src->kids[1] = src->kids[2]; src->counts[1] = src->counts[2];
717 src->elems[1] = src->elems[2];
718 src->kids[2] = src->kids[3]; src->counts[2] = src->counts[3];
719 src->elems[2] = NULL;
720 src->kids[3] = NULL; src->counts[3] = 0;
722 adjust = dest->counts[i+1] + 1;
724 n->counts[ki] -= adjust;
725 n->counts[ki-1] += adjust;
728 LOG((" before: k,index = %d,%d\n", (*k), (*index)));
732 (*index) += n->counts[ki-1] + 1;
736 LOG((" after: k,index = %d,%d\n", (*k), (*index)));
739 LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
741 n->kids[0], n->counts[0], n->elems[0],
742 n->kids[1], n->counts[1], n->elems[1],
743 n->kids[2], n->counts[2], n->elems[2],
744 n->kids[3], n->counts[3]));
745 LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
747 dest->kids[0], dest->counts[0], dest->elems[0],
748 dest->kids[1], dest->counts[1], dest->elems[1],
749 dest->kids[2], dest->counts[2], dest->elems[2],
750 dest->kids[3], dest->counts[3]));
751 LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
753 src->kids[0], src->counts[0], src->elems[0],
754 src->kids[1], src->counts[1], src->elems[1],
755 src->kids[2], src->counts[2], src->elems[2],
756 src->kids[3], src->counts[3]));
760 * Tree transformation used in delete and split: merge child nodes
761 * ki and ki+1 of a node. Update k and index so that they still
762 * point to the same place in the transformed tree. Assumes both
763 * children _are_ sufficiently small.
767 * a A b c C d a A b B c C d
769 * This routine can also cope with either child being undersized:
777 * a b B c C d a A b B c C d
779 static void trans234_subtree_merge(node234 *n, int ki, int *k, int *index) {
780 node234 *left, *right;
781 int i, leftlen, rightlen, lsize, rsize;
783 left = n->kids[ki]; leftlen = n->counts[ki];
784 right = n->kids[ki+1]; rightlen = n->counts[ki+1];
786 LOG((" trans234_subtree_merge(%p, %d):\n", n, ki));
787 LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
789 n->kids[0], n->counts[0], n->elems[0],
790 n->kids[1], n->counts[1], n->elems[1],
791 n->kids[2], n->counts[2], n->elems[2],
792 n->kids[3], n->counts[3]));
793 LOG((" left %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
795 left->kids[0], left->counts[0], left->elems[0],
796 left->kids[1], left->counts[1], left->elems[1],
797 left->kids[2], left->counts[2], left->elems[2],
798 left->kids[3], left->counts[3]));
799 LOG((" right %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
801 right->kids[0], right->counts[0], right->elems[0],
802 right->kids[1], right->counts[1], right->elems[1],
803 right->kids[2], right->counts[2], right->elems[2],
804 right->kids[3], right->counts[3]));
806 assert(!left->elems[2] && !right->elems[2]); /* neither is large! */
807 lsize = (left->elems[1] ? 2 : left->elems[0] ? 1 : 0);
808 rsize = (right->elems[1] ? 2 : right->elems[0] ? 1 : 0);
810 left->elems[lsize] = n->elems[ki];
812 for (i = 0; i < rsize+1; i++) {
813 left->kids[lsize+1+i] = right->kids[i];
814 left->counts[lsize+1+i] = right->counts[i];
815 if (left->kids[lsize+1+i])
816 left->kids[lsize+1+i]->parent = left;
818 left->elems[lsize+1+i] = right->elems[i];
821 n->counts[ki] += rightlen + 1;
826 * Move the rest of n up by one.
828 for (i = ki+1; i < 3; i++) {
829 n->kids[i] = n->kids[i+1];
830 n->counts[i] = n->counts[i+1];
832 for (i = ki; i < 2; i++) {
833 n->elems[i] = n->elems[i+1];
840 LOG((" before: k,index = %d,%d\n", (*k), (*index)));
843 (*index) += leftlen + 1;
844 } else if ((*k) > ki+1) {
847 LOG((" after: k,index = %d,%d\n", (*k), (*index)));
850 LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
852 n->kids[0], n->counts[0], n->elems[0],
853 n->kids[1], n->counts[1], n->elems[1],
854 n->kids[2], n->counts[2], n->elems[2],
855 n->kids[3], n->counts[3]));
856 LOG((" merged %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
858 left->kids[0], left->counts[0], left->elems[0],
859 left->kids[1], left->counts[1], left->elems[1],
860 left->kids[2], left->counts[2], left->elems[2],
861 left->kids[3], left->counts[3]));
866 * Delete an element e in a 2-3-4 tree. Does not free the element,
867 * merely removes all links to it from the tree nodes.
869 static void *delpos234_internal(tree234 *t, int index) {
876 n = t->root; /* by assumption this is non-NULL */
877 LOG(("deleting item %d from tree %p\n", index, t));
881 LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n",
883 n->kids[0], n->counts[0], n->elems[0],
884 n->kids[1], n->counts[1], n->elems[1],
885 n->kids[2], n->counts[2], n->elems[2],
886 n->kids[3], n->counts[3],
888 if (index <= n->counts[0]) {
890 } else if (index -= n->counts[0]+1, index <= n->counts[1]) {
892 } else if (index -= n->counts[1]+1, index <= n->counts[2]) {
894 } else if (index -= n->counts[2]+1, index <= n->counts[3]) {
897 assert(0); /* can't happen */
901 break; /* n is a leaf node; we're here! */
904 * Check to see if we've found our target element. If so,
905 * we must choose a new target (we'll use the old target's
906 * successor, which will be in a leaf), move it into the
907 * place of the old one, continue down to the leaf and
908 * delete the old copy of the new target.
910 if (index == n->counts[ki]) {
912 LOG((" found element in internal node, index %d\n", ki));
913 assert(n->elems[ki]); /* must be a kid _before_ an element */
915 for (m = n->kids[ki]; m->kids[0]; m = m->kids[0])
917 LOG((" replacing with element \"%s\" from leaf node %p\n",
919 retval = n->elems[ki-1];
920 n->elems[ki-1] = m->elems[0];
924 * Recurse down to subtree ki. If it has only one element,
925 * we have to do some transformation to start with.
927 LOG((" moving to subtree %d\n", ki));
929 if (!sub->elems[1]) {
930 LOG((" subtree has only one element!\n"));
931 if (ki > 0 && n->kids[ki-1]->elems[1]) {
933 * Child ki has only one element, but child
934 * ki-1 has two or more. So we need to move a
935 * subtree from ki-1 to ki.
937 trans234_subtree_right(n, ki-1, &ki, &index);
938 } else if (ki < 3 && n->kids[ki+1] &&
939 n->kids[ki+1]->elems[1]) {
941 * Child ki has only one element, but ki+1 has
942 * two or more. Move a subtree from ki+1 to ki.
944 trans234_subtree_left(n, ki+1, &ki, &index);
947 * ki is small with only small neighbours. Pick a
948 * neighbour and merge with it.
950 trans234_subtree_merge(n, ki>0 ? ki-1 : ki, &ki, &index);
955 * The root is empty and needs to be
958 LOG((" shifting root!\n"));
973 * Now n is a leaf node, and ki marks the element number we
974 * want to delete. We've already arranged for the leaf to be
975 * bigger than minimum size, so let's just go to it.
979 retval = n->elems[ki];
981 for (i = ki; i < 2 && n->elems[i+1]; i++)
982 n->elems[i] = n->elems[i+1];
986 * It's just possible that we have reduced the leaf to zero
987 * size. This can only happen if it was the root - so destroy
988 * it and make the tree empty.
991 LOG((" removed last element in tree, destroying empty root\n"));
992 assert(n == t->root);
997 return retval; /* finished! */
999 void *delpos234(tree234 *t, int index) {
1000 if (index < 0 || index >= countnode234(t->root))
1002 return delpos234_internal(t, index);
1004 void *del234(tree234 *t, void *e) {
1006 if (!findrelpos234(t, e, NULL, REL234_EQ, &index))
1007 return NULL; /* it wasn't in there anyway */
1008 return delpos234_internal(t, index); /* it's there; delete it. */
1012 * Join two subtrees together with a separator element between
1013 * them, given their relative height.
1015 * (Height<0 means the left tree is shorter, >0 means the right
1016 * tree is shorter, =0 means (duh) they're equal.)
1018 * It is assumed that any checks needed on the ordering criterion
1019 * have _already_ been done.
1021 * The value returned in `height' is 0 or 1 depending on whether the
1022 * resulting tree is the same height as the original larger one, or
1025 static node234 *join234_internal(node234 *left, void *sep,
1026 node234 *right, int *height) {
1027 node234 *root, *node;
1028 int relht = *height;
1031 LOG((" join: joining %p \"%s\" %p, relative height is %d\n",
1032 left, sep, right, relht));
1035 * The trees are the same height. Create a new one-element
1036 * root containing the separator and pointers to the two
1040 newroot = snew(node234);
1041 newroot->kids[0] = left; newroot->counts[0] = countnode234(left);
1042 newroot->elems[0] = sep;
1043 newroot->kids[1] = right; newroot->counts[1] = countnode234(right);
1044 newroot->elems[1] = NULL;
1045 newroot->kids[2] = NULL; newroot->counts[2] = 0;
1046 newroot->elems[2] = NULL;
1047 newroot->kids[3] = NULL; newroot->counts[3] = 0;
1048 newroot->parent = NULL;
1049 if (left) left->parent = newroot;
1050 if (right) right->parent = newroot;
1052 LOG((" join: same height, brand new root\n"));
1057 * This now works like the addition algorithm on the larger
1058 * tree. We're replacing a single kid pointer with two kid
1059 * pointers separated by an element; if that causes the node to
1060 * overload, we split it in two, move a separator element up to
1061 * the next node, and repeat.
1065 * Left tree is shorter. Search down the right tree to find
1066 * the pointer we're inserting at.
1068 node = root = right;
1069 while (++relht < 0) {
1070 node = node->kids[0];
1073 right = node->kids[ki];
1076 * Right tree is shorter; search down the left to find the
1077 * pointer we're inserting at.
1080 while (--relht > 0) {
1082 node = node->kids[3];
1083 else if (node->elems[1])
1084 node = node->kids[2];
1086 node = node->kids[1];
1090 else if (node->elems[1])
1094 left = node->kids[ki];
1098 * Now proceed as for addition.
1100 *height = add234_insert(left, sep, right, &root, node, ki);
1104 static int height234(tree234 *t) {
1106 node234 *n = t->root;
1113 tree234 *join234(tree234 *t1, tree234 *t2) {
1114 int size2 = countnode234(t2->root);
1120 element = index234(t2, 0);
1121 element = findrelpos234(t1, element, NULL, REL234_GE, NULL);
1126 element = delpos234(t2, 0);
1127 relht = height234(t1) - height234(t2);
1128 t1->root = join234_internal(t1->root, element, t2->root, &relht);
1133 tree234 *join234r(tree234 *t1, tree234 *t2) {
1134 int size1 = countnode234(t1->root);
1140 element = index234(t1, size1-1);
1141 element = findrelpos234(t2, element, NULL, REL234_LE, NULL);
1146 element = delpos234(t1, size1-1);
1147 relht = height234(t1) - height234(t2);
1148 t2->root = join234_internal(t1->root, element, t2->root, &relht);
1155 * Split out the first <index> elements in a tree and return a
1156 * pointer to the root node. Leave the root node of the remainder
1159 static node234 *split234_internal(tree234 *t, int index) {
1160 node234 *halves[2], *n, *sib, *sub;
1161 node234 *lparent, *rparent;
1162 int ki, pki, i, half, lcount, rcount;
1165 LOG(("splitting tree %p at point %d\n", t, index));
1168 * Easy special cases. After this we have also dealt completely
1169 * with the empty-tree case and we can assume the root exists.
1171 if (index == 0) /* return nothing */
1173 if (index == countnode234(t->root)) { /* return the whole tree */
1174 node234 *ret = t->root;
1180 * Search down the tree to find the split point.
1182 lparent = rparent = NULL;
1185 LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n",
1187 n->kids[0], n->counts[0], n->elems[0],
1188 n->kids[1], n->counts[1], n->elems[1],
1189 n->kids[2], n->counts[2], n->elems[2],
1190 n->kids[3], n->counts[3],
1193 rcount = countnode234(n) - lcount;
1194 if (index <= n->counts[0]) {
1196 } else if (index -= n->counts[0]+1, index <= n->counts[1]) {
1198 } else if (index -= n->counts[1]+1, index <= n->counts[2]) {
1201 index -= n->counts[2]+1;
1205 LOG((" splitting at subtree %d\n", ki));
1208 LOG((" splitting at child index %d\n", ki));
1211 * Split the node, put halves[0] on the right of the left
1212 * one and halves[1] on the left of the right one, put the
1213 * new node pointers in halves[0] and halves[1], and go up
1216 sib = snew(node234);
1217 for (i = 0; i < 3; i++) {
1218 if (i+ki < 3 && n->elems[i+ki]) {
1219 sib->elems[i] = n->elems[i+ki];
1220 sib->kids[i+1] = n->kids[i+ki+1];
1221 if (sib->kids[i+1]) sib->kids[i+1]->parent = sib;
1222 sib->counts[i+1] = n->counts[i+ki+1];
1223 n->elems[i+ki] = NULL;
1224 n->kids[i+ki+1] = NULL;
1225 n->counts[i+ki+1] = 0;
1227 sib->elems[i] = NULL;
1228 sib->kids[i+1] = NULL;
1229 sib->counts[i+1] = 0;
1233 lparent->kids[pki] = n;
1234 lparent->counts[pki] = lcount;
1235 n->parent = lparent;
1236 rparent->kids[0] = sib;
1237 rparent->counts[0] = rcount;
1238 sib->parent = rparent;
1248 LOG((" left node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
1250 n->kids[0], n->counts[0], n->elems[0],
1251 n->kids[1], n->counts[1], n->elems[1],
1252 n->kids[2], n->counts[2], n->elems[2],
1253 n->kids[3], n->counts[3]));
1254 LOG((" right node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
1256 sib->kids[0], sib->counts[0], sib->elems[0],
1257 sib->kids[1], sib->counts[1], sib->elems[1],
1258 sib->kids[2], sib->counts[2], sib->elems[2],
1259 sib->kids[3], sib->counts[3]));
1265 * We've come off the bottom here, so we've successfully split
1266 * the tree into two equally high subtrees. The only problem is
1267 * that some of the nodes down the fault line will be smaller
1268 * than the minimum permitted size. (Since this is a 2-3-4
1269 * tree, that means they'll be zero-element one-child nodes.)
1271 LOG((" fell off bottom, lroot is %p, rroot is %p\n",
1272 halves[0], halves[1]));
1273 lparent->counts[pki] = rparent->counts[0] = 0;
1274 lparent->kids[pki] = rparent->kids[0] = NULL;
1277 * So now we go back down the tree from each of the two roots,
1278 * fixing up undersize nodes.
1280 for (half = 0; half < 2; half++) {
1282 * Remove the root if it's undersize (it will contain only
1283 * one child pointer, so just throw it away and replace it
1284 * with its child). This might happen several times.
1286 while (halves[half] && !halves[half]->elems[0]) {
1287 LOG((" root %p is undersize, throwing away\n", halves[half]));
1288 halves[half] = halves[half]->kids[0];
1289 sfree(halves[half]->parent);
1290 halves[half]->parent = NULL;
1291 LOG((" new root is %p\n", halves[half]));
1296 void (*toward)(node234 *n, int ki, int *k, int *index);
1300 * Now we have a potentially undersize node on the
1301 * right (if half==0) or left (if half==1). Sort it
1302 * out, by merging with a neighbour or by transferring
1303 * subtrees over. At this time we must also ensure that
1304 * nodes are bigger than minimum, in case we need an
1305 * element to merge two nodes below.
1307 LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
1309 n->kids[0], n->counts[0], n->elems[0],
1310 n->kids[1], n->counts[1], n->elems[1],
1311 n->kids[2], n->counts[2], n->elems[2],
1312 n->kids[3], n->counts[3]));
1314 ki = 0; /* the kid we're interested in */
1315 ni = 1; /* the neighbour */
1316 merge = 0; /* for merge: leftmost of the two */
1317 toward = trans234_subtree_left;
1319 ki = (n->kids[3] ? 3 : n->kids[2] ? 2 : 1);
1322 toward = trans234_subtree_right;
1326 if (sub && !sub->elems[1]) {
1328 * This node is undersized or minimum-size. If we
1329 * can merge it with its neighbour, we do so;
1330 * otherwise we must be able to transfer subtrees
1331 * over to it until it is greater than minimum
1334 int undersized = (!sub->elems[0]);
1335 LOG((" child %d is %ssize\n", ki,
1336 undersized ? "under" : "minimum-"));
1337 LOG((" neighbour is %s\n",
1338 n->kids[ni]->elems[2] ? "large" :
1339 n->kids[ni]->elems[1] ? "medium" : "small"));
1340 if (!n->kids[ni]->elems[1] ||
1341 (undersized && !n->kids[ni]->elems[2])) {
1343 * Neighbour is small, or possibly neighbour is
1344 * medium and we are undersize.
1346 trans234_subtree_merge(n, merge, NULL, NULL);
1347 sub = n->kids[merge];
1350 * n is empty, and hence must have been the
1351 * root and needs to be removed.
1354 LOG((" shifting root!\n"));
1356 halves[half]->parent = NULL;
1360 /* Neighbour is big enough to move trees over. */
1361 toward(n, ni, NULL, NULL);
1363 toward(n, ni, NULL, NULL);
1370 t->root = halves[1];
1373 tree234 *splitpos234(tree234 *t, int index, int before) {
1378 count = countnode234(t->root);
1379 if (index < 0 || index > count)
1380 return NULL; /* error */
1381 ret = newtree234(t->cmp);
1382 n = split234_internal(t, index);
1384 /* We want to return the ones before the index. */
1388 * We want to keep the ones before the index and return the
1391 ret->root = t->root;
1396 tree234 *split234(tree234 *t, void *e, cmpfn234 cmp, int rel) {
1400 assert(rel != REL234_EQ);
1402 if (rel == REL234_GT || rel == REL234_GE) {
1404 rel = (rel == REL234_GT ? REL234_LE : REL234_LT);
1408 if (!findrelpos234(t, e, cmp, rel, &index))
1411 return splitpos234(t, index+1, before);
1414 static node234 *copynode234(node234 *n, copyfn234 copyfn, void *copyfnstate) {
1416 node234 *n2 = snew(node234);
1418 for (i = 0; i < 3; i++) {
1419 if (n->elems[i] && copyfn)
1420 n2->elems[i] = copyfn(copyfnstate, n->elems[i]);
1422 n2->elems[i] = n->elems[i];
1425 for (i = 0; i < 4; i++) {
1427 n2->kids[i] = copynode234(n->kids[i], copyfn, copyfnstate);
1428 n2->kids[i]->parent = n2;
1432 n2->counts[i] = n->counts[i];
1437 tree234 *copytree234(tree234 *t, copyfn234 copyfn, void *copyfnstate) {
1440 t2 = newtree234(t->cmp);
1441 t2->root = copynode234(t->root, copyfn, copyfnstate);
1442 t2->root->parent = NULL;
1450 * Test code for the 2-3-4 tree. This code maintains an alternative
1451 * representation of the data in the tree, in an array (using the
1452 * obvious and slow insert and delete functions). After each tree
1453 * operation, the verify() function is called, which ensures all
1454 * the tree properties are preserved:
1455 * - node->child->parent always equals node
1456 * - tree->root->parent always equals NULL
1457 * - number of kids == 0 or number of elements + 1;
1458 * - tree has the same depth everywhere
1459 * - every node has at least one element
1460 * - subtree element counts are accurate
1461 * - any NULL kid pointer is accompanied by a zero count
1462 * - in a sorted tree: ordering property between elements of a
1463 * node and elements of its children is preserved
1464 * and also ensures the list represented by the tree is the same
1465 * list it should be. (This last check also doubly verifies the
1466 * ordering properties, because the `same list it should be' is by
1467 * definition correctly ordered. It also ensures all nodes are
1468 * distinct, because the enum functions would get caught in a loop
1474 #define srealloc realloc
1477 * Error reporting function.
1479 void error(char *fmt, ...) {
1483 vfprintf(stdout, fmt, ap);
1488 /* The array representation of the data. */
1490 int arraylen, arraysize;
1493 /* The tree representation of the same data. */
1497 * Routines to provide a diagnostic printout of a tree. Currently
1498 * relies on every element in the tree being a one-character string
1505 int dispnode(node234 *n, int level, dispctx *ctx) {
1507 int xpos = strlen(ctx->levels[0]);
1511 len = sprintf(ctx->levels[0]+xpos, " %s%s%s",
1512 n->elems[0], n->elems[1], n->elems[2]);
1513 else if (n->elems[1])
1514 len = sprintf(ctx->levels[0]+xpos, " %s%s",
1515 n->elems[0], n->elems[1]);
1517 len = sprintf(ctx->levels[0]+xpos, " %s",
1519 return xpos + 1 + (len-1) / 2;
1522 int nodelen, mypos, myleft, x, i;
1524 xpos[0] = dispnode(n->kids[0], level-3, ctx);
1525 xpos[1] = dispnode(n->kids[1], level-3, ctx);
1528 xpos[2] = dispnode(n->kids[2], level-3, ctx);
1532 xpos[3] = dispnode(n->kids[3], level-3, ctx);
1537 mypos = (xpos[1] + xpos[2]) / 2;
1538 else if (nkids == 3)
1541 mypos = (xpos[0] + xpos[1]) / 2;
1542 nodelen = nkids * 2 - 1;
1543 myleft = mypos - ((nodelen-1)/2);
1544 assert(myleft >= xpos[0]);
1545 assert(myleft + nodelen-1 <= xpos[nkids-1]);
1547 x = strlen(ctx->levels[level]);
1548 while (x <= xpos[0] && x < myleft)
1549 ctx->levels[level][x++] = ' ';
1551 ctx->levels[level][x++] = '_';
1553 x += sprintf(ctx->levels[level]+x, ".%s.%s.%s.",
1554 n->elems[0], n->elems[1], n->elems[2]);
1556 x += sprintf(ctx->levels[level]+x, ".%s.%s.",
1557 n->elems[0], n->elems[1]);
1559 x += sprintf(ctx->levels[level]+x, ".%s.",
1561 while (x < xpos[nkids-1])
1562 ctx->levels[level][x++] = '_';
1563 ctx->levels[level][x] = '\0';
1565 x = strlen(ctx->levels[level-1]);
1566 for (i = 0; i < nkids; i++) {
1569 if (i > 0 && i < nkids-1)
1575 while (x < pos && x < rpos)
1576 ctx->levels[level-1][x++] = ' ';
1578 ctx->levels[level-1][x++] = '|';
1579 while (x < pos || x < rpos)
1580 ctx->levels[level-1][x++] = '_';
1582 ctx->levels[level-1][x++] = '|';
1584 ctx->levels[level-1][x] = '\0';
1586 x = strlen(ctx->levels[level-2]);
1587 for (i = 0; i < nkids; i++) {
1591 ctx->levels[level-2][x++] = ' ';
1592 ctx->levels[level-2][x++] = '|';
1594 ctx->levels[level-2][x] = '\0';
1600 void disptree(tree234 *t) {
1603 int width = count234(t);
1604 int ht = height234(t) * 3 - 2;
1608 printf("[empty tree]\n");
1611 leveldata = smalloc(ht * (width+2));
1612 ctx.levels = smalloc(ht * sizeof(char *));
1613 for (i = 0; i < ht; i++) {
1614 ctx.levels[i] = leveldata + i * (width+2);
1615 ctx.levels[i][0] = '\0';
1618 (void) dispnode(t->root, ht-1, &ctx);
1621 printf("%s\n", ctx.levels[i]);
1632 int chknode(chkctx *ctx, int level, node234 *node,
1633 void *lowbound, void *highbound) {
1638 /* Count the non-NULL kids. */
1639 for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++);
1640 /* Ensure no kids beyond the first NULL are non-NULL. */
1641 for (i = nkids; i < 4; i++)
1642 if (node->kids[i]) {
1643 error("node %p: nkids=%d but kids[%d] non-NULL",
1645 } else if (node->counts[i]) {
1646 error("node %p: kids[%d] NULL but count[%d]=%d nonzero",
1647 node, i, i, node->counts[i]);
1650 /* Count the non-NULL elements. */
1651 for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++);
1652 /* Ensure no elements beyond the first NULL are non-NULL. */
1653 for (i = nelems; i < 3; i++)
1654 if (node->elems[i]) {
1655 error("node %p: nelems=%d but elems[%d] non-NULL",
1661 * If nkids==0, this is a leaf node; verify that the tree
1662 * depth is the same everywhere.
1664 if (ctx->treedepth < 0)
1665 ctx->treedepth = level; /* we didn't know the depth yet */
1666 else if (ctx->treedepth != level)
1667 error("node %p: leaf at depth %d, previously seen depth %d",
1668 node, level, ctx->treedepth);
1671 * If nkids != 0, then it should be nelems+1, unless nelems
1672 * is 0 in which case nkids should also be 0 (and so we
1673 * shouldn't be in this condition at all).
1675 int shouldkids = (nelems ? nelems+1 : 0);
1676 if (nkids != shouldkids) {
1677 error("node %p: %d elems should mean %d kids but has %d",
1678 node, nelems, shouldkids, nkids);
1683 * nelems should be at least 1.
1686 error("node %p: no elems", node, nkids);
1690 * Add nelems to the running element count of the whole tree.
1692 ctx->elemcount += nelems;
1695 * Check ordering property: all elements should be strictly >
1696 * lowbound, strictly < highbound, and strictly < each other in
1697 * sequence. (lowbound and highbound are NULL at edges of tree
1698 * - both NULL at root node - and NULL is considered to be <
1699 * everything and > everything. IYSWIM.)
1702 for (i = -1; i < nelems; i++) {
1703 void *lower = (i == -1 ? lowbound : node->elems[i]);
1704 void *higher = (i+1 == nelems ? highbound : node->elems[i+1]);
1705 if (lower && higher && cmp(lower, higher) >= 0) {
1706 error("node %p: kid comparison [%d=%s,%d=%s] failed",
1707 node, i, lower, i+1, higher);
1713 * Check parent pointers: all non-NULL kids should have a
1714 * parent pointer coming back to this node.
1716 for (i = 0; i < nkids; i++)
1717 if (node->kids[i]->parent != node) {
1718 error("node %p kid %d: parent ptr is %p not %p",
1719 node, i, node->kids[i]->parent, node);
1724 * Now (finally!) recurse into subtrees.
1728 for (i = 0; i < nkids; i++) {
1729 void *lower = (i == 0 ? lowbound : node->elems[i-1]);
1730 void *higher = (i >= nelems ? highbound : node->elems[i]);
1731 int subcount = chknode(ctx, level+1, node->kids[i], lower, higher);
1732 if (node->counts[i] != subcount) {
1733 error("node %p kid %d: count says %d, subtree really has %d",
1734 node, i, node->counts[i], subcount);
1742 void verifytree(tree234 *tree, void **array, int arraylen) {
1747 ctx.treedepth = -1; /* depth unknown yet */
1748 ctx.elemcount = 0; /* no elements seen yet */
1750 * Verify validity of tree properties.
1753 if (tree->root->parent != NULL)
1754 error("root->parent is %p should be null", tree->root->parent);
1755 chknode(&ctx, 0, tree->root, NULL, NULL);
1757 printf("tree depth: %d\n", ctx.treedepth);
1759 * Enumerate the tree and ensure it matches up to the array.
1761 for (i = 0; NULL != (p = index234(tree, i)); i++) {
1763 error("tree contains more than %d elements", arraylen);
1765 error("enum at position %d: array says %s, tree says %s",
1768 if (ctx.elemcount != i) {
1769 error("tree really contains %d elements, enum gave %d",
1773 error("enum gave only %d elements, array has %d", i, arraylen);
1776 if (ctx.elemcount != i) {
1777 error("tree really contains %d elements, count234 gave %d",
1781 void verify(void) { verifytree(tree, array, arraylen); }
1783 void internal_addtest(void *elem, int index, void *realret) {
1787 if (arraysize < arraylen+1) {
1788 arraysize = arraylen+1+256;
1789 array = (array == NULL ? smalloc(arraysize*sizeof(*array)) :
1790 srealloc(array, arraysize*sizeof(*array)));
1794 /* now i points to the first element >= elem */
1795 retval = elem; /* expect elem returned (success) */
1796 for (j = arraylen; j > i; j--)
1797 array[j] = array[j-1];
1798 array[i] = elem; /* add elem to array */
1801 if (realret != retval) {
1802 error("add: retval was %p expected %p", realret, retval);
1808 void addtest(void *elem) {
1812 realret = add234(tree, elem);
1815 while (i < arraylen && cmp(elem, array[i]) > 0)
1817 if (i < arraylen && !cmp(elem, array[i])) {
1818 void *retval = array[i]; /* expect that returned not elem */
1819 if (realret != retval) {
1820 error("add: retval was %p expected %p", realret, retval);
1823 internal_addtest(elem, i, realret);
1826 void addpostest(void *elem, int i) {
1829 realret = addpos234(tree, elem, i);
1831 internal_addtest(elem, i, realret);
1834 void delpostest(int i) {
1836 void *elem = array[i], *ret;
1838 /* i points to the right element */
1839 while (i < arraylen-1) {
1840 array[i] = array[i+1];
1843 arraylen--; /* delete elem from array */
1846 ret = del234(tree, elem);
1848 ret = delpos234(tree, index);
1851 error("del returned %p, expected %p", ret, elem);
1857 void deltest(void *elem) {
1861 while (i < arraylen && cmp(elem, array[i]) > 0)
1863 if (i >= arraylen || cmp(elem, array[i]) != 0)
1864 return; /* don't do it! */
1868 /* A sample data set and test utility. Designed for pseudo-randomness,
1869 * and yet repeatability. */
1872 * This random number generator uses the `portable implementation'
1873 * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
1876 int randomnumber(unsigned *seed) {
1877 *seed *= 1103515245;
1879 return ((*seed) / 65536) % 32768;
1882 int mycmp(void *av, void *bv) {
1883 char const *a = (char const *)av;
1884 char const *b = (char const *)bv;
1885 return strcmp(a, b);
1888 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
1891 "0", "2", "3", "I", "K", "d", "H", "J", "Q", "N", "n", "q", "j", "i",
1892 "7", "G", "F", "D", "b", "x", "g", "B", "e", "v", "V", "T", "f", "E",
1893 "S", "8", "A", "k", "X", "p", "C", "R", "a", "o", "r", "O", "Z", "u",
1894 "6", "1", "w", "L", "P", "M", "c", "U", "h", "9", "t", "5", "W", "Y",
1897 "a", "ab", "absque", "coram", "de",
1898 "palam", "clam", "cum", "ex", "e",
1899 "sine", "tenus", "pro", "prae",
1900 "banana", "carrot", "cabbage", "broccoli", "onion", "zebra",
1901 "penguin", "blancmange", "pangolin", "whale", "hedgehog",
1902 "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux",
1903 "murfl", "spoo", "breen", "flarn", "octothorpe",
1904 "snail", "tiger", "elephant", "octopus", "warthog", "armadillo",
1905 "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin",
1906 "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper",
1907 "wand", "ring", "amulet"
1911 #define NSTR lenof(strings)
1913 void findtest(void) {
1914 static const int rels[] = {
1915 REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT
1917 static const char *const relnames[] = {
1918 "EQ", "GE", "LE", "LT", "GT"
1920 int i, j, rel, index;
1921 char *p, *ret, *realret, *realret2;
1924 for (i = 0; i < (int)NSTR; i++) {
1926 for (j = 0; j < (int)(sizeof(rels)/sizeof(*rels)); j++) {
1929 lo = 0; hi = arraylen-1;
1931 mid = (lo + hi) / 2;
1932 c = strcmp(p, array[mid]);
1942 if (rel == REL234_LT)
1943 ret = (mid > 0 ? array[--mid] : NULL);
1944 else if (rel == REL234_GT)
1945 ret = (mid < arraylen-1 ? array[++mid] : NULL);
1950 if (rel == REL234_LT || rel == REL234_LE) {
1952 ret = (hi >= 0 ? array[hi] : NULL);
1953 } else if (rel == REL234_GT || rel == REL234_GE) {
1955 ret = (lo < arraylen ? array[lo] : NULL);
1960 realret = findrelpos234(tree, p, NULL, rel, &index);
1961 if (realret != ret) {
1962 error("find(\"%s\",%s) gave %s should be %s",
1963 p, relnames[j], realret, ret);
1965 if (realret && index != mid) {
1966 error("find(\"%s\",%s) gave %d should be %d",
1967 p, relnames[j], index, mid);
1969 if (realret && rel == REL234_EQ) {
1970 realret2 = index234(tree, index);
1971 if (realret2 != realret) {
1972 error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
1973 p, relnames[j], realret, index, index, realret2);
1977 printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j],
1983 realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index);
1984 if (arraylen && (realret != array[0] || index != 0)) {
1985 error("find(NULL,GT) gave %s(%d) should be %s(0)",
1986 realret, index, array[0]);
1987 } else if (!arraylen && (realret != NULL)) {
1988 error("find(NULL,GT) gave %s(%d) should be NULL",
1992 realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index);
1993 if (arraylen && (realret != array[arraylen-1] || index != arraylen-1)) {
1994 error("find(NULL,LT) gave %s(%d) should be %s(0)",
1995 realret, index, array[arraylen-1]);
1996 } else if (!arraylen && (realret != NULL)) {
1997 error("find(NULL,LT) gave %s(%d) should be NULL",
2002 void splittest(tree234 *tree, void **array, int arraylen) {
2004 tree234 *tree3, *tree4;
2005 for (i = 0; i <= arraylen; i++) {
2006 tree3 = copytree234(tree, NULL, NULL);
2007 tree4 = splitpos234(tree3, i, 0);
2008 verifytree(tree3, array, i);
2009 verifytree(tree4, array+i, arraylen-i);
2010 join234(tree3, tree4);
2011 freetree234(tree4); /* left empty by join */
2012 verifytree(tree3, array, arraylen);
2020 int tworoot, tmplen;
2022 tree234 *tree2, *tree3, *tree4;
2025 setvbuf(stdout, NULL, _IOLBF, 0);
2027 for (i = 0; i < (int)NSTR; i++) in[i] = 0;
2029 arraylen = arraysize = 0;
2030 tree = newtree234(mycmp);
2034 for (i = 0; i < 10000; i++) {
2035 j = randomnumber(&seed);
2037 printf("trial: %d\n", i);
2039 printf("deleting %s (%d)\n", strings[j], j);
2040 deltest(strings[j]);
2043 printf("adding %s (%d)\n", strings[j], j);
2044 addtest(strings[j]);
2051 while (arraylen > 0) {
2052 j = randomnumber(&seed);
2060 * Now try an unsorted tree. We don't really need to test
2061 * delpos234 because we know del234 is based on it, so it's
2062 * already been tested in the above sorted-tree code; but for
2063 * completeness we'll use it to tear down our unsorted tree
2064 * once we've built it.
2066 tree = newtree234(NULL);
2069 for (i = 0; i < 1000; i++) {
2070 printf("trial: %d\n", i);
2071 j = randomnumber(&seed);
2073 k = randomnumber(&seed);
2074 k %= count234(tree)+1;
2075 printf("adding string %s at index %d\n", strings[j], k);
2076 addpostest(strings[j], k);
2080 * While we have this tree in its full form, we'll take a copy
2081 * of it to use in split and join testing.
2083 tree2 = copytree234(tree, NULL, NULL);
2084 verifytree(tree2, array, arraylen);/* check the copy is accurate */
2086 * Split tests. Split the tree at every possible point and
2087 * check the resulting subtrees.
2089 tworoot = (!tree2->root->elems[1]);/* see if it has a 2-root */
2090 splittest(tree2, array, arraylen);
2092 * Now do the split test again, but on a tree that has a 2-root
2093 * (if the previous one didn't) or doesn't (if the previous one
2097 while ((!tree2->root->elems[1]) == tworoot) {
2098 delpos234(tree2, --tmplen);
2100 printf("now trying splits on second tree\n");
2101 splittest(tree2, array, tmplen);
2105 * Back to the main testing of uncounted trees.
2107 while (count234(tree) > 0) {
2108 printf("cleanup: tree size %d\n", count234(tree));
2109 j = randomnumber(&seed);
2110 j %= count234(tree);
2111 printf("deleting string %s from index %d\n", (char *)array[j], j);
2117 * Finally, do some testing on split/join on _sorted_ trees. At
2118 * the same time, we'll be testing split on very small trees.
2120 tree = newtree234(mycmp);
2123 for (i = 0; i < 16; i++) {
2124 addtest(strings[i]);
2125 tree2 = copytree234(tree, NULL, NULL);
2126 splittest(tree2, array, arraylen);
2132 * Test silly cases of join: join(emptytree, emptytree), and
2133 * also ensure join correctly spots when sorted trees fail the
2134 * ordering constraint.
2136 tree = newtree234(mycmp);
2137 tree2 = newtree234(mycmp);
2138 tree3 = newtree234(mycmp);
2139 tree4 = newtree234(mycmp);
2140 assert(mycmp(strings[0], strings[1]) < 0); /* just in case :-) */
2141 add234(tree2, strings[1]);
2142 add234(tree4, strings[0]);
2143 array[0] = strings[0];
2144 array[1] = strings[1];
2145 verifytree(tree, array, 0);
2146 verifytree(tree2, array+1, 1);
2147 verifytree(tree3, array, 0);
2148 verifytree(tree4, array, 1);
2152 * - join(tree,tree3) should leave both tree and tree3 unchanged.
2153 * - joinr(tree,tree2) should leave both tree and tree2 unchanged.
2154 * - join(tree4,tree3) should leave both tree3 and tree4 unchanged.
2155 * - join(tree, tree2) should move the element from tree2 to tree.
2156 * - joinr(tree4, tree3) should move the element from tree4 to tree3.
2157 * - join(tree,tree3) should return NULL and leave both unchanged.
2158 * - join(tree3,tree) should work and create a bigger tree in tree3.
2160 assert(tree == join234(tree, tree3));
2161 verifytree(tree, array, 0);
2162 verifytree(tree3, array, 0);
2163 assert(tree2 == join234r(tree, tree2));
2164 verifytree(tree, array, 0);
2165 verifytree(tree2, array+1, 1);
2166 assert(tree4 == join234(tree4, tree3));
2167 verifytree(tree3, array, 0);
2168 verifytree(tree4, array, 1);
2169 assert(tree == join234(tree, tree2));
2170 verifytree(tree, array+1, 1);
2171 verifytree(tree2, array, 0);
2172 assert(tree3 == join234r(tree4, tree3));
2173 verifytree(tree3, array, 1);
2174 verifytree(tree4, array, 0);
2175 assert(NULL == join234(tree, tree3));
2176 verifytree(tree, array+1, 1);
2177 verifytree(tree3, array, 1);
2178 assert(tree3 == join234(tree3, tree));
2179 verifytree(tree3, array, 2);
2180 verifytree(tree, array, 0);
2187 #if 0 /* sorted list of strings might be useful */
2189 "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x",