3 * Splitting and joining AVL trees
5 * (c) 2024 Straylight/Edgeware
8 /*----- Licensing notice --------------------------------------------------*
10 * This file is part of Xyla, a library of binary trees.
12 * Xyla is free software: you can redistribute it and/or modify it under
13 * the terms of the GNU Lesser General Public License as published by the
14 * Free Software Foundation; either version 3 of the License, or (at your
15 * option) any later version.
17 * Xyla is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
20 * License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with Xyla. If not, see <https://www.gnu.org/licenses/>.
26 /*----- Header files ------------------------------------------------------*/
31 /*----- Main code ---------------------------------------------------------*/
33 int xyla_avl_join(struct xyla_bt_nodecls *cls,
34 struct xyla_bt_node **root_out, int *rootht_out,
35 struct xyla_bt_node **left, int lht,
36 struct xyla_bt_node *mid,
37 struct xyla_bt_node **right, int rht)
39 /* Concatenate the two trees rooted at LEFT and RIGHT in order; if MID is
40 * not null, then place this node between them. The root of the resulting
41 * tree is stored in *ROOT_OUT. It's the caller's responsibility to ensure
42 * that this respects any ordering invariants.
44 * If the heights of the LEFT and RIGHT trees are known, then they can be
45 * passed in as LHT and RHT; otherwise, LHT and/or RHT must be -1. The
46 * height of the combined tree is stored in *ROOTHT_OUT if this is not
49 * The ROOT_OUT pointer may equal either LEFT or RIGHT (or, indeed, both,
50 * only if both are empty). If LEFT is distinct from ROOT_OUT then *LEFT
51 * is set null on exit; similarly, if RIGHT is distinct from ROOT_OUT, then
52 * *RIGHT is set null on exit.
54 * Returns `XYLA_RC_OK'.
57 struct xyla_avl_node *m = AVL_NODE(mid), *p, *n, *c;
58 struct xyla_bt_node **plink, **nlink, *root;
59 xyla_bt_updfn *updfn = cls ? cls->ops->upd : 0;
60 struct xyla_avl_path path;
61 unsigned f, bal, trail;
64 /* The trail. This is a similar idea to `xyla_avl_insert', except that we
65 * use it to track balance annotations on the initial downward pass, rather
66 * than child directions on the later upwards pass. (The latter would be
67 * pointless, because the path always follows the extreme right or left.)
70 #define NODE (0*TRAILSH)
71 #define PARENT (1*TRAILSH)
72 #define BAL(lv) ((trail >> (lv))&3u)
74 /* Determine the heights of the left and right trees if the caller didn't
77 if (lht < 0) lht = xyla_avl_height(AVL_NODE(*left));
78 if (rht < 0) rht = xyla_avl_height(AVL_NODE(*right));
80 /* If we're not given a joining node then take one from the right tree. If
81 * the right tree is empty, then the join is just the left tree and there's
85 if (!rht) { root = *left; ht = lht; goto end; }
86 else if (!lht) { root = *right; ht = rht; goto end; }
88 m = xyla_avl_firstpath(right, &path);
89 rc = xyla_avl_remove(cls, &path);
90 if (rc == XYLA_RC_HTCHG) rht--;
91 else XYLA__ASSERT(!rc);
95 if (rht <= lht + 1 && lht <= rht + 1) {
96 /* The trees have similar heights. We can just join them the stupid
97 * way. The joining node will be the root.
100 V( if (xyla__verbout && (xyla__verbose&XYLA__VF_JOIN))
101 fputs("XYLA-AVL JOIN similar heights\n", xyla__verbout); )
102 m->bt.left = *left; m->bt.right = *right;
103 if (lht > rht) { bal = XYLA_AVLBAL_LBIAS; ht = lht + 1; }
104 else if (lht < rht) { bal = XYLA_AVLBAL_RBIAS; ht = rht + 1; }
105 else { bal = XYLA_AVLBAL_EVEN; ht = lht + 1; }
106 m->avl.f = (m->avl.f&~XYLA_AVLF_BALMASK) | bal; root = &m->bt;
107 if (updfn) updfn(cls, &m->bt);
110 #define FIXUP_JOIN(left, lht, LBIAS, right, rht, RBIAS, ERX) do { \
111 /* The left tree is taller. */ \
113 /* Find the tallest rightmost subtree in the left tree which is \
114 * not taller than the right tree. We know that it's not the \
115 * root, because otherwise we'd be in a different case. \
117 path.k = 0; path.p[0] = left; ht = lht; \
118 p = 0; n = AVL_NODE(*left); \
119 trail = n->avl.f&XYLA_AVLF_BALMASK; \
121 /* We have a node n with height ht > rht. (Initially, n is the \
122 * root; its height must be greater, or we'd have done this the \
123 * easy way.) Descend the tree and check again. \
126 ht -= trail&XYLA_AVLBAL_##LBIAS ? 2 : 1; if (ht <= rht) break; \
127 path.p[++path.k] = &n->bt.right; \
128 p = n; n = AVL_NODE(n->bt.right); \
129 trail = (trail << TRAILSH) | (n->avl.f&XYLA_AVLF_BALMASK); \
132 /* The right subtree of n seems like a good candidate. The plan \
133 * is to replace it with a subtree, rooted at the middle node m, \
134 * with n's current right subtree as its left subtree, and \
135 * `*right' as its right subtree. But this is where it starts to \
136 * get tricky. We delay updating the parent node n until the \
139 m->bt.right = *right; \
142 /* The subtree we're replacing is shorter than the right tree. \
143 * We can only get into this situation if its parent n was \
144 * taller, so the node n must be left-biased. We have some \
145 * balance annotations to adjust; and the parent's height has \
146 * increased, so we'll have to ascend the tree. \
156 V( if (xyla__verbout && (xyla__verbose&XYLA__VF_JOIN)) \
157 fputs("XYLA-AVL JOIN short subtree (" #right " child)\n", \
159 m->bt.left = n->bt.right; n->bt.right = &m->bt; \
160 m->avl.f = (m->avl.f&~XYLA_AVLF_BALMASK) | XYLA_AVLBAL_##RBIAS; \
161 n->avl.f = (n->avl.f&~XYLA_AVLF_BALMASK) | XYLA_AVLBAL_##RBIAS; \
162 if (updfn) { updfn(cls, &m->bt); updfn(cls, &n->bt); } \
163 } else if (BAL(NODE) != XYLA_AVLBAL_##RBIAS) { \
164 /* The focus node was not right-biased, and the subtree we're \
165 * replacing is the same height as the right tree. It's now \
166 * either evenly balanced, in which case we're done, or biased \
167 * to the right, in which case we must ascend. \
182 V( if (xyla__verbout && (xyla__verbose&XYLA__VF_JOIN)) \
183 fprintf(xyla__verbout, "XYLA-AVL JOIN splice, node %c " \
184 "(" #right " child)\n", AVL_BALCH(n)); ) \
185 m->bt.left = n->bt.right; n->bt.right = &m->bt; \
186 m->avl.f = (m->avl.f&~XYLA_AVLF_BALMASK) | XYLA_AVLBAL_EVEN; \
188 (n->avl.f&~XYLA_AVLF_BALMASK) | AVL_REBAL_##ERX(BAL(NODE)); \
189 if (BAL(NODE) == XYLA_AVLBAL_EVEN) { \
190 if (updfn) updfn(cls, &m->bt); \
192 if (updfn) { updfn(cls, &m->bt); updfn(cls, &n->bt); } \
195 } else if (BAL(PARENT) != XYLA_AVLBAL_##RBIAS) { \
196 /* The focus node was evenly balanced or biased to the left. \
197 * There must, in fact, be a parent node. (If the right tree \
198 * has height h, then the focus node has height h + 1; if \
199 * that's the root, then we'd have done this the easy way.) If \
200 * the parent was previously left-biased then it's now even and \
201 * we're done; otherwise, we must ascend the tree. \
207 * h + 1 (+) ---> h + 2 n / \ \
218 V( if (xyla__verbout && (xyla__verbose&XYLA__VF_JOIN)) \
219 fprintf(xyla__verbout, "XYLA-AVL JOIN splice, " \
220 "node %c, parent %c (" #right " child)\n", \
221 AVL_BALCH(n), AVL_BALCH(p)); ) \
222 m->bt.left = &n->bt; p->bt.right = &m->bt; \
223 m->avl.f = (m->avl.f&~XYLA_AVLF_BALMASK) | XYLA_AVLBAL_##LBIAS; \
225 (p->avl.f&~XYLA_AVLF_BALMASK) | AVL_REBAL_##ERX(BAL(PARENT)); \
227 if (BAL(PARENT) == XYLA_AVLBAL_EVEN) { \
228 if (updfn) { updfn(cls, &n->bt); updfn(cls, &m->bt); } \
232 updfn(cls, &n->bt); updfn(cls, &m->bt); \
233 updfn(cls, &p->bt); \
238 /* The focus node was right-biased. There must, again, be a \
239 * parent node. That parent node was also right-biased. We \
240 * must rearrange the links and then we're done. \
245 * h (+) ---> (-) (=) \
247 * h - 1 h h h - 1 h h \
250 V( if (xyla__verbout && (xyla__verbose&XYLA__VF_JOIN)) \
251 fprintf(xyla__verbout, "XYLA-AVL JOIN splice, " \
252 "node %c, parent %c (" #right " child)\n", \
253 AVL_BALCH(n), AVL_BALCH(p)); ) \
254 plink = path.p[--path.k]; *plink = &n->bt; \
255 p->bt.right = n->bt.left; n->bt.left = &p->bt; \
256 m->bt.left = n->bt.right; n->bt.right = &m->bt; \
257 m->avl.f = (m->avl.f&~XYLA_AVLF_BALMASK) | XYLA_AVLBAL_EVEN; \
258 n->avl.f = (n->avl.f&~XYLA_AVLF_BALMASK) | XYLA_AVLBAL_EVEN; \
259 p->avl.f = (p->avl.f&~XYLA_AVLF_BALMASK) | XYLA_AVLBAL_##LBIAS; \
261 updfn(cls, &p->bt); updfn(cls, &m->bt); \
262 updfn(cls, &n->bt); \
268 /* The topmost entry in the path holds a link to a node n which \
269 * (a) has not yet been updated, (b) is biased to the right, \
270 * and (c) is the root of a tree which is taller than it used \
271 * to be. Ascend the tree, fixing balance annotations and \
272 * rearranging links to discharge the anomaly. \
275 /* The current node is the tree root. The tree has grown. */ \
277 V( if (xyla__verbout && (xyla__verbose&XYLA__VF_JOIN)) \
278 fputs("XYLA-AVL JOIN extend tree (" #right " child)\n", \
280 if (updfn) updfn(cls, &n->bt); \
281 lht++; goto done_##left; \
284 /* Ascend to the parent node. The previous focus node was our \
287 c = n; nlink = path.p[--path.k]; n = AVL_NODE(*nlink); \
288 f = n->avl.f; bal = f&XYLA_AVLF_BALMASK; \
290 if (bal == XYLA_AVLBAL_##RBIAS) { \
291 /* The node was biased to the right. We must rearrange some \
292 * links, and we're done. \
302 V( if (xyla__verbout && (xyla__verbose&XYLA__VF_JOIN)) \
303 fprintf(xyla__verbout, "XYLA-AVL JOIN ascent, node %c " \
304 "(" #right " child)\n", AVL_BALCH(n)); ) \
305 n->bt.right = c->bt.left; c->bt.left = &n->bt; \
307 c->avl.f = (c->avl.f&~XYLA_AVLF_BALMASK) | XYLA_AVLBAL_EVEN; \
308 n->avl.f = (f&~XYLA_AVLF_BALMASK) | XYLA_AVLBAL_EVEN; \
309 if (updfn) { updfn(cls, &n->bt); updfn(cls, &c->bt); } \
311 } else if (bal == XYLA_AVLBAL_##LBIAS) { \
312 /* The node was biased to the left. It's now evenly \
313 * balanced, and we're done. \
318 * / \ c ---> h + 1 (+) \
323 V( if (xyla__verbout && (xyla__verbose&XYLA__VF_JOIN)) \
324 fprintf(xyla__verbout, "XYLA-AVL JOIN ascent, node %c " \
325 "(" #right " child)\n", AVL_BALCH(n)); ) \
326 n->avl.f = (f&~XYLA_AVLF_BALMASK) | XYLA_AVLBAL_EVEN; \
327 if (updfn) { updfn(cls, &c->bt); updfn(cls, &n->bt); } \
330 /* The node was evenly balanced. It's now biased to the \
331 * right, and we must continue to ascend. \
341 V( if (xyla__verbout && (xyla__verbose&XYLA__VF_JOIN)) \
342 fprintf(xyla__verbout, "XYLA-AVL JOIN ascent, node %c " \
343 "(" #right " child)\n", AVL_BALCH(n)); ) \
344 n->avl.f = (f&~XYLA_AVLF_BALMASK) | XYLA_AVLBAL_##RBIAS; \
345 if (updfn) updfn(cls, &c->bt); \
349 root = *left; ht = lht; \
351 FIXUP_JOIN(left, lht, LBIAS, right, rht, RBIAS, ERX);
353 FIXUP_JOIN(right, rht, RBIAS, left, lht, LBIAS, XLE);
356 /* Follow the rest of the path up to the root, updating the nodes. */
357 if (updfn) while (path.k) updfn(cls, *path.p[--path.k]);
361 *left = 0; *right = 0; *root_out = root;
362 if (rootht_out) *rootht_out = ht;
371 int xyla_avl_split(struct xyla_bt_nodecls *cls,
372 struct xyla_bt_node **left_out, int *lht_out,
373 struct xyla_bt_node **mid_out,
374 struct xyla_bt_node **right_out, int *rht_out,
375 struct xyla_avl_path *path)
377 /* Split a tree tree into two at the indicated place indicated by the PATH.
378 * Store in *LEFT_OUT and *RIGHT_OUT the roots of trees containing every
379 * node respectively preceding and following the PATH, and in *LHT_OUT and
380 * *RHT_OUT the heights of the respective output trees; either or both of
381 * LHT_OUT and RHT_OUT may be null to discard this information.
383 * If the PATH is full, i.e., it denotes a node in the input tree, then
384 * that becomes the `middle node', which does not appear in either output
385 * tree; a pointer to this node is stored in *MID_OUT.
387 * Returns `XYLA_RC_OK'.
390 struct xyla_bt_node *left, *right, *sub, **next, **link;
391 struct xyla_avl_node *parent, *node, *mid;
392 int k, ht, lht, rht, subht;
394 /* The basic idea is to work up the tree, following the provided path,
395 * maintaining left and right trees. When we come across a node with a
396 * leftward link, then we join that node and its right subtree onto our
397 * right tree; and /vice-versa/.
400 * ,----|------' `---------|-,
402 * ,---' `---,| ,-|-' `---,
406 * / \ / \ / \ | / \ / \ /|\ / \ / \
407 * * * * * * *| * * * * * | * * * * *
410 * The conceptual difficulty lies in how to initialize the left and right
413 * The other fiddly part is keeping track of the heights as we work through
417 /* Retrieve the designated node. */
418 k = path->k; link = path->p[k]; mid = AVL_NODE(*link);
420 /* The path is full, i.e., it terminates in a link to a node, which will
421 * be the final mid node. Initialize the left and right trees to the
422 * node's left and right subtrees. Work out their heights -- this will
423 * save effort later, because we only need to compute one of them the
427 /* Initialize the left and right trees. */
428 left = mid->bt.left; right = mid->bt.right;
430 /* Initialize the heights. We can work out the heights of the children
431 * from the middle node's height and balance annotation.
433 ht = xyla_avl_height(mid);
434 switch (mid->avl.f&XYLA_AVLF_BALMASK) {
435 case XYLA_AVLBAL_LBIAS: lht = ht - 1; rht = ht - 2; break;
436 case XYLA_AVLBAL_EVEN: lht = rht = ht - 1; break;
437 case XYLA_AVLBAL_RBIAS: lht = ht - 2; rht = ht - 1; break;
438 default: XYLA__ASSERT(0); lht = rht = -1; break;
441 /* Clear the split node's links, so that it's a standalone singleton
442 * tree, and its balance state, so that it looks like a standalone tree.
444 mid->bt.left = mid->bt.right = 0;
445 mid->avl.f &= ~XYLA_AVLF_BALMASK;
447 /* Special case: the split node is the tree root.
449 * This is only `special' because the other case ascends one step higher
450 * in the tree, so we should do the same here.
454 /* Prefetch the next node up the path. */
455 next = path->p[--k]; parent = AVL_NODE(*next);
457 /* The path is empty, i.e., it terminates in a null link, and the final
458 * mid node will be null. The obvious thing to do is to initialize the
459 * left and right trees to be empty, but that will end up doing too much
460 * work. If we examine the diagram above, we see that there will be an
461 * intact subtree on one side of the split line. Working step-by-step
462 * is just busy-work, though not actually harmful.
464 * Instead, we note the direction of the final null link. Every further
465 * link in the path that's in the same direction means that we're still
466 * searching for the initial subtree root.
469 /* Special case: the tree is actually empty. */
470 if (!k) { left = right = 0; lht = rht = 0; goto end; }
472 /* Retrieve the parent node and split into cases according to the link
475 next = path->p[--k]; node = AVL_NODE(*next); ht = 0;
476 if (link == &node->bt.left)
477 #define INIT_SPLIT_EMPTY(left, lht, LBIAS, right, rht, RBIAS) do { \
478 /* The null link points leftwards, so the we're searching to \
479 * bound the initial right tree. \
484 /* We've decided to include the current node in the initial \
485 * tree, so update the height. \
487 ht += (node->avl.f&XYLA_AVLF_##BALMASK) == \
488 XYLA_AVLBAL_##RBIAS ? \
491 /* The current node is the tree root. There can be nothing \
496 right = &node->bt; rht = ht; \
500 /* Ascend the tree and check the link direction. */ \
501 link = next; next = path->p[--k]; parent = AVL_NODE(*next); \
502 if (link != &parent->bt.left) break; \
503 node = parent; link = next; \
507 right = &node->bt; rht = ht; \
509 INIT_SPLIT_EMPTY(left, lht, LBIAS, right, rht, RBIAS);
511 INIT_SPLIT_EMPTY(right, rht, RBIAS, left, lht, LBIAS);
512 #undef INIT_SPLIT_EMPTY
515 /* Ascend the tree, accumulating additional material into the left or right
516 * trees. This is actually much less complicated than it sounds.
519 /* The state at this point is a little tricky.
521 * There is a current node; `link' holds a link to this node, and `ht' is
522 * its height. The subtree rooted at this current node has been split,
523 * resulting in a left tree rooted at `lroot', with height `lht', and a
524 * right tree rooted at `rroot', with height `rht', and, depending on the
525 * initial path, possibly a mid node `mid'.
527 * The node has a `parent'. The link to the parent is in `next'. The
528 * parent and the current node's subling subtree must be accumulated into
529 * the appropriate left or right tree.
532 /* Accumulate the parent and the sibling subtree onto the appropriate
535 if (link == &parent->bt.left) {
536 sub = parent->bt.right;
537 switch (parent->avl.f&XYLA_AVLF_BALMASK) {
538 case XYLA_AVLBAL_LBIAS: subht = ht - 1; ht++; break;
539 case XYLA_AVLBAL_EVEN: subht = ht; ht++; break;
540 case XYLA_AVLBAL_RBIAS: subht = ht + 1; ht += 2; break;
541 default: XYLA__ASSERT(0); subht = -1; break;
543 xyla_avl_join(cls, &right, &rht,
544 &right, rht, &parent->bt, &sub, subht);
546 sub = parent->bt.left;
547 switch (parent->avl.f&XYLA_AVLF_BALMASK) {
548 case XYLA_AVLBAL_LBIAS: subht = ht + 1; ht += 2; break;
549 case XYLA_AVLBAL_EVEN: subht = ht; ht++; break;
550 case XYLA_AVLBAL_RBIAS: subht = ht - 1; ht++; break;
551 default: XYLA__ASSERT(0); subht = -1; break;
553 xyla_avl_join(cls, &left, &lht,
554 &sub, subht, &parent->bt, &left, lht);
557 /* If we've reached the root then we're done. */
560 /* Ascend the tree. */
561 link = next; node = parent;
562 next = path->p[--k]; parent = AVL_NODE(*next);
568 *left_out = left; if (lht_out) *lht_out = lht;
569 *mid_out = mid ? &mid->bt : 0;
570 *right_out = right; if (rht_out) *rht_out = rht;
574 int xyla_avl_splitat(struct xyla_bt_nodecls *cls,
575 struct xyla_bt_node **left_out, int *lht_out,
576 struct xyla_bt_node **mid_out,
577 struct xyla_bt_node **right_out, int *rht_out,
578 struct xyla_bt_node **root, const void *key)
580 /* Split the tree rooted at ROOT into two at an indicated KEY. Store in
581 * *LEFT_OUT and *RIGHT_OUT the roots of trees containing every node whose
582 * key is respectively less than and greater than the KEY, and in *LHT_OUT
583 * and *RHT_OUT the heights of the respective output trees; either or both
584 * of LHT_OUT and RHT_OUT may be null to discard this information.
586 * If a node matching the KEY exists in the input tree, then that becomes
587 * the `middle node', which does not appear in either output tree; a
588 * pointer to this node is stored in *MID_OUT.
590 * This operation assumes that the tree traversal ordering matches an
591 * ordering on search keys, which is implemented by the node-class `nav'
594 * Returns `XYLA_RC_OK'.
597 const struct xyla_bt_ordops *ops = ORDER_OPS(cls->ops);
598 struct xyla_bt_node *node;
599 struct xyla_avl_path path;
602 XYLA_BT_PROBE(rc, node, cls, root, ops->ord.nav, UNCONST(void, key),
603 path.p, path.k, XYLA_AVL_HTMAX);
604 XYLA__ASSERT(!rc); (void)node;
605 return (xyla_avl_split(cls, left_out, lht_out, mid_out, right_out, rht_out,
609 int xyla_avl_splitroot(struct xyla_bt_node **left_out, int *lht_out,
610 struct xyla_bt_node **root_out,
611 struct xyla_bt_node **right_out, int *rht_out,
612 struct xyla_bt_node **root, int ht)
614 /* Split the tree rooted at ROOT into two at its root. Store in *ROOT_OUT
615 * a pointer to the original root node, or null if the tree was initially
616 * empty; store in *LEFT_OUT and *RIGHT_OUT the root of the original root's
617 * left and right subtrees respectively, and in *LHT_OUT and *RHT_OUT the
618 * heights of the respective output trees; either or both of LHT_OUT and
619 * RHT_OUT may be null to discard this information. If the initial tree
620 * height is known, it can be passed in as HT; otherwise, HT must be -1.
622 * Returns `XYLA_RC_OK'.
625 struct xyla_avl_node *left, *right, *node;
628 node = AVL_NODE(*root); *root = 0;
630 *left_out = *root_out = *right_out = 0;
631 if (lht_out) *lht_out = 0;
632 if (rht_out) *rht_out = 0;
634 left = AVL_NODE(node->bt.left); right = AVL_NODE(node->bt.right);
635 node->bt.left = node->bt.right = 0;
636 if (lht_out || rht_out) {
637 if (ht < 0) ht = xyla_avl_height(node);
638 switch (node->avl.f&XYLA_AVLF_BALMASK) {
639 case XYLA_AVLBAL_LBIAS: lht = ht - 1; rht = ht - 2; break;
640 case XYLA_AVLBAL_EVEN: lht = rht = ht - 1; break;
641 case XYLA_AVLBAL_RBIAS: lht = ht - 2; rht = ht - 1; break;
642 default: XYLA__ASSERT(0); lht = rht = -1; break;
644 if (lht_out) *lht_out = lht;
645 if (rht_out) *rht_out = rht;
647 node->avl.f &= ~XYLA_AVLF_BALMASK;
648 *left_out = left ? &left->bt : 0;
649 *root_out = &node->bt;
650 *right_out = right ? &right->bt : 0;
655 /*----- That's all, folks -------------------------------------------------*/