3 * Set utilities for binary search trees
5 * (c) 2024 Straylight/Edgeware
8 /*----- Licensing notice --------------------------------------------------*
10 * This file is part of Xyla, a library of binary trees.
12 * Xyla is free software: you can redistribute it and/or modify it under
13 * the terms of the GNU Lesser General Public License as published by the
14 * Free Software Foundation; either version 3 of the License, or (at your
15 * option) any later version.
17 * Xyla is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
20 * License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with Xyla. If not, see <https://www.gnu.org/licenses/>.
26 /*----- Header files ------------------------------------------------------*/
31 /*----- Main code ---------------------------------------------------------*/
33 int xyla_bt_unisect(struct xyla_bt_nodecls *cls,
34 struct xyla_bt_node **uni_out, int *uniht_out,
35 struct xyla_bt_node **isect_out, int *isectht_out,
36 struct xyla_bt_node **aroot, int *aht_inout,
37 struct xyla_bt_node **broot, int *bht_inout,
38 const struct xyla_bt_treeops *ops,
39 struct xyla_bt_setopstack *stack, int htmax)
41 /* Compute the union and intersection of two trees.
43 * On entry, *AROOT and *BROOT are the roots of two trees a and b, with
44 * their respective heights in *AHT_INOUT and *BHT_INOUT. The trees are
45 * considered to represent sets, with one element per node; the element is
46 * determined by the `key' node-class operation. On exit, *UNI_OUT points
47 * to the root of a tree containing (one copy of) each element present in
48 * either a or b, and *ISECT_OUT points to the root of a tree containing
49 * the other copy of each element present in both a and b; the heights of
50 * the output trees (the exact meaning of which is determined by the tree
51 * implementation) are stored in *UNIHT_OUT and *ISECTHT_OUT. If AROOT
52 * and/or BROOT are distinct from UNI_OUT and ISECT_OUT, then null pointers
53 * are stored in them. The original trees are destroyed; each node in the
54 * original operands trees ends up in exactly one of the result trees.
56 * The STACK argument points to caller-provided workspace, with HTMAX
57 * entries. The function returns `XYLA_RC_OK' on success. If the stack is
58 * too small then it returns `XYLA_RC_TALL'; *UNI_OUT and *ISECT_OUT are
59 * unchanged; and the trees rooted at *AROOT and *BROOT are modified, with
60 * their heights at *AHT_INOUT and *BHT_INOUT updated. Retrying the
61 * operation with a larger stack or following rebalancing will produce the
62 * correct result. The critical factor is the maximum height of the tree
66 struct xyla_bt_node *a = *aroot, *b = *broot, *uni, *isect;
68 aht = aht_inout ? *aht_inout : -1,
69 bht = bht_inout ? *bht_inout : -1,
70 uniht, isectht, sp = 0, rc;
71 const struct xyla_bt_ordops *ordops = ORDER_OPS(cls->ops);
72 xyla_bt_keyfn *keyfn = ordops->ord.key;
73 xyla_bt_joinfn *joinfn = ops->join;
74 xyla_bt_splitatfn *splitatfn = ops->splitat;
75 xyla_bt_splitrootfn *splitrootfn = ops->splitroot;
79 /* This is an essentially recursive procedure based on the following
80 * observations. Let A and B be sets of ordered elements.
82 * * If A = ∅ then A ∪ B = B and A ∩ B = ∅.
84 * * If B = ∅ then A ∪ B = A and A ∩ B = ∅.
86 * * Otherwise, let a ∈ A be any element. Let
88 * -- A_L = { x ∈ A | x < A },
89 * -- A_R = { x ∈ A | x > A },
90 * -- B_L = { x ∈ B | x < B },
91 * -- B_M = B ∩ {a}, and
92 * -- B_R = { x ∈ B | x > B }.
96 * -- A ∪ B = (A_L ∪ B_L) ⊎ {a} ⊎ (A_R ∪ B_R), and
97 * -- A ∩ B = (A_L ∩ B_L) ⊎ B_M ⊎ (A_R ∩ B_R).
99 * In practice, we select a as the root of the A tree: the provided
100 * `splitroot' operation selects a and determines A_L and A_R; the
101 * `splitat' operation determines B_L, B_M, and B_R; and the `join'
102 * operator performs the disjoint unions.
104 * Rather than actual recursion, we maintain an explicit stack. A
105 * recursive call is replaced by pushing the remaining state on a stack and
106 * returning to the initial label `entry'. A `return' pops a state from
107 * the stack and forces control back to the appropriate place. If you
108 * squint, then a sequence
110 * stack[sp++].op = FOO; goto entry;
113 * takes the place of a recursive call.
118 /* First base case. If A is empty, then the union must be B, and the
119 * intersection is empty.
122 uni = b; uniht = bht; isect = 0; isectht = 0;
124 /* Second base case. Symmetrically, if B is empty, then the union
125 * must be A, and the intersection is empty.
128 uni = a; uniht = aht; isect = 0; isectht = 0;
130 /* Recursive case. */
132 /* If there's no stack space then abandon ship. */
133 if (sp >= htmax) goto abandon;
135 /* Select an element a ∈ A and split A - {a} into two halves. */
136 rc = splitrootfn(&a, &aht,
138 &stack[sp].u.right.a, &stack[sp].u.right.aht,
140 XYLA__ASSERT(rc >= 0);
142 /* Split B into three pieces. */
146 &stack[sp].u.right.b, &stack[sp].u.right.bht,
147 &b, keyfn(cls, stack[sp].am));
148 XYLA__ASSERT(rc >= 0);
150 /* Recursively compute the union and intersection of the low halves. */
151 stack[sp++].op = RIGHT; goto entry;
154 /* Recursively compute the union and intersection of the high halves.
155 * This is mostly just shuffling the data about. Note that the stack
156 * pointer has just been decremented prior to calling us, so there's no
157 * need to check the stack bound again.
160 a = stack[sp].u.right.a; aht = stack[sp].u.right.aht;
161 b = stack[sp].u.right.b; bht = stack[sp].u.right.bht;
162 stack[sp].u.join_union.uni = uni;
163 stack[sp].u.join_union.uniht = uniht;
164 stack[sp].u.join_union.isect = isect;
165 stack[sp].u.join_union.isectht = isectht;
166 stack[sp++].op = JOIN; goto entry;
169 /* Stitch the answers together to compute the result. */
171 rc = joinfn(cls, &uni, &uniht,
172 &stack[sp].u.join_union.uni, stack[sp].u.join_union.uniht,
175 XYLA__ASSERT(rc >= 0);
176 rc = joinfn(cls, &isect, &isectht,
177 &stack[sp].u.join_union.isect,
178 stack[sp].u.join_union.isectht,
181 XYLA__ASSERT(rc >= 0);
184 /* If the stack isn't empty then we've performed a `recursive' step and
185 * should return to the appropriate place.
187 if (sp) switch (stack[--sp].op) {
188 case RIGHT: goto right;
189 case JOIN: goto join;
190 default: XYLA__ASSERT(0);
194 *aroot = 0; if (aht_inout) *aht_inout = 0;
195 *broot = 0; if (bht_inout) *bht_inout = 0;
196 *uni_out = uni; if (uniht_out) *uniht_out = uniht;
197 *isect_out = isect; if (isectht_out) *isectht_out = isectht;
201 /* Abandon ship if the stack is too small.
203 * We must reassemble all of the pieces of tree on the stack into two valid
204 * trees. We've lost track to some extent of which nodes came from which
205 * tree, but it's enough to produce two trees which will give the same
206 * result when the caller tries again. We'll be OK if we put all of the
207 * `union' nodes in one tree, say a, and all of the `intersection' nodes
208 * in the other, say b. All that remains is to make sure we put them in
211 * We have two kinds of entries on the stack. A `RIGHT' entry holds chunks
212 * of the operand trees which haven't been carved up yet, with deeper
213 * levels of the stack holding pieces from further to the right; these
214 * pieces need to be returned to their original trees, but that's easy
215 * because they're properly labelled. A `JOIN' entry holds chunks of
216 * result trees, with deeper levels of the stack put together from pieces
217 * further to the left. Notice that `unisect''s outputs are a fixed point;
218 * indeed, if A ⊆ B or B ⊆ A then A ∪ B = A and A ∩ B = B, and clearly
219 * A ∩ B ⊆ A ∪ B. Consequently, it doesn't matter which way around we put
220 * these as long as we're consistent. And, finally, we have the current
221 * subtrees that we were looking at when we noticed the stack ran out.
224 while (sp) switch (stack[--sp].op) {
226 rc = joinfn(cls, &a, &aht,
229 &stack[sp].u.right.a, stack[sp].u.right.aht);
230 XYLA__ASSERT(rc >= 0);
231 rc = joinfn(cls, &b, &bht,
234 &stack[sp].u.right.b, stack[sp].u.right.bht);
235 XYLA__ASSERT(rc >= 0);
238 rc = joinfn(cls, &a, &aht,
239 &stack[sp].u.join_union.uni, stack[sp].u.join_union.uniht,
242 XYLA__ASSERT(rc >= 0);
243 rc = joinfn(cls, &b, &bht,
244 &stack[sp].u.join_union.isect,
245 stack[sp].u.join_union.isectht,
248 XYLA__ASSERT(rc >= 0);
253 *aroot = a; if (aht_inout) *aht_inout = aht;
254 *broot = b; if (bht_inout) *bht_inout = bht;
255 return (XYLA_RC_TALL);
258 int xyla_bt_diffsect(struct xyla_bt_nodecls *cls,
259 struct xyla_bt_node **diff_out, int *diffht_out,
260 struct xyla_bt_node **isect_out, int *isectht_out,
261 struct xyla_bt_node **aroot, int *aht_inout,
262 struct xyla_bt_node **broot,
263 const struct xyla_bt_treeops *ops,
264 struct xyla_bt_setopstack *stack, int htmax)
266 /* Compute the set difference of the two operand trees.
268 * On entry, *AROOT and *BROOT are the roots of two trees a and b, with a's
269 * height in *AHT_INOUT (the height of b is not significant). The trees
270 * are considered to represent sets, with one element per node; the element
271 * is determined by the `key' node-class operation. On exit, *DIFF_OUT
272 * points to the root of a tree containing each element of a but not b, and
273 * *ISECT_OUT points to the root of a tree containing each element of a
274 * that's also in b; the heights of the output trees (the exact meaning of
275 * which is determined by the tree implementation) are stored in
276 * *DIFFHT_OUT and *ISECTHT_OUT. If AROOT and/or BROOT are distinct from
277 * DIFF_OUT and ISECT_OUT, then null pointers are stored in them. The
278 * original a tree is destroyed; each node in the original operands trees
279 * ends up in exactly one of the result trees. The input b tree is
282 * The STACK argument points to caller-provided workspace, with HTMAX
283 * entries. The function return `XYLA_RC_OK' on success. If the stack is
284 * too small then it returns `XYLA_RC_TALL'; *DIFF_OUT is unchanged, and
285 * the nodes of the a tree are split between output trees rooted at *AROOT
286 * and *ISECT_OUT, with the heights at *AHT_INOUT and *ISECTHT_OUT set
287 * appropriately. The critical factor is the maximum height of the tree at
288 * *BROOT. The correct result can be obtained by (a) restoring the
289 * original a set as the union of the *AROOT and *ISECTHT_OUT trees and (b)
290 * retrying the original difference computation with a rebalanced b tree.
291 * Note that the intersection result from (a) will be empty.
294 struct xyla_bt_node *a = *aroot, *b = *broot, *diff, *isect;
295 int aht = aht_inout ? *aht_inout : -1, diffht, isectht, sp = 0, rc;
296 const struct xyla_bt_ordops *ordops = ORDER_OPS(cls->ops);
297 xyla_bt_keyfn *keyfn = ordops->ord.key;
298 xyla_bt_joinfn *joinfn = ops->join;
299 xyla_bt_splitatfn *splitatfn = ops->splitat;
301 enum { RIGHT, JOIN };
303 /* This is an essentially recursive procedure, similar to `xyla_bt_unisect'
304 * above, based on the following observations. Let A and B be sets of
307 * * If A = ∅ then A - B = ∅ and A ∩ B = ∅.
309 * * If B = ∅ then A - B = A and A ∩ B = ∅.
311 * * Otherwise, let b ∈ B be any element. Let
313 * -- A_L = { x ∈ A | x < A },
315 * -- A_R = { x ∈ A | x > A },
316 * -- B_L = { x ∈ B | x < B }, and
317 * -- B_R = { x ∈ B | x > B }.
321 * -- A - B = (A_L - B_L) ⊎ (A_R - B_R), and
322 * -- A ∩ B = (A_L ∩ B_L) ⊎ A_M ⊎ (A_R ∩ B_R).
324 * In practice, we select b as the root of the B tree, and B_L and B_R are
325 * simply its left and right subtrees; it's safe to treat B as a simple
326 * binary search tree, since we never actually pass nodes of B to the
327 * provided operations. The `splitat' operation determines A_L, A_M, and
328 * A_R; and the `join' operation performs the disjoint unions.
330 * Rather than actual recursion, we maintain an explicit stack. A
331 * recursive call is replaced by pushing the remaining state on a stack and
332 * returning to the initial label `entry'. A `return' pops a state from
333 * the stack and forces control back to the appropriate place. If you
334 * squint, then a sequence
336 * stack[sp++].op = FOO; goto entry;
339 * takes the place of a recursive call.
344 /* First base case. If A is empty, then the difference and intersection
345 * must be both be empty.
348 diff = 0; diffht = 0; isect = 0; isectht = 0;
350 /* Second base case. If B is empty, then the difference is A, and the
351 * intersection is empty.
354 diff = a; diffht = aht; isect = 0; isectht = 0;
356 /* Recursive case. */
358 /* If there's no stack space then abandon ship. */
359 if (sp >= htmax) goto abandon;
361 /* Split A into three pieces according to the root of the B tree. */
365 &stack[sp].u.right.a, &stack[sp].u.right.aht,
367 XYLA__ASSERT(rc >= 0);
369 /* Recursively compute the difference and intersection of the low
372 stack[sp].u.right.b = b->right;
374 stack[sp++].op = RIGHT; goto entry;
377 /* Recursively compute the union and intersection of the high halves.
378 * This is mostly just shuffling the data about. Note that the stack
379 * pointer has just been decremented prior to calling us, so there's no
380 * need to check the stack bound again.
383 a = stack[sp].u.right.a; aht = stack[sp].u.right.aht;
384 b = stack[sp].u.right.b;
385 stack[sp].u.join_diff.diff = diff;
386 stack[sp].u.join_diff.diffht = diffht;
387 stack[sp].u.join_diff.isect = isect;
388 stack[sp].u.join_diff.isectht = isectht;
389 stack[sp++].op = JOIN; goto entry;
392 /* Stitch the answers together to compute the result. */
394 rc = joinfn(cls, &diff, &diffht,
395 &stack[sp].u.join_diff.diff, stack[sp].u.join_diff.diffht,
398 XYLA__ASSERT(rc >= 0);
399 rc = joinfn(cls, &isect, &isectht,
400 &stack[sp].u.join_diff.isect, stack[sp].u.join_diff.isectht,
403 XYLA__ASSERT(rc >= 0);
406 /* If the stack isn't empty then we've performed a `recursive' step and
407 * should return to the appropriate place.
409 if (sp) switch (stack[--sp].op) {
410 case RIGHT: goto right;
411 case JOIN: goto join;
412 default: XYLA__ASSERT(0);
416 *aroot = 0; if (aht_inout) *aht_inout = 0;
417 *diff_out = diff; if (diffht_out) *diffht_out = diffht;
418 *isect_out = isect; if (isectht_out) *isectht_out = isectht;
422 /* Abandon ship if the stack is too small.
424 * We must reassemble all of the pieces of tree on the stack into two valid
425 * trees. Unlike `xyla_bt_unisect' above, we know that all of the pieces
426 * belong to the a tree, but they're split between the difference and
427 * intersection results in a way that's impractically difficult to
428 * untangle. As a result, we'll accumulate the difference pieces back into
429 * the a tree, and leave the intersection pieces in a separate tree that
430 * the caller can reassemble. (We can't do it ourselves, because it might
431 * additionally require rebalancing one of those trees too.)
433 * We have two kinds of entries on the stack. A `RIGHT' entry holds chunks
434 * of the operand trees which haven't been carved up yet, with deeper
435 * levels of the stack holding pieces from further to the right; the a
436 * piece here can just be returned to their original tree without
437 * difficulty. A `JOIN' entry holds chunks of result trees, with deeper
438 * levels of the stack put together from pieces further to the left. We'll
439 * just accumulate them into a fresh tree. And, finally, we have the
440 * current subtrees that we were looking at when we noticed the stack ran
444 isect = 0; isectht = 0;
445 while (sp) switch (stack[--sp].op) {
447 rc = joinfn(cls, &a, &aht,
450 &stack[sp].u.right.a, stack[sp].u.right.aht);
451 XYLA__ASSERT(rc >= 0);
454 rc = joinfn(cls, &a, &aht,
455 &stack[sp].u.join_diff.diff, stack[sp].u.join_diff.diffht,
458 XYLA__ASSERT(rc >= 0);
459 rc = joinfn(cls, &isect, &isectht,
460 &stack[sp].u.join_diff.isect,
461 stack[sp].u.join_diff.isectht,
464 XYLA__ASSERT(rc >= 0);
469 *aroot = a; if (aht_inout) *aht_inout = aht;
470 *isect_out = isect; if (isectht_out) *isectht_out = isectht;
471 return (XYLA_RC_TALL);
474 /*----- That's all, folks -------------------------------------------------*/