chiark - git - mdw - xyla/blob - bt-set.c

   1 /* -*-c-*-
   2  *
   3  * Set utilities for binary search trees
   4  *
   5  * (c) 2024 Straylight/Edgeware
   6  */
   7
   8 /*----- Licensing notice --------------------------------------------------*
   9  *
  10  * This file is part of Xyla, a library of binary trees.
  11  *
  12  * Xyla is free software: you can redistribute it and/or modify it under
  13  * the terms of the GNU Lesser General Public License as published by the
  14  * Free Software Foundation; either version 3 of the License, or (at your
  15  * option) any later version.
  16  *
  17  * Xyla is distributed in the hope that it will be useful, but WITHOUT
  18  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  19  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  20  * License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with Xyla.  If not, see <https://www.gnu.org/licenses/>.
  24  */
  25
  26 /*----- Header files ------------------------------------------------------*/
  27
  28 #include "lib.h"
  29 #include "bt.h"
  30
  31 /*----- Main code ---------------------------------------------------------*/
  32
  33 int xyla_bt_unisect(struct xyla_bt_nodecls *cls,
  34                     struct xyla_bt_node **uni_out, int *uniht_out,
  35                     struct xyla_bt_node **isect_out, int *isectht_out,
  36                     struct xyla_bt_node **aroot, int *aht_inout,
  37                     struct xyla_bt_node **broot, int *bht_inout,
  38                     const struct xyla_bt_treeops *ops,
  39                     struct xyla_bt_setopstack *stack, int htmax)
  40 {
  41   /* Compute the union and intersection of two trees.
  42    *
  43    * On entry, *AROOT and *BROOT are the roots of two trees a and b, with
  44    * their respective heights in *AHT_INOUT and *BHT_INOUT.  The trees are
  45    * considered to represent sets, with one element per node; the element is
  46    * determined by the `key' node-class operation.  On exit, *UNI_OUT points
  47    * to the root of a tree containing (one copy of) each element present in
  48    * either a or b, and *ISECT_OUT points to the root of a tree containing
  49    * the other copy of each element present in both a and b; the heights of
  50    * the output trees (the exact meaning of which is determined by the tree
  51    * implementation) are stored in *UNIHT_OUT and *ISECTHT_OUT.  If AROOT
  52    * and/or BROOT are distinct from UNI_OUT and ISECT_OUT, then null pointers
  53    * are stored in them.  The original trees are destroyed; each node in the
  54    * original operands trees ends up in exactly one of the result trees.
  55    *
  56    * The STACK argument points to caller-provided workspace, with HTMAX
  57    * entries.  The function returns `XYLA_RC_OK' on success.  If the stack is
  58    * too small then it returns `XYLA_RC_TALL'; *UNI_OUT and *ISECT_OUT are
  59    * unchanged; and the trees rooted at *AROOT and *BROOT are modified, with
  60    * their heights at *AHT_INOUT and *BHT_INOUT updated.  Retrying the
  61    * operation with a larger stack or following rebalancing will produce the
  62    * correct result.  The critical factor is the maximum height of the tree
  63    * at *AROOT.
  64    */
  65
  66   struct xyla_bt_node *a = *aroot, *b = *broot, *uni, *isect;
  67   int
  68     aht = aht_inout ? *aht_inout : -1,
  69     bht = bht_inout ? *bht_inout : -1,
  70     uniht, isectht, sp = 0, rc;
  71   const struct xyla_bt_ordops *ordops = ORDER_OPS(cls->ops);
  72   xyla_bt_keyfn *keyfn = ordops->ord.key;
  73   xyla_bt_joinfn *joinfn = ops->join;
  74   xyla_bt_splitatfn *splitatfn = ops->splitat;
  75   xyla_bt_splitrootfn *splitrootfn = ops->splitroot;
  76
  77   enum { RIGHT, JOIN };
  78
  79   /* This is an essentially recursive procedure based on the following
  80    * observations.  Let A and B be sets of ordered elements.
  81    *
  82    *   * If A = ∅ then A ∪ B = B and A ∩ B = ∅.
  83    *
  84    *   * If B = ∅ then A ∪ B = A and A ∩ B = ∅.
  85    *
  86    *   * Otherwise, let a ∈ A be any element.  Let
  87    *
  88    *       -- A_L = { x ∈ A | x < A },
  89    *       -- A_R = { x ∈ A | x > A },
  90    *       -- B_L = { x ∈ B | x < B },
  91    *       -- B_M = B ∩ {a}, and
  92    *       -- B_R = { x ∈ B | x > B }.
  93    *
  94    *     Then
  95    *
  96    *       -- A ∪ B = (A_L ∪ B_L) ⊎ {a} ⊎ (A_R ∪ B_R), and
  97    *       -- A ∩ B = (A_L ∩ B_L) ⊎ B_M ⊎ (A_R ∩ B_R).
  98    *
  99    * In practice, we select a as the root of the A tree: the provided
 100    * `splitroot' operation selects a and determines A_L and A_R; the
 101    * `splitat' operation determines B_L, B_M, and B_R; and the `join'
 102    * operator performs the disjoint unions.
 103    *
 104    * Rather than actual recursion, we maintain an explicit stack.  A
 105    * recursive call is replaced by pushing the remaining state on a stack and
 106    * returning to the initial label `entry'.  A `return' pops a state from
 107    * the stack and forces control back to the appropriate place.  If you
 108    * squint, then a sequence
 109    *
 110    *    stack[sp++].op = FOO; goto entry;
 111    *  foo:
 112    *
 113    * takes the place of a recursive call.
 114    */
 115
 116 entry:
 117   if (!a) {
 118     /* First base case.  If A is empty, then the union must be B, and the
 119      * intersection is empty.
 120      */
 121
 122     uni = b; uniht = bht; isect = 0; isectht = 0;
 123   } else if (!b) {
 124     /* Second base case.  Symmetrically, if B is empty, then the union
 125      * must be A, and the intersection is empty.
 126      */
 127
 128     uni = a; uniht = aht; isect = 0; isectht = 0;
 129   } else {
 130     /* Recursive case. */
 131
 132     /* If there's no stack space then abandon ship. */
 133     if (sp >= htmax) goto abandon;
 134
 135     /* Select an element a ∈ A and split A - {a} into two halves. */
 136     rc = splitrootfn(&a, &aht,
 137                      &stack[sp].am,
 138                      &stack[sp].u.right.a, &stack[sp].u.right.aht,
 139                      &a, aht);
 140       XYLA__ASSERT(rc >= 0);
 141
 142     /* Split B into three pieces. */
 143     rc = splitatfn(cls,
 144                    &b, &bht,
 145                    &stack[sp].bm,
 146                    &stack[sp].u.right.b, &stack[sp].u.right.bht,
 147                    &b, keyfn(cls, stack[sp].am));
 148       XYLA__ASSERT(rc >= 0);
 149
 150     /* Recursively compute the union and intersection of the low halves. */
 151     stack[sp++].op = RIGHT; goto entry;
 152
 153   right:
 154     /* Recursively compute the union and intersection of the high halves.
 155      * This is mostly just shuffling the data about.  Note that the stack
 156      * pointer has just been decremented prior to calling us, so there's no
 157      * need to check the stack bound again.
 158      */
 159
 160     a = stack[sp].u.right.a; aht = stack[sp].u.right.aht;
 161     b = stack[sp].u.right.b; bht = stack[sp].u.right.bht;
 162     stack[sp].u.join_union.uni = uni;
 163     stack[sp].u.join_union.uniht = uniht;
 164     stack[sp].u.join_union.isect = isect;
 165     stack[sp].u.join_union.isectht = isectht;
 166     stack[sp++].op = JOIN; goto entry;
 167
 168   join:
 169     /* Stitch the answers together to compute the result. */
 170
 171     rc = joinfn(cls, &uni, &uniht,
 172                 &stack[sp].u.join_union.uni, stack[sp].u.join_union.uniht,
 173                 stack[sp].am,
 174                 &uni, uniht);
 175       XYLA__ASSERT(rc >= 0);
 176     rc = joinfn(cls, &isect, &isectht,
 177                 &stack[sp].u.join_union.isect,
 178                   stack[sp].u.join_union.isectht,
 179                 stack[sp].bm,
 180                 &isect, isectht);
 181       XYLA__ASSERT(rc >= 0);
 182   }
 183
 184   /* If the stack isn't empty then we've performed a `recursive' step and
 185    * should return to the appropriate place.
 186    */
 187   if (sp) switch (stack[--sp].op) {
 188     case RIGHT: goto right;
 189     case JOIN: goto join;
 190     default: XYLA__ASSERT(0);
 191   }
 192
 193   /* All is done. */
 194   *aroot = 0; if (aht_inout) *aht_inout = 0;
 195   *broot = 0; if (bht_inout) *bht_inout = 0;
 196   *uni_out = uni; if (uniht_out) *uniht_out = uniht;
 197   *isect_out = isect; if (isectht_out) *isectht_out = isectht;
 198   return (XYLA_RC_OK);
 199
 200 abandon:
 201   /* Abandon ship if the stack is too small.
 202    *
 203    * We must reassemble all of the pieces of tree on the stack into two valid
 204    * trees.  We've lost track to some extent of which nodes came from which
 205    * tree, but it's enough to produce two trees which will give the same
 206    * result when the caller tries again.  We'll be OK if we put all of the
 207    * `union' nodes in one tree, say a, and all of the `intersection' nodes
 208    * in the other, say b.  All that remains is to make sure we put them in
 209    * the right order.
 210    *
 211    * We have two kinds of entries on the stack.  A `RIGHT' entry holds chunks
 212    * of the operand trees which haven't been carved up yet, with deeper
 213    * levels of the stack holding pieces from further to the right; these
 214    * pieces need to be returned to their original trees, but that's easy
 215    * because they're properly labelled.  A `JOIN' entry holds chunks of
 216    * result trees, with deeper levels of the stack put together from pieces
 217    * further to the left.  Notice that `unisect''s outputs are a fixed point;
 218    * indeed, if A ⊆ B or B ⊆ A then A ∪ B = A and A ∩ B = B, and clearly
 219    * A ∩ B ⊆ A ∪ B.  Consequently, it doesn't matter which way around we put
 220    * these as long as we're consistent.  And, finally, we have the current
 221    * subtrees that we were looking at when we noticed the stack ran out.
 222    */
 223
 224   while (sp) switch (stack[--sp].op) {
 225     case RIGHT:
 226       rc = joinfn(cls, &a, &aht,
 227                   &a, aht,
 228                   stack[sp].am,
 229                   &stack[sp].u.right.a, stack[sp].u.right.aht);
 230         XYLA__ASSERT(rc >= 0);
 231       rc = joinfn(cls, &b, &bht,
 232                   &b, bht,
 233                   stack[sp].bm,
 234                   &stack[sp].u.right.b, stack[sp].u.right.bht);
 235         XYLA__ASSERT(rc >= 0);
 236       break;
 237     case JOIN:
 238       rc = joinfn(cls, &a, &aht,
 239                   &stack[sp].u.join_union.uni, stack[sp].u.join_union.uniht,
 240                   stack[sp].am,
 241                   &a, aht);
 242         XYLA__ASSERT(rc >= 0);
 243       rc = joinfn(cls, &b, &bht,
 244                   &stack[sp].u.join_union.isect,
 245                     stack[sp].u.join_union.isectht,
 246                   stack[sp].bm,
 247                   &b, bht);
 248         XYLA__ASSERT(rc >= 0);
 249       break;
 250     default:
 251       XYLA__ASSERT(0);
 252   }
 253   *aroot = a; if (aht_inout) *aht_inout = aht;
 254   *broot = b; if (bht_inout) *bht_inout = bht;
 255   return (XYLA_RC_TALL);
 256 }
 257
 258 int xyla_bt_diffsect(struct xyla_bt_nodecls *cls,
 259                      struct xyla_bt_node **diff_out, int *diffht_out,
 260                      struct xyla_bt_node **isect_out, int *isectht_out,
 261                      struct xyla_bt_node **aroot, int *aht_inout,
 262                      struct xyla_bt_node **broot,
 263                      const struct xyla_bt_treeops *ops,
 264                      struct xyla_bt_setopstack *stack, int htmax)
 265 {
 266   /* Compute the set difference of the two operand trees.
 267    *
 268    * On entry, *AROOT and *BROOT are the roots of two trees a and b, with a's
 269    * height in *AHT_INOUT (the height of b is not significant).  The trees
 270    * are considered to represent sets, with one element per node; the element
 271    * is determined by the `key' node-class operation.  On exit, *DIFF_OUT
 272    * points to the root of a tree containing each element of a but not b, and
 273    * *ISECT_OUT points to the root of a tree containing each element of a
 274    * that's also in b; the heights of the output trees (the exact meaning of
 275    * which is determined by the tree implementation) are stored in
 276    * *DIFFHT_OUT and *ISECTHT_OUT.  If AROOT and/or BROOT are distinct from
 277    * DIFF_OUT and ISECT_OUT, then null pointers are stored in them.  The
 278    * original a tree is destroyed; each node in the original operands trees
 279    * ends up in exactly one of the result trees.  The input b tree is
 280    * unchanged.
 281    *
 282    * The STACK argument points to caller-provided workspace, with HTMAX
 283    * entries.  The function return `XYLA_RC_OK' on success.  If the stack is
 284    * too small then it returns `XYLA_RC_TALL'; *DIFF_OUT is unchanged, and
 285    * the nodes of the a tree are split between output trees rooted at *AROOT
 286    * and *ISECT_OUT, with the heights at *AHT_INOUT and *ISECTHT_OUT set
 287    * appropriately.  The critical factor is the maximum height of the tree at
 288    * *BROOT.  The correct result can be obtained by (a) restoring the
 289    * original a set as the union of the *AROOT and *ISECTHT_OUT trees and (b)
 290    * retrying the original difference computation with a rebalanced b tree.
 291    * Note that the intersection result from (a) will be empty.
 292    */
 293
 294   struct xyla_bt_node *a = *aroot, *b = *broot, *diff, *isect;
 295   int aht = aht_inout ? *aht_inout : -1, diffht, isectht, sp = 0, rc;
 296   const struct xyla_bt_ordops *ordops = ORDER_OPS(cls->ops);
 297   xyla_bt_keyfn *keyfn = ordops->ord.key;
 298   xyla_bt_joinfn *joinfn = ops->join;
 299   xyla_bt_splitatfn *splitatfn = ops->splitat;
 300
 301   enum { RIGHT, JOIN };
 302
 303   /* This is an essentially recursive procedure, similar to `xyla_bt_unisect'
 304    * above, based on the following observations.  Let A and B be sets of
 305    * ordered elements.
 306    *
 307    *   * If A = ∅ then A - B = ∅ and A ∩ B = ∅.
 308    *
 309    *   * If B = ∅ then A - B = A and A ∩ B = ∅.
 310    *
 311    *   * Otherwise, let b ∈ B be any element.  Let
 312    *
 313    *       -- A_L = { x ∈ A | x < A },
 314    *       -- A_M = A ∩ {b},
 315    *       -- A_R = { x ∈ A | x > A },
 316    *       -- B_L = { x ∈ B | x < B }, and
 317    *       -- B_R = { x ∈ B | x > B }.
 318    *
 319    *     Then
 320    *
 321    *       -- A - B = (A_L - B_L) ⊎ (A_R - B_R), and
 322    *       -- A ∩ B = (A_L ∩ B_L) ⊎ A_M ⊎ (A_R ∩ B_R).
 323    *
 324    * In practice, we select b as the root of the B tree, and B_L and B_R are
 325    * simply its left and right subtrees; it's safe to treat B as a simple
 326    * binary search tree, since we never actually pass nodes of B to the
 327    * provided operations.  The `splitat' operation determines A_L, A_M, and
 328    * A_R; and the `join' operation performs the disjoint unions.
 329    *
 330    * Rather than actual recursion, we maintain an explicit stack.  A
 331    * recursive call is replaced by pushing the remaining state on a stack and
 332    * returning to the initial label `entry'.  A `return' pops a state from
 333    * the stack and forces control back to the appropriate place.  If you
 334    * squint, then a sequence
 335    *
 336    *    stack[sp++].op = FOO; goto entry;
 337    *  foo:
 338    *
 339    * takes the place of a recursive call.
 340    */
 341
 342 entry:
 343   if (!a) {
 344     /* First base case.  If A is empty, then the difference and intersection
 345      * must be both be empty.
 346      */
 347
 348     diff = 0; diffht = 0; isect = 0; isectht = 0;
 349   } else if (!b) {
 350     /* Second base case.  If B is empty, then the difference is A, and the
 351      * intersection is empty.
 352      */
 353
 354     diff = a; diffht = aht; isect = 0; isectht = 0;
 355   } else {
 356     /* Recursive case. */
 357
 358     /* If there's no stack space then abandon ship. */
 359     if (sp >= htmax) goto abandon;
 360
 361     /* Split A into three pieces according to the root of the B tree. */
 362     rc = splitatfn(cls,
 363                    &a, &aht,
 364                    &stack[sp].am,
 365                    &stack[sp].u.right.a, &stack[sp].u.right.aht,
 366                    &a, keyfn(cls, b));
 367       XYLA__ASSERT(rc >= 0);
 368
 369     /* Recursively compute the difference and intersection of the low
 370      * halves.
 371      */
 372     stack[sp].u.right.b = b->right;
 373     b = b->left;
 374     stack[sp++].op = RIGHT; goto entry;
 375
 376   right:
 377     /* Recursively compute the union and intersection of the high halves.
 378      * This is mostly just shuffling the data about.  Note that the stack
 379      * pointer has just been decremented prior to calling us, so there's no
 380      * need to check the stack bound again.
 381      */
 382
 383     a = stack[sp].u.right.a; aht = stack[sp].u.right.aht;
 384     b = stack[sp].u.right.b;
 385     stack[sp].u.join_diff.diff = diff;
 386     stack[sp].u.join_diff.diffht = diffht;
 387     stack[sp].u.join_diff.isect = isect;
 388     stack[sp].u.join_diff.isectht = isectht;
 389     stack[sp++].op = JOIN; goto entry;
 390
 391   join:
 392     /* Stitch the answers together to compute the result. */
 393
 394     rc = joinfn(cls, &diff, &diffht,
 395                 &stack[sp].u.join_diff.diff, stack[sp].u.join_diff.diffht,
 396                 0,
 397                 &diff, diffht);
 398       XYLA__ASSERT(rc >= 0);
 399     rc = joinfn(cls, &isect, &isectht,
 400                 &stack[sp].u.join_diff.isect, stack[sp].u.join_diff.isectht,
 401                 stack[sp].am,
 402                 &isect, isectht);
 403       XYLA__ASSERT(rc >= 0);
 404   }
 405
 406   /* If the stack isn't empty then we've performed a `recursive' step and
 407    * should return to the appropriate place.
 408    */
 409   if (sp) switch (stack[--sp].op) {
 410     case RIGHT: goto right;
 411     case JOIN: goto join;
 412     default: XYLA__ASSERT(0);
 413   }
 414
 415   /* All is done. */
 416   *aroot = 0; if (aht_inout) *aht_inout = 0;
 417   *diff_out = diff; if (diffht_out) *diffht_out = diffht;
 418   *isect_out = isect; if (isectht_out) *isectht_out = isectht;
 419   return (XYLA_RC_OK);
 420
 421 abandon:
 422   /* Abandon ship if the stack is too small.
 423    *
 424    * We must reassemble all of the pieces of tree on the stack into two valid
 425    * trees.  Unlike `xyla_bt_unisect' above, we know that all of the pieces
 426    * belong to the a tree, but they're split between the difference and
 427    * intersection results in a way that's impractically difficult to
 428    * untangle.  As a result, we'll accumulate the difference pieces back into
 429    * the a tree, and leave the intersection pieces in a separate tree that
 430    * the caller can reassemble.  (We can't do it ourselves, because it might
 431    * additionally require rebalancing one of those trees too.)
 432    *
 433    * We have two kinds of entries on the stack.  A `RIGHT' entry holds chunks
 434    * of the operand trees which haven't been carved up yet, with deeper
 435    * levels of the stack holding pieces from further to the right; the a
 436    * piece here can just be returned to their original tree without
 437    * difficulty.  A `JOIN' entry holds chunks of result trees, with deeper
 438    * levels of the stack put together from pieces further to the left.  We'll
 439    * just accumulate them into a fresh tree.  And, finally, we have the
 440    * current subtrees that we were looking at when we noticed the stack ran
 441    * out.
 442    */
 443
 444   isect = 0; isectht = 0;
 445   while (sp) switch (stack[--sp].op) {
 446     case RIGHT:
 447       rc = joinfn(cls, &a, &aht,
 448                   &a, aht,
 449                   stack[sp].am,
 450                   &stack[sp].u.right.a, stack[sp].u.right.aht);
 451         XYLA__ASSERT(rc >= 0);
 452       break;
 453     case JOIN:
 454       rc = joinfn(cls, &a, &aht,
 455                   &stack[sp].u.join_diff.diff, stack[sp].u.join_diff.diffht,
 456                   stack[sp].am,
 457                   &a, aht);
 458         XYLA__ASSERT(rc >= 0);
 459       rc = joinfn(cls, &isect, &isectht,
 460                   &stack[sp].u.join_diff.isect,
 461                     stack[sp].u.join_diff.isectht,
 462                   0,
 463                   &isect, isectht);
 464         XYLA__ASSERT(rc >= 0);
 465       break;
 466     default:
 467       XYLA__ASSERT(0);
 468   }
 469   *aroot = a; if (aht_inout) *aht_inout = aht;
 470   *isect_out = isect; if (isectht_out) *isectht_out = isectht;
 471   return (XYLA_RC_TALL);
 472 }
 473
 474 /*----- That's all, folks -------------------------------------------------*/