.\" -*-nroff-*- .\" .\" Manual for Xyla .\" .\" (c) 2024 Straylight/Edgeware .\" . .\"----- Licensing notice --------------------------------------------------- .\" .\" This file is part of Xyla, a library of binary trees. .\" .\" Xyla is free software: you can redistribute it and/or modify it under .\" the terms of the GNU Lesser General Public License as published by the .\" Free Software Foundation; either version 3 of the License, or (at your .\" option) any later version. .\" .\" Xyla is distributed in the hope that it will be useful, but WITHOUT .\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or .\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public .\" License for more details. .\" .\" You should have received a copy of the GNU Lesser General Public .\" License along with Xyla. If not, see . . .TH xyla 3 "9 September 2024" "Straylight/Edgeware" "Xyla binary trees library" . .SH NAME xyla \- fast, hard-mode binary trees . .\"-------------------------------------------------------------------------- .\" Preliminary definitions. . .\" String definitions and font selection. .ie t \{\ . ds o \(bu . ds ss \s8\u . ds se \d\s0 . ds us \s8\d . ds ue \u\s0 . ds *d \(*d . ds /= \(!= . ds <= \(<= . ds >= \(>= . ds mu \(mu . ds sr \(sr . ds ' \(fm . ds , \h'\w'\ 'u/2u' . if \n(.g \{\ . fam P . ev an-1 . fam P . ev . \} .\} .el \{\ . ds o o . ds ss ^ . ds se . ds us _ . ds ue . ds mu * . ds sr sqrt . ds ' \(aq . ds *d \,\fI\&delta\/\fP . ds /= /= . ds <= <= . ds >= >= . ds , \ \" .\} .. . .\" hP TEXT -- start an indented paragraph with TEXT hanging off to the left .de hP . IP . ft B \h'-\w'\\$1\ 'u'\\$1\ \c . ft P .. . .\" .VS ... .VE -- present a code sample; separate paragraphs with `.VP' .de VS . sp . RS . nf . ft B .. .ie t \{\ . de VP . sp .4v .. .\} .el \{\ . de VP . sp .. .\} .de VE . ft R . fi . RE . sp .. . .\"-------------------------------------------------------------------------- .SH SYNOPSIS . .nf .ne 5 .B "#include " .B "#include " .B "#include " .B "#include " .B "#include " .PP .ne 3 .fi .ta \w'\fB/'u .B /* In the following, .I tree is any of .BR avl , .BR rb , .BR splay , or .BR treap ; .br .B " * " .I TREE is the corresponding one of .BR AVL , .BR RB , .BR SPLAY , or .BR TREAP ; and .br .B " * " .I t is the corresponding one of .BR avl , .BR rb , .BR spl , or .BR trp . .br .B " */" .nf .PP .ne 11 .BR /* " Return codes. " */ .ta 2n .B "enum {" .B " XYLA_RC_OK = 0," .B " XYLA_RC_HTCHG = ...," .B " XYLA_RC_FAIL = ...," .B " XYLA_RC_TALL = ...," .B " XYLA_RC_BAD = ...," .B " XYLA_RC_NOMEM = ...," .B " XYLA_RC_LOSELIMIT = ...," .B " XYLA_RC_WINLIMIT = ..." .B "};" .PP .BI "const char *xyla_strerror(int " rc ); .PP .ne 6 .BR /* " Basic node structure. " */ .ta 2n .B "struct xyla_bt_node {" .B " struct xyla_bt_node *left, *right;" .B "};" .PP .ne 4 .BR /* " Node class. " */ .ta 2n .B "struct xyla_bt_nodecls {" .B " const struct xyla_bt_nodeops *ops;" .B "};" .PP .ne 8 .ta \w'\fB\&typedef void xyla_bt_updfn('u .BI "typedef void xyla_bt_updfn(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node *" node ); .B "typedef void xyla_bt_dummyfn(void);" .ta 2n .B "struct xyla_bt_nodeops {" .B " size_t sz;" .B " xyla_bt_updfn *upd;" .B " xyla_bt_dummyfn *_rsvd2, *_rsvd3"; .B "};" .PP .ne 13 .ta \w'\fB\&typedef int xyla_bt_navfn('u .BI "typedef int xyla_bt_navfn(struct xyla_bt_nodecls *" cls , .BI " const struct xyla_bt_node *" node , .BI " void *" arg ); .ta \w'\fB\&typedef const void *xyla_bt_keyfn('u .BI "typedef const void *xyla_bt_keyfn(struct xyla_bt_nodecls *" cls , .BI " const struct xyla_bt_node *" node ); .ta 2n .B "struct xyla_bt_ordopslots {" .B " xyla_bt_navfn *nav;" .B " xyla_bt_keyfn *key;" .B "};" .B "struct xyla_bt_ordops {" .B " struct xyla_bt_nodeops node;" .B " struct xyla_bt_ordopslots ord;" .B "};" .PP .ne 6 .ta 2n .BR /* " Node positions. " */ .B "enum {" .B " XYLA_BTPOS_ROOT," .B " XYLA_BTPOS_LEFT," .B " XYLA_BTPOS_RIGHT" .B "};" .PP .ne 9 .BR /* " Iteration. " */ .BI "struct xyla_" tree "_iter { ...\& };" .BI "struct xyla_" tree "_riter { ...\& };" .ta \w'\fB\&void xyla_\fI\&tree\/\fB_inititer('u .BI "void xyla_" tree "_inititer(const struct xyla_bt_node *" root , .BI " struct xyla_" tree "_iter *" it ); .BI "void *xyla_" tree "_next(struct xyla_" tree "_iter *" it ); .ta \w'\fB\&void xyla_\fI\&tree\/\fB_initriter('u .BI "void xyla_" tree "_initriter(const struct xyla_bt_node *" root , .BI " struct xyla_" tree "_riter *" rit ); .BI "void *xyla_" tree "_prev(struct xyla_" tree "_riter *" rit ); .PP .ne 6 .BR /* " Paths. " */ .BI "struct xyla_" tree "_path { ...\& };" .ta \w'\fB\&typedef int xyla_bt_ascendfn('u .BI "typedef int xyla_bt_ascendfn(struct xyla_bt_node *" node , .BI " struct xyla_bt_node *" parent , .BI " struct xyla_bt_node *" sibling , .BI " unsigned " pos ", void *" arg ); .ne 3 .BI "void *xyla_" tree "_current(const struct xyla_" tree "_path *" path ); .ta \w'\fB\&void xyla_\fI\&tree\/\fB_copypath('u .BI "void xyla_" tree "_copypath(struct xyla_" tree "_path *" newpath , .BI " const struct xyla_" tree "_path *" path ); .ne 8 .ta \w'\fB\&void *xyla_\fI\&tree\/\fB_firstpath('u .BI "void *xyla_" tree "_firstpath(struct xyla_bt_node **" root , .BI " struct xyla_" tree "_path *" path ); .ta \w'\fB\&void *xyla_\fI\&tree\/\fB_lastpath('u .BI "void *xyla_" tree "_lastpath(struct xyla_bt_node **" root , .BI " struct xyla_" tree "_path *" path ); .BI "void *xyla_" tree "_nextpath(struct xyla_" tree "_path *" path ); .BI "void *xyla_" tree "_prevpath(struct xyla_" tree "_path *" path ); .BI "void xyla_" tree "_beforepath(struct xyla_" tree "_path *" path ); .BI "void xyla_" tree "_afterpath(struct xyla_" tree "_path *" path ); .ne 6 .ta \w'\fB\&void *xyla_\fI\&tree\/\fB_rootpath('u .BI "void *xyla_" tree "_rootpath(struct xyla_" tree "_path *" path , .BI " struct xyla_bt_node **" root ); .ta \w'\fB\&void *xyla_\fI\&tree\/\fB_uppath('u .BI "void *xyla_" tree "_uppath(unsigned *" pos_out , .BI " const struct xyla_" tree "_path *" path ); .BI "void *xyla_" tree "_leftpath(struct xyla_" tree "_path *" path ); .BI "void *xyla_" tree "_rightpath(struct xyla_" tree "_path *" path ); .ne 6 .ta \w'\fB\&void xyla_\fI\&tree\/\fB_replace('u .BI "void xyla_" tree "_replace(const struct xyla_" tree "_path *" path , .BI " struct xyla_" tree "_node *" node ); .ta \w'\fB\&void xyla_\fI\&tree\/\fB_ripple('u .BI "void xyla_" tree "_ripple(struct xyla_bt_nodecls *" cls , .BI " const struct xyla_" tree "_path *" path ); .ta \w'\fB\&int xyla_\fI\&tree\/\fB_ascend('u .BI "int xyla_" tree "_ascend(xyla_bt_ascendfn *" fn , .BI " const struct xyla_" tree "_path *" path ", void *" arg ); .PP .ne 7 .BR /* " Searching. " */ .ta \w'\fB\&void *xyla_\fI\&tree\/\fB_lookup('u .BI "void *xyla_" tree "_lookup(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node **" root , .BI " xyla_bt_navfn *" nav ", void *" arg ); .ta \w'\fB\&void *xyla_\fI\&tree\/\fB_probe('u .BI "void *xyla_" tree "_probe(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node **" root , .BI " xyla_bt_navfn *" nav ", void *" arg , .BI " struct xyla_" tree "_path *" path ); .PP .ne 6 .BR /* " Insertion and removal. " */ .ta \w'\fB\&int xyla_\fI\&tree\/\fB_insert('u .BI "int xyla_" tree "_insert(struct xyla_bt_nodecls *" cls , .BI " struct xyla_" tree "_path *" path , .BI " struct xyla_" tree "_node *" node ); .ta \w'\fB\&int xyla_\fI\&tree\/\fB_remove('u .BI "int xyla_" tree "_remove(struct xyla_bt_nodecls *" cls , .BI " struct xyla_" tree "_path *" path ); .PP .ne 6 .BR /* " Joining and splitting. " */ .ta \w'\fB\&int xyla_\fI\&tree\/\fB_join('u .BI "int xyla_" tree "_join(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node **" root_out\fR[ ", int *" rootht_out\fR] , .BI " struct xyla_bt_node **" left\fR[ ", int " lht\fR] , .BI " struct xyla_bt_node *" mid , .BI " struct xyla_bt_node **" right\fR[ ", int " rht\fR] ); .ne 4 .ta \w'\fB\&int xyla_\fI\&tree\/\fB_split('u .BI "int xyla_" tree "_split(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node **" left_out\fR[ ", int *" lht_out\fR] , .BI " struct xyla_bt_node **" mid_out , .BI " struct xyla_bt_node **" right_out\fR[ ", int *" rht_out\fR] , .BI " struct xyla_" tree "_path *" path ); .ne 7 .ta \w'\fB\&int xyla_\fI\&tree\/\fB_splitat('u +2n .BI "int xyla_" tree "_splitat(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node **" left_out , .BI " " \fR[ "int *" lht_out ,\fR] .BI " struct xyla_bt_node **" mid_out , .BI " struct xyla_bt_node **" right_out , .BI " " \fR[ "int *" lht_out ,\fR] .BI " struct xyla_bt_node **" root , .BI " const void *" key ); .ne 6 .ta \w'\fB\&int xyla_\fI\&tree\/\fB_splitroot('u +2n .BI "int xyla_" tree "_splitroot(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node **" left_out , .BI " " \fR[ "int *" lht_out ,\fR] .BI " struct xyla_bt_node **" root_out , .BI " struct xyla_bt_node **" right_out , .BI " " \fR[ "int *" rht_out ,\fR] .BI " struct xyla_bt_node **" root\fR[ ", int " ht\fR] ); .PP .ne 8 .BR /* " Set operations. " */ .ta \w'\fB\&int xyla_\fI\&tree\/\fB_unisect('u +2n .BI "int xyla_" tree "_unisect(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node **" uni_out , .BI " " \fR[ "int *" uniht_out ,\fR] .BI " struct xyla_bt_node **" isect_out , .BI " " \fR[ "int *" isectht_out ,\fR] .BI " struct xyla_bt_node **" aroot\fR[ ", int " aht\fR] , .BI " struct xyla_bt_node **" broot\fR[ ", int " bht\fR] ); .ne 7 .ta \w'\fB\&int xyla_\fI\&tree\/\fB_diffsect('u +2n .BI "int xyla_" tree "_diffsect(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node **" diff_out , .BI " " \fR[ "int *" diffht_out ,\fR] .BI " struct xyla_bt_node **" isect_out , .BI " " \fR[ "int *" isectht_out ,\fR] .BI " struct xyla_bt_node **" aroot\fR[ ", int " aht\fR] , .BI " struct xyla_bt_node **" broot ); .PP .ne 5 .BR /* " Checking. " */ .ta \w'\fB\&int xyla_\fI\&tree\/\fB_check('u .BI "int xyla_" tree "_check(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node *const *" root , .BI " FILE *" fp ", unsigned " f , .BI " " \fR[ "int " expht "," \fR] " void *" arg ); .PP .ne 8 .ta 2n .BR /* " AVL tree node structure. " */ .B "#define XYLA_AVLF_BALMASK 3u" .B "#define XYLA_AVLBAL_LBIAS 1u" .B "#define XYLA_AVLBAL_EVEN 0u" .B "#define XYLA_AVLBAL_RBIAS 2u" .B "struct xyla_avl_nodeslots {" .B " unsigned f;" .B "};" .PP .ne 2 .BR /* " AVL tree operations. " */ .BI "int xyla_avl_height(const struct xyla_avl_node *" node ); .PP .ta 2n .ne 5 .BR /* " Red-black tree node structure. " */ .B "#define XYLA_RBF_RED 1u" .B "struct xyla_rb_nodeslots {" .B " unsigned f;" .B "};" .PP .ne 2 .BR /* " Red-black tree operations. " */ .BI "int xyla_rb_height(const struct xyla_rb_node *" node ); .PP .ne 4 .ta 2n .BR /* " Splay tree node structure. " */ .B "struct xyla_splay_nodeslots {" .B " struct xyla_splay_node *parent;" .B "};" .PP .ne 5 .BR /* " Splay tree operations. " */ .ta \w'\fB\&void xyla_splay_splay('u .BI "void xyla_splay_splay(struct xyla_bt_nodecls *" cls , .BI " struct xyla_splay_path *" path ); .ta \w'\fB\&void xyla_splay_rebalance('u .BI "void xyla_splay_rebalance(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node **" root ); .PP .ne 4 .ta 2n .BR /* " Treap node structure. " */ .B "struct xyla_treap_nodeslots {" .B " size_t wt;" .B "};" .PP .ne 8 .ta 2n 64n .BR /* " Common node structure machinery. " */ .B "#define XYLA_BT_NODEPFX struct xyla_bt_node node" .BI "#define XYLA_" TREE "_NODEPFX \e" .B " XYLA_BT_NODEPFX; \e" .BI " struct xyla_" tree "_nodeslots " t .BI "struct xyla_" tree "_node {" .B " struct xyla_bt_node bt;" .BI " struct xyla_" tree "_nodeslots " t ; .B "};" .ne 8 .ta 2n 64n .B "#define XYLA_BT_NODEUSFX struct xyla_bt_node node" .BI "#define XYLA_" TREE "_NODEUSFX \e" .BI " struct xyla_" tree "_node " t "; \e" .B " XYLA_BT_NODEUSFX" .BI "union xyla_" tree "_nodeu {" .BI " struct xyla_" tree "_node " t ; .B " struct xyla_bt_node bt;" .B "};" .PP .ne 5 .BR /* " Low-level iteration machinery. " */ .ta 2n .ta \w'\fB\&void XYLA_BT_STEPITER('u .BI "void XYLA_BT_STEPITER(int &" rc ", struct xyla_bt_node *&" node , .BI " struct xyla_bt_node **" stack ", int &" sp ", " back ); .BI "void *xyla_bt_severfirst(struct xyla_bt_node **" root ); .PP .ne 4 .BR /* " Low-level path machinery. " */ .ta 2n +\w'\fB('u .B "struct xyla_bt_node *XYLA_BT_CURRENT" .BI " (struct xyla_bt_node **const *" path , .BI " int " k ); .ne 3 .ta \w'\fB\&void XYLA_BT_COPYPATH('u .BI "void XYLA_BT_COPYPATH(struct xyla_bt_node ***" newpath , .BI " struct xyla_bt_node **const *" path ", int " k ); .BI " struct xyla_bt_node ***" path ", int &" k , .ne 9 .ta \w'\fB\&void XYLA_BT_EXTRMPATH('u .BI "void XYLA_BT_EXTRMPATH(int &" rc ", struct xyla_bt_node *&" node , .BI " struct xyla_bt_node **" root , .BI " struct xyla_bt_node ***" path ", int &" k , .BI " " dir ", int " htmax ); .ta \w'\fB\&void XYLA_BT_STEPPATH('u .BI "void XYLA_BT_STEPPATH(int &" rc ", struct xyla_bt_node *&" node , .BI " struct xyla_bt_node ***" path ", int &" k , .BI " " dir ", " opp ", int " htmax ); .ta \w'\fB\&void XYLA_BT_NUDGEPATH('u .BI "void XYLA_BT_NUDGEPATH(int &" rc , .BI " struct xyla_bt_node ***" path ", int &" k , .BI " " dir ", " opp ", int " htmax ); .ne 8 .ta \w'\fB\&void XYLA_BT_ROOTPATH('u .BI "void XYLA_BT_ROOTPATH(struct xyla_bt_node *&" node , .BI " struct xyla_bt_node **" root , .BI " struct xyla_bt_node ***" path ", int &" k ); .ta \w'\fB\&void XYLA_BT_PARENTPATH('u .BI "void XYLA_BT_PARENTPATH(struct xyla_bt_node *&" node ", unsigned &" pos , .BI " struct xyla_bt_node ***" path ", int &" k ); .ta \w'\fB\&void XYLA_BT_CHILDPATH('u .BI "void XYLA_BT_CHILDPATH(int &" rc ", struct xyla_bt_node *&" node , .BI " struct xyla_bt_node ***" path ", int &" k , .BI " " dir ", int " htmax ); .ne 5 .ta \w'\fB\&void XYLA_BT_RIPPLE('u .BI "void XYLA_BT_RIPPLE(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node **const *" path ", int " k ); .ta \w'\fB\&void XYLA_BT_ASCEND('u .BI "void XYLA_BT_ASCEND(int &" rc ", struct xyla_bt_ascendfn *" fn, .BI " struct xyla_bt_node **const *" path ", int " k , .BI " void *" arg ); .PP .ne 10 .BR /* " Low-level search machinery. " */ .ta \w'\fB\&void *XYLA_BT_LOOKUP('u .BI "void *XYLA_BT_LOOKUP(struct xyla_bt_node *&" node , .BI " struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node *" root , .BI " xyla_bt_navfn *" navfn ", void *" arg ); .ta \w'\fB\&void *XYLA_BT_PROBE('u .BI "void *XYLA_BT_PROBE(int &" rc ", struct xyla_bt_node *&" node , .BI " struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node *" root , .BI " xyla_bt_navfn *" navfn ", void *" arg , .BI " struct xyla_bt_node ***" path ", int &" k ", int " htmax ); .PP .ne 6 .BR /* " Low-level removal machinery. " */ .ta \w'\fB\&int xyla_bt_remove('u +2n .BI "int xyla_bt_remove(struct xyla_bt_node **" del_out , .BI " struct xyla_bt_node **" subst_out , .BI " struct xyla_bt_node **" replace_out , .BI " struct xyla_bt_node ***" path ", int *" k_inout , .BI " int " htmax ); .PP .ne 6 .BR /* " Low-level set-operation machinery. " */ .ta \w'\fB\&typedef int xyla_bt_joinfn('u +2n .BI "typedef int xyla_bt_joinfn(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node **" root_out , .BI " int *" rootht_out , .BI " struct xyla_bt_node **" left ", int " lht , .BI " struct xyla_bt_node *" mid , .BI " struct xyla_bt_node **" right ", int " rht ); .ne 7 .ta \w'\fB\&typedef int xyla_bt_splitatfn('u +2n .BI "typedef int xyla_bt_splitatfn(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node **" left_out , .BI " int *" lht_out , .BI " struct xyla_bt_node **" mid_out , .BI " struct xyla_bt_node **" right_out , .BI " int *" lht_out , .BI " struct xyla_bt_node **" root , .BI " const void *" key ); .ne 7 .ta \w'\fB\&typedef int xyla_bt_splitrootfn('u +2n .BI "typedef int xyla_bt_splitrootfn(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node **" left_out , .BI " int *" lht_out , .BI " struct xyla_bt_node **" root_out , .BI " struct xyla_bt_node **" right_out , .BI " int *" rht_out , .BI " struct xyla_bt_node **" root , .BI " int " ht ); .ne 6 .ta 2n .B "struct xyla_bt_treeops {" .B " xyla_bt_joinfn *join;" .B " xyla_bt_splitatfn *splitat;" .B " xyla_bt_splitrootfn *splitroot;" .B "};" .B "struct xyla_bt_setopstack { ...\& };" .ne 9 .ta \w'\fB\&int xyla_bt_unisect('u +2n .BI "int xyla_bt_unisect(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node **" uni_out , .BI " int *" uniht_out , .BI " struct xyla_bt_node **" isect_out , .BI " int *" isectht_out , .BI " struct xyla_bt_node **" aroot ", int " aht , .BI " struct xyla_bt_node **" broot ", int " bht , .BI " const struct xyla_bt_treeops *" ops , .BI " struct xyla_bt_setopstack *" stack ", int " htmax ); .ne 9 .ta \w'\fB\&int xyla_bt_diffsect('u +2n .BI "int xyla_bt_diffsect(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node **" diff_out , .BI " int *" diffht_out , .BI " struct xyla_bt_node **" isect_out , .BI " int *" isectht_out , .BI " struct xyla_bt_node **" aroot ", int " aht , .BI " struct xyla_bt_node **" broot , .BI " const struct xyla_bt_treeops *" ops , .BI " struct xyla_bt_setopstack *" stack ", int " htmax ); .PP .ta 2n .ne 8 .BR /* " Low-level tree walk/check framework. " */ .B "enum {" .B " XYLA_CHKOP_NIL," .B " XYLA_CHKOP_SETUP," .B " XYLA_CHKOP_BEFORE," .B " XYLA_CHKOP_MID," .B " XYLA_CHKOP_AFTER," .B " XYLA_CHKOP_TEARDOWN" .B "};" .ne 3 .B "#define XYLA_CHKF_COREMASK 0x003fu" .B "#define XYLA_CHKF_TREEMASK 0x07c0u" .B "#define XYLA_CHKF_APPMASK 0xf800u" .PP .ne 11 .ta 2n .B "struct xyla_bt_check {" .B " struct xyla_bt_nodecls *cls;" .B " struct xyla_bt_node *const *root;" .B " FILE *fp;" .B " unsigned f;" .B " struct xyla_bt_node *parent; void *parent_info;" .B " struct xyla_bt_node *node; unsigned pos; void *node_info;" .B " struct xyla_bt_node *left; void *left_info;" .B " struct xyla_bt_node *right; void *right_info;" .B " void *state;" .B "};" .PP .ne 5 .ta \w'\fB\&typedef int xyla_bt_chkfn('u .BI "typedef int xyla_bt_chkfn(unsigned " op , .BI " const struct xyla_bt_check *" chk ); .ta \w'\fB\&typedef void xyla_bt_idfn('u .BI "typedef void xyla_bt_idfn(struct xyla_bt_nodecls *" cls , .BI " const struct xyla_bt_node *" node , .BI " FILE *" fp ); .ne 9 .ta 2n .B "struct xyla_bt_chkopslots {" .B " xyla_bt_chkfn *chk; size_t infosz;" .B " xyla_bt_idfn *id;" .B "};" .B "struct xyla_bt_chkops {" .B " struct xyla_bt_nodeops node;" .B " struct xyla_bt_ordopslots ord;" .B " struct xyla_bt_chkopslots chk;" .B "};" .PP .ne 6 .ta \w'\fB\&void xyla_bt_bughdr('u .BI "void xyla_bt_bughdr(const char *" token , .BI " struct xyla_bt_node *const *" root ", FILE *" fp ); .ta \w'\fB\&void xyla_bt_printnode('u .BI "void xyla_bt_printnode(struct xyla_bt_nodecls *" cls , .BI " const struct xyla_bt_node *" root ", FILE *" fp ); .PP .ne 6 .ta \w'\fB\&int xyla_bt_check('u +2n .BI "int xyla_bt_check(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node *const *" root , .BI " FILE *" fp ", unsigned " f , .BI " xyla_bt_chkfn *" tree_chkfn , .BI " size_t " tree_infosz ", void *" tree_state , .BI " void *" user_state ); .PP .ne 2 .B "struct xyla_bt_ordinfo { ...\& };" .BI "xyla_bt_chkfn xyla_bt_chkorder;" .fi . .\"-------------------------------------------------------------------------- .SH "OVERVIEW" . Xyla is a library which implements binary trees. . .SS "Binary trees" Binary trees are versatile data structures which maintain an .I ordered sequence of items. Sometimes we think of the items arranged in space, and say that items earlier in the sequence are on the .I left and items later in the sequence are on the .IR right . (Apologies to readers whose culture conventionally arranges things from right to left.) Elements can be searched for; enumerated; inserted between any adjacent pair of existing items, or at either end; and removed. Sequences can be split at any position, and concatenated. If the sequences are thought of as representing mathematical sets, then intersections, unions, and differences can be determined. .PP Technically, a binary tree is either .I nil or consists of a .IR node , which carries an item of data, and has links to .I left and .I right .IR children , each of which is a binary tree. The node is the .I parent of its children. The node is sometimes called the tree's .IR root ; if a node's children aren't nil, then they are the roots of their own .IR subtrees . Computer scientists learned about trees from genealogists, who are bad at botany and think that a tree has its root at the .I top and grows .IR downwards . .PP A binary tree's .I height is the largest number of links you have to follow, starting from the root, before you get to a nil subtree. Most operations on binary trees work by descending a tree, starting from the root and following links to children; or ascending, working from parent to child until reaching the root, so the tree's height is the primary limit on its performance. Xyla implements several different kinds of binary trees which differ according to the methods used to limit tree height, and the resulting performance tradeoffs. For example, AVL and red-black trees offer deterministic good .RI ( O (log\ n )) performance, but have relatively complicated and expensive algorithms; splay trees offer good .I amortized performance, but restructure trees as a result of conceptually read-only operations, and any single operation may require .IR O ( n ) time; and treaps are randomized, offering .I expected good performance, but require a good source of random data, and may deliver poor performance in any given instance, though with low probability. . .SS "Intrusive data structures" The trees implemented by Xyla are .IR intrusive : the storage used to maintain the tree structure is provided by the caller as part of its data. The `intrusion' into the caller's data consists of a small (generally about three words) structure to be included as part of the caller's data. Each kind of tree uses a different intrusion type. .PP The intrusion is most conveniently placed first, but circumstances may dictate other positions. For example, it's possible that a node is part of several trees at the same time: they require distinct intrusions, and at most one can be placed first. A macro like the following may be useful in such cases. .VS .ta 2n +\w'\fB('u 56n #define CONTAINER(type, mem, p) \e (!sizeof((p) == &((type *)0)->mem) + \e (type *)((unsigned char *)(p) \- offsetof(type, mem))) .VE . .SS "Iterators" An .I iterator is a data object which keeps track of the progress of an iteration. Once initialized, an iterator returns each node in the tree, in sequence order: either left to right, or right to left. (Iteration is therefore .I external rather than .IR internal . External iteration doesn't pervert the control structure of the calling program.) .PP Iterators are simple structures that hold no resources. A tree isn't changed at all when an iterator is created, or when an iterator is advanced: any number of iterators can be active on a single tree at a time; and it is safe for several threads to iterate over a single tree using their own separate iterators. It is safe to discard an iterator without ceremony once it's no longer needed. .PP Iterators are invalidated by operations which alter the structure of a tree: insertions, removals, splitting, joining, and set operations. (Splay tree iterators are an exception to this rule: a splay tree iterator remains valid as long as the .I next node in the sequence continues to exist.) .PP (The primary reason for supporting right-to-left iteration is .IR serialization . A number of serialization formats precede each item with a .I length or .I count field. In some formats \(en notably DER, MessagePack, and CBOR \(en this field may itself have variable size, depending on its value. Writing such formats is much more easily done in reverse order.) . .SS "Paths" A .I path is a data object which identifies a position in a binary tree. A .I full path identifies the position of a .I "target node" in the tree; an .I empty path identifies the position of the .I gap between two nodes, or at one of the extreme ends of the tree. Note that a nil tree has a single gap, and, in general, a tree containing .I k nodes has .IR k "\ +\ 1" gaps. .PP Paths are simple structures that hold no resources. A tree isn't changed at all when a path is created, or when a path is modified: any number of paths can be active on a single tree at a time; and it is safe for several threads to navigate a single tree using their own separate paths. It is safe to discard a path without ceremony once it's no longer needed. Paths can also be copied safely, e.g., by assignment or by calling .BR memcpy (3), but they may be quite large and most of the space isn't used except in very large trees. Functions are provided to copy just the relevant parts of paths. (Splay tree paths are small and cam be copied simply. But a function is still provided, for the sake of consistency.) .PP Functions are provided to establish a path referring to particular nodes, and to adjust paths so as to refer to other related nodes and gaps. These operations split into two categories. .hP \*o .I Logical operations act on the sequence of items held in a tree. There are operations to establish a path referring to the first or last node, to adjust a path so as to refer to the following or preceding node, or to the gap just before or after the current node. .hP \*o .I Structural operations act on the structure of the tree. There is an operation to establish a path referring to the tree's root node, and operations to adjust a path to as to refer to the current node's parent, or to its left or right child. .PP A path can also be established by .I probing the tree: see .B Searching below. .PP Like iterators, paths are invalidated by operations which alter the structure of a tree: insertions, removals, splitting, joining, and set operations. (Splay tree paths are an exception to this rule: a full splay tree path remains valid as long as the node to which it refers continues to exist; an empty splay tree path remains valid as long as the .I parent node \(en whose nil subtree denotes the gap to which the path refers \(en exists.) . .SS "Searching" Trees are searched under the control of a .IR "navigation function" . This is given a pointer to a node and an opaque argument pointer (and also a pointer to a node class, discussed below), and returns a negative, zero, or positive value, according to whether the position sought is before, equal to, or after the given node. For traditional .IR "binary search trees" , where the nodes ordering matches the order of some .I search key held as part of each node, the navigation function works very much like the comparison functions used by the .BR bsearch (3) and .BR qsort (3) functions in the standard C library, except that the arguments are in the opposite order. .PP A .I lookup just returns a pointer to the matching node, or null. A .I probe additionally fills in a path describing the position of the node in the tree, or, if no matching node was found, the gap where a matching node would be had it been found. . .SS "Node classes" Many of Xyla's functions expect a .I node class argument. A node class is represented by a simple structure holding a pointer to an .I operations table mostly containing pointers to functions. Applications can include additional data following the operations-table pointer, which the node-class functions can make use of. Simple uses don't require a node class, and you can safely pass a null pointer if you don't need the additional functionality. .PP Consistent with this, searching a binary search trees needs only an appropriate navigation function. However, invariant-checking and set operations require an .IR "ordered node class" , which is a node class with an extended operations table containing additional function pointers. Specifically, an ordered node class provides a navigation function for searching a tree for a given key, and a .I key-projection function which returns a pointer to a node's search key, suitable for passing to the navigation function, given the address of the node itself. . .SS "Tree operations" A node can be inserted in the gap indicated by an empty path; and the node identified by a full path can be removed from the tree. .PP A tree can be .I split at the position indicated by a path. The result consists of two trees, one containing all of the nodes strictly .I preceding the position indicated by the path, and one containing all of the nodes strictly .I following that position; if the path is full, then the indicated node is returned separately, being part of neither tree. .PP A pair of trees can be .IR joined , with an optional .IR "middle node" : the result is a single tree containing, in order, all the nodes of the first tree, followed by the middle node, if present, followed by all the nodes of the second tree. .PP Finally, binary search trees which have an ordered node class can be processed as .IR sets . Trees are considered as representing sets, with each node representing an element; equality of elements is determined by the ordered node class key-projection and navigation functions. Functions are provided to determine set unions, intersections, and differences. In order not to lose any nodes, the functions provided are a little strange. .hP \*o A .I unisect operation determines both the union and intersection of two given sets. Since the union consists of one copy of each element occurring in .I either set, and the intersection consists of one copy of each element occurring in .I both sets, the union and intersection trees between them hold every node of the input trees. .hP \*o A .I diffsect operation determines both the difference and intersection of two given sets. Since the difference consists of the elements of the first input set which .I don't occur in the second, while the intersection consists of the elements of the first input set which .I do occur in the second, the difference and intersection trees between them hold every node of the first input tree; the second input tree is unchanged. . .SS "Summary data" Xyla trees can maintain per-node .IR "summary data" . This can be any information that an application wants to keep track of that can be determined efficiently from a node itself, and its children \(en ideally in constant time. A common example of a summary would be a count of the number of nodes in the subtree rooted at a particular node: a node's count is clearly equal to the sum of its children's counts, plus one for itself, with an nil subtree having count zero. This can be used to find the .IR k th node in order efficiently, using an appropriate navigation function. .PP Summary data is maintained by an .I update function provided by a node class. The update function is called every time a node's children are modified, as a result of restructuring the tree. An update can also be requested explicitly if an application changes a node's summary data itself, and needs to .I ripple the change to the node's ancestors. .PP Finally, functions are provided to .I ascend the tree, accumulating information about the node and its ancestors. If each node records a count of nodes in its subtree, then ascending the tree can be used to determine the node's position in the overall sequence. (See the .B EXAMPLES below for how all of this can be done.) . .SS "Checking" Binary trees are complex structures, and it's easy for bugs to creep in. Xyla provides a framework for walking a binary tree reliably, and checking that it respects the required invariants. Applications can supply additional checks via the node class, to verify that summary data (described above) is computed correctly. .PP The core of the checking machinery is a nonrecursive tree walk which is robust against tangled links, and can associate additional caller-provided information with each node. . .SS "Heights" For some kinds of trees (specifically, AVL and red-black) the .I join operation is faster if the caller already knows some simple metric about the tree. For AVL trees, this is literally the tree's height, while for red-black trees, it's the .IR black-height , i.e., the (invariant) number of black nodes on every path starting from the root. We call this measure the .IR height , just because it's related to the height in the current examples. The height of a nil tree is always zero. The insertion and deletion operations on trees for which height is significant return a distinctive status code to indicate that the tree's height has changed by one. Callers uninterested in maintaining the tree height can safely ignore this. Operations involving splitting and joining trees (including the set operations) have additional arguments (compared to the versions for trees for which height is unimportant) for the application to provide the heights of input trees (or \-1 if the application neglected to keep track) and/or receive the heights of the output trees. The invariant-checking functions for trees where height is relevant have an additional argument so that, if the application is tracking the tree height, it can check that it has the correct value. . .\"-------------------------------------------------------------------------- .SH "FUNDAMENTALS" . .SS "Return codes" A number of Xyla functions return an integer-valued .IR "return code" . Each possible return code has a name of the form .BR XYLA_RC_ ...\&. A nonnegative value indicates that the operation completed successfully; a negative value indicates failure. The most common success code is .BR XYLA_RC_OK , which has the value zero, but others are possible. .PP Currently, Xyla has very few opportunities for failure. Indeed, many functions are defined to return success in all cases. A future version of Xyla will not return a code that the current version does not return to the same application. .PP The currently defined return codes are as follows. .TP .B XYLA_RC_OK The operation completed successfully. This code has the numeric value zero. .TP .B XYLA_RC_HTCHG The operation \(en insertion or removal \(en completed successfully, and, as a result, the tree's height has changed, by exactly one. .TP .B XYLA_RC_FAIL The operation failed in an unspecific manner. This code has the numeric value \-1. Xyla itself will not originate this code: it can only come from an application-provided function. .TP .B XYLA_RC_TALL The operation failed because the tree height exceeded a predefined limit. .TP .B XYLA_RC_BAD The tree structure was determined to be invalid. .TP .B XYLA_RC_NOMEM The operation failed because memory could not be allocated. .PP Additionally, two constants .B XYLA_RC_LOSELIMIT and .B XYLA_RC_WINLIMIT are defined. Every failure code has a value greater than or equal to .B XYLA_RC_LOSELIMIT and strictly less than zero; and every success code has a value greater than or equal to zero, and strictly less than .BR XYLA_RC_WINLIMIT . . .SS "Node structures" The application is responsible for defining the structure of a node. Indeed, with the exception of the checking framework, Xyla works only with memory provided by the application; but there must be some space for Xyla to maintain its tree structures, which we call the .IR "control data" . At minimum, each node must have links to its left and right subtrees; though, in fact, each kind of tree requires some additional control data beyond the two links. .PP The left and right links are held in a simple structure named .BR "struct xyla_bt_node" ; the links themselves are members of this structure named .B left and .BR right , in that order, and there are no other members. The .B left and .B right links are each a pointer to the .B "struct xyla_bt_node" structure within the referenced node, which must be explicitly converted to a containing type. .PP As mentioned, each kind of tree has its own additional control data. The whole control data for node is packaged up in a structure named .BI "struct xyla_" tree "_node" \fR. The easy way to allocate space for the control data in an application's node structure is simply to include a member of type .BI "struct xyla_" tree "_node" somewhere in the node structure; this member is called the .IR intrusion . It's often most convenient to place the intrusion as the first member in the node structure: this makes it easiest to convert between pointers to the node as a whole and to the intrusion. .PP The cost of this simple approach is that it can waste a little storage. The additional control data for some trees is an .B unsigned integer field containing a small number of flags. (The majority of this field is available for application use: see .B "TREE IDIOSYNCRASIES" below.) Since the .B left and .B right links are both pointers, on a 64-bit target, the overall intrusion structure ends up with a four-byte `gap' at the end. (There is no such gap on 32-bit targets. Other targets may have different structure layout problems.) .PP This gap can be filled if the application defines its node structure in a more complicated way. Each tree type defines its additional control data as a .I separate structure, named .BI "struct xyla_" tree "_nodeslots" \fR; the .BI "struct xyla_" tree "_node" structure is then .IP .ne 4 .nf .ta 2n .BI "struct xyla_" tree "_node {" .B " struct xyla_bt_node bt;" .BI " struct xyla_" tree "_nodeslots " t ; .B "};" .fi .PP where .I t here is an abbreviated `nickname' for the tree type. This sequence of members is defined in a macro .BI XYLA_ TREE _NODEPFX so that it can be inserted directly into a node structure. There, it can be followed immediately by additional small members in order to make use of the otherwise wasted space. Of course, now we've lost the real .BI "struct xyla_" tree "_node" structure which is occasionally needed, most notably for insertion. The solution here is to define a .I union with one member having the type .BI "struct xyla_" tree "_node" for passing into the library, while another contains the additional data packed into the gap. Again, a macro is provided: .BI XYLA_ TREE _NODEUSFX lists entire control-data structure types, this time in decreasing inclusion order. .PP For example, one might define .IP .ne 10 .nf .ta 2n +2n +2n .B "struct node {" .B " union {" .B " struct {" .B " XYLA_AVL_NODEPFX;" .B " unsigned info;" .BR " /* " "additional members here" " */" .B " } node;" .B " XYLA_AVL_NODEUSFX;" .B " } u;" .BR " /* " "additional members here" " */" .B }; .fi .PP Other tricks are surely possible. . .SS "Trees" A tree as a whole is represented by a pointer to its .IR "root link" : i.e., the address of a pointer to the root node. The root link will be null if and only if the tree is nil (empty); a new tree is initialized simply by setting its root link to null. .PP In the Xyla programming interface, the root link always has type .BR "struct xyla_bt_node" , rather than the more specific node type for a particular kind of node. . .SS "Navigation functions" A .I "navigation function" is represented by the type .IP .nf .ne 3 .ta \w'\fB\&typedef int xyla_bt_navfn('u .BI "typedef int xyla_bt_navfn(struct xyla_bt_nodecls *" cls , .BI " const struct xyla_bt_node *" node , .BI " void *" arg ); .fi .PP The search functions are given a pointer to a navigation function to guide them through the tree to find the intended node. The .I cls and .I arg arguments are provided to the search function by the caller. The .I arg pointer is not otherwise interpreted; the .I cls is used to determine a default navigation function if none is otherwise provided \(en see .B "Ordering operations" below. The .I node argument points to a node within the tree. The navigation function returns an integer, which must be negative if the desired node is earlier in the sequence than the given .IR node , positive if the desired node is later in the sequence, or zero if the .I node is in fact the desired node. .PP A navigation function for a binary search tree simply compares the search key against the given node, and returns negative if the search key orders before the key in the node, zero if they're equal, or positive if the key orders after the key in the node. . .SS "Node classes" A .I node class is represented by a pointer to a value of type .BR "struct xyla_bt_nodecls" , which has a single member: .VS .ne 3 .ta 2n struct xyla_bt_nodecls { const struct xyla_bt_nodeops *ops; }; .VE The .B "struct xyla_bt_nodeops" structure will be explained in full below. For now, it is mostly a table of function pointers, together with other constant data. The function pointers are each provided, as an argument, the original node-class pointer. If the .B "struct xyla_bt_nodecls" structure is embedded in a larger structure (usually, again, at the beginning, but this is not necessary) then these functions can readily recover the larger structure, which may contain additional constext information, or maintain state which the functions must maintain. .PP The .B "struct xyla_bt_nodeops" structure may, in fact, be only the first part of a larger table of information. To allow for later compatible extensions, first member of .B "struct xyla_bt_nodeops" simply holds the size of the overall structure, as allocated. One might therefore write .VS .ta 2n static const struct xyla_bt_nodeops my_nodeops = { sizeof(my_nodeops), ... }; .VE to initialize such a table. . .SS "Node operations" The basic operations are held directly in the .B "struct xyla_bt_nodeops" structure. .VS .ne 5 .ta 2 struct xyla_bt_nodeops { size_t sz; xyla_bt_updfn *upd; xyla_bt_dummyfn *_rsvd2, *_rsvd3; }; .VE The .B sz member was described above. .PP The .B upd function is called after a node has been changed, and is expected to update any .I summary data held by the node. If either the node-class pointer or the function pointer is null, then nothing happens. (This slightly improves the speed of operations like insertion and removal, since they don't necessarily need to trace all the way to the tree root.) .IP .ne 2 .nf .ta \w'\fB\&typedef void xyla_bt_updfn('u .BI "typedef void xyla_bt_updfn(struct xyla_bt_nodecls *" cls , .BI " struct xyla_bt_node *" node ); .fi .PP The function is given the node-class pointer .IR cls , and a pointer to the .I node that has been changed. It is expected to find the node's children by inspecting the .B left and .B right links directly. .PP The .B _rsvd2 and .B _rsvd3 members are reserved for future expansion, and must be null for forward compatibility. . .SS "Ordering operations" You don't need to to anything more to make a binary search tree work in Xyla than to write the appropriate navigation function and pass it to the search function. .PP Ordering operations are additional functions which can be provided by a node class in order to compare two nodes, rather than compare a node with a search key. The additional operations are part of .BR "struct xyla_bt_ordopslots" : .IP .nf .ne 4 .ta 2n .B "struct xyla_bt_ordopslots {" .B " xyla_bt_navfn *nav;" .B " xyla_bt_keyfn *key;" .B "};" .fi .PP This is bundled together with the basic .B "struct xyla_bt_nodeops" in .BR "struct xyla_bt_ordops" : .IP .nf .ne 4 .ta 2n .B "struct xyla_bt_ordops {" .B " struct xyla_bt_nodeops node;" .B " struct xyla_bt_ordopslots ord;" .B "};" .fi .PP The application allocates and fills in a .B "struct xyla_bt_ordops" , and then sets the .B ops pointer of the .B "struct xyla_bt_nodecls" to point to the .B node member. .PP The .B nav member is a navigation function, as described above. .PP The .B key member is a pointer to a .IR "key function" : .IP .nf .ie t \{\ . ne 2 . ta \w'\fB\&typedef const void *xyla_bt_keyfn('u . BI "typedef const void *xyla_bt_keyfn(struct xyla_bt_nodecls *" cls , . BI " const struct xyla_bt_node *" node ); .\} .el \{\ . ne 3 . ta 2n +\w'\fB('u . BI "typedef const void *xyla_bt_keyfn" . BI " (struct xyla_bt_nodecls *" cls , . BI " const struct xyla_bt_node *" node ); .\} .fi .PP Given a pointer to a tree node, the key function must return a pointer to its search key. The required property is that, if .I node is some node within a tree with root .IR root , then .IP .BI "xyla_" tree "_lookup(" cls ", " root , .IB navfn ", " keyfn "(" cls ", " node )) .PP should find the original .IR node . .PP The pointer that the key function returns must remain valid for the duration of the current call into the Xyla library. The key function is expected to be fast' it should not need to allocate memory: releasing any allocated memory later is the application's responsibility. .PP If it's possible to write a simple key function which efficiently extracts a search key from a node that works with the navigation function used for searching, then that's probably the best approach. The alternative is for the key function simply to return the node pointer unchanged: then the order-operation navigation function compares the two nodes directly against each other and returns the appropriate ordering. Of course, this will probably have to be a .I different navigation function from the one used for general search. .PP The ordering operations are used in four contexts. .hP \*o The navigation function will be used as a fallback by Xyla's tree search functions if their .I navfn argument is null. This indicates to a reader that you're searching the tree in the usual way, and saves a little typing, in the (hopefully common) case where the ordering-operations and search navigation functions are identical. .hP \*o The ordering operations are used by the .BI xyla_ tree _splitat function to find the split point within a tree. .hP \*o The .B "Set operations" described below use .BI xyla_ tree _splitat \fR, and therefore require the ordering operations. .hP \*o The .B xyla_bt_chkorder function can be used as part of the checking machinery to verify that a tree satisfies the ordering properties required by the ordering operations. See .B "CHECKING FRAMEWORK" below. . .\"-------------------------------------------------------------------------- .SH "MISCELLANEOUS FUNCTIONS" . This section describes functions of general utility which aren't part of the implementation of a specific tree structure. .PP The .B xyla_strerror function returns a pointer to a human readable message string corresponding to a return code .IR rc . If .I rc is not a defined error code, the function returns a pointer to a generic `unknown error code' message. The returned pointer will remain valid indefinitely. The function is safe to call from multiple threads concurrently. .PP The .B xyla_bt_severfirst function detaches the first (leftmost) node from a tree, and returns a pointer to it; if the tree is nil, then a null pointer is returned. The tree structure is modified: a complete iteration completes in .IR O ( n ) time, using only constant space. The modified structure preserves the node ordering, but is very unlikely to satisfy the usual tree invariants. In particular, it does .I not call any application-provided update function. Once this function has been called on a tree, the only safe thing to do with the tree is to continue severing nodes until the tree is nil. . .\"-------------------------------------------------------------------------- .SH "TREE OPERATIONS" . This section describes the main functions provided to operate on binary trees. Each kind of tree provides its own version each of the following functions, but they behave in very similar ways, so they are all described here together. In the following descriptions, .I tree (in italics) stands for the name of one of the kinds of tree: .BR avl , .BR rb , .BR splay , or .BR treap . .PP Behaviour specific to particular kinds of tree is described in the next section, .BR "TREE IDIOSYNCRASIES" . . .SS "Iteration" Iteration produces the nodes in the tree in their proper order, from start to end. An iterator value has the type .BI "struct xyla_" tree "_iter" \fR. .PP An iterator is initialized by calling .BI xyla_ tree _inititer \fR, passing it pointers to the tree's root node, and the iterator to initialize. .PP The .BI xyla_ tree _next function returns the next node in sequence (so, for example it returns the first node when called on a freshly initialized iterator). Once there are no more nodes to return, .BI xyla_ tree _next returns null. .PP Iterators don't need any kind of explicit teardown. .PP Iterators become invalid when the tree is changed, i.e., if a node is inserted or removed, the tree is split, or the tree is used as an operand to a join or set operation. (Splay tree iterators remain valid in more cases than this: see .B "TREE IDIOSYNCRASIES" below.) It is guaranteed that continuing with iteration will not access any node that has been previously returned. Consequently, code such as .VS struct xyla_\fI\&tree\fP_iter it; xyla_\fI\&tree\fP_inititer(&root, &it); for (;;) { node = xyla_\fI\&tree\fP_next(&it); free(node); } .VE is a valid way to free all of the nodes of a tree which have been allocated using .BR malloc (3). However, the .B xyla_bt_severfirst function is probably better for this task, .PP .I "Reverse iteration" produces the nodes in the tree in reverse order, from end to start. A reverse iterator has a distinct type, .BI "struct xyla_" tree "_riter" \fR, to prevent mix-ups. It's not possible to `switch' iteration direction: this kind of flexibility is possible using paths, but not with iterators. .PP The functions .BI xyla_ tree _initriter and .BI xyla_ tree _prev work exactly like .BI xyla_ tree _inititer and .BI xyla_ tree _next described above, except that they operate on reverse iterators, and return the nodes in the opposite order. . .SS "Path operations" A path describes the position of a .I "target node" in a tree (a .IR "full path" ), or a .I gap between two nodes or at either end of the sequence (an .IR "empty path" ). A path value has the type .BI "struct xyla_" tree "_path" \fR. .PP Several functions initialize paths: .BI xyla_ tree _copypath \fR, .BI xyla_ tree _firstpath \fR, .BI xyla_ tree _lastpath \fR, and .BI xyla_ tree _rootpath are described later in this section; .BI xyla_ tree _probe is described under .B "Search, insertion, and removal" below. .PP Paths don't need any kind of explicit teardown. .PP Paths become invalid when the tree is changed, i.e., if a node is inserted or removed, the tree is split, or the tree is used as an operand to a join or set operation. (Splay tree paths remain valid in more cases than this: see .B "TREE IDIOSYNCRASIES" below.) .PP The function .BI xyla_ tree _current is passed a pointer to a path. if the path is full, it returns a pointer to the target node; if the path is empty, then it returns null. .PP The function .BI xyla_ tree _copypath sets .I newpath to be a copy of .IR path . You can copy a path using ordinary assignment, but the function knows how to copy only the currently active parts of the path structure: for most tree types, a path structure contains a vector of pointers (see under .B PRIMITIVES for more detail), which is big enough for the largest possible tree. Most of the vector is unused for most trees, so copying it is a waste of time; but a structure assignment doesn't know how to do that. .PP The next group of functions to be discussed initializes or modifies a path according to the .I logical sequence represented by a tree. .PP The function .BI xyla_ tree _firstpath initializes a .I path to target the first node in the tree whose root link is at .I root , and returns a pointer to this first node. If the tree is nil, then the path will be empty, and the function returns null. The function .BI xyla_ tree _lastpath is similar, except that it targets and returns the last node in the sequence rather than the first. .PP The function .BI xyla_ tree _nextpath modifies an existing .I path so as to target the next node in sequence, if any. If the .I path initially targets a node other than the last in the sequence, then it is modified to target the next node in sequence. If the .I path initially targets a gap other than the gap after the last node, then it is modified to target the node following the gap. If the .I path initially targets the last node in sequence, then it is updated to target the gap after the last node. If the .I path initially targets the gap after the last node, then it is unchanged. The function returns the next node, if there is one, or null otherwise. The function .BI xyla_ tree _prevpath is similar, except that it targets and returns the previous node in the sequence, unless the .I path currently targets the first node or the gap before it, in which case it targets the gap before the first node and returns null. .PP The function .BI xyla_ tree _beforepath modifies an existing full .I path so as to target the gap immediately before the current target node \(en so following with a call to .BI xyla_ tree _nextpath would target and return the original target node again, and following with a call to .BI xyla_ tree _prevpath would target and return the previous node, as if .BI xyla_ tree _beforepath had not been called. The function .BI xyla_ tree _afterpath is similar, except that it targets the gap immediately after the current target node. It is an error to call .BI xyla_ tree _beforepath or .BI xyla_ tree _afterpath with an empty .IR path . .PP The next group of functions to be discussed initializes or modifies a path according to the .I structure of a tree. .PP The function .BI xyla_ tree _rootpath initializes a .I path to target the root node of the tree whose root link is at .IR root , and returns a pointer to the root node. If the tree is nil, then the .I path is initialized to target the single gap, and the function returns null. .PP The function .BI xyla_ tree _uppath modifies an existing full .I path so as to target the parent of the current target node, sets .BI * pos_out to .B XYLA_BTPOS_LEFT or .B XYLA_BTPOS_RIGHT respectively according to whether the current target node is the left or right child of its parent, and returns the parent node. If the current target node is the tree root, then the function leaves .I path unchanged, sets .BI * pos_out to .BR XYLA_BTPOS_ROOT , and returns null. It is an error to call .BI xyla_ tree _uppath with an empty .IR path . .PP The function .BI xyla_ tree _leftpath modifies an existing full .I path so as to target the current target node's left child, and return this left child; if the left child is nil, then instead the .I path is modified to target the gap immediately before the current target node, and the function returns null. The function .BI xyla_ tree _rightpath is similar, except that it targets and returns the current target node's right child, or the gap immediately after the current target node. It is an error to call .BI xyla_ tree _leftpath or .BI xyla_ tree _rightpath on an empty .IR path . .PP The function .BI xyla_ tree _replace replaces the current target node of a full .I path with a new .IR node . The previous target node is no longer part of the tree structure and can be released or otherwise invalidated. No update functions are called: call .BI xyla_ tree _ripple if necessary. It is an error to call .BI xyla_ tree _replace with an empty .IR path . .PP The function .BI xyla_ tree _ripple is passed a node-class pointer .I cls and a full .IR path , and updates the summary data in the target node's ancestor chain, which is useful if the application has changed the node's summary data so that the summaries held by the node's ancestors are now out of date. The function calls the node class's .B upd function on the parent of the .IR path 's current target node, then the parent's parent, and so on, and finally it calls .B upd on the tree root. The .B upd function is not called on the target node itself. If the .I path targets the root node then nothing happens. In any event, the .I path is left unchanged and valid. It is an error to call .BI xyla_ tree _ripple with an empty .I path , with a null .IR cls , or with a null .B upd function pointer in the node class operations table. .PP The function .BI xyla_ tree _ascend ascends a tree, starting from a the target node or gap of a .IR path , calling a function .I fn on each node as it goes. It is used to compute information about a node or gap using the summary data of the nodes along the path: see the .B EXAMPLES below. The .I fn is called with five arguments: .IP .nf .ne 4 .ta \w'\fB\&typedef int xyla_bt_ascendfn('u .BI "typedef int xyla_bt_ascendfn(struct xyla_bt_node *" node , .BI " struct xyla_bt_node *" parent , .BI " struct xyla_bt_node *" sibling , .BI " unsigned " pos ", void *" arg ); .fi .PP It is called at least once: in the first call, if the .I path is full, then .I node points to the .IR path 's target node; otherwise .I node is null. The .I parent argument is the .IR node 's parent, or the node whose nil child link represents the gap that is the .IR path 's target; if the .I node is the root node, or the tree is nil, then .I parent is null The .I sibling argument is the other child link of the .IR parent , i.e., the one that is not .IR node ; this is null if .I parent is null. The .I pos argument is .B XYLA_BTPOS_ROOT if the .I node is the root node, or the tree is nil \(en i.e., if and only if .I parent is null; .I pos is .B XYLA_BTPOS_LEFT or .B XYLA_BTPOS_RIGHT respectively if .I node is the left or right link of .IR parent . The .I arg argument to .I fn is the .I arg passed to .BI xyla_ tree _ascend \fR. If .I fn returns a nonzero value, then .BI xyla_ tree _ascend returns that nonzero value immediately; otherwise, if the initial .I node was the tree root, or the tree is nil \(en i.e., if the first call had .I pos equal to .B XYLA_BTPOS_ROOT \(en then .BI xyla_ tree _ascend returns zero. Otherwise, it calls .I fn again, this time with .I node equal to the previous .IR parent , and the other arguments set appropriately. This process continues until either .I fn returns a nonzero value, which becomes the return value of .BI xyla_ tree _ascend \fR, or .I fn returns zero when passed the root node, in which case .BI xyla_ tree _ascend returns zero. . .SS "Search, insertion, and removal" The function .BI xyla_ tree _lookup searches a tree. It is given a pointer .I cls to a node class, which may be null; the address .I root of the tree's root link, a navigation function .IR nav , and an argument pointer .I arg to pass to the navigation function. .PP The search begins at the root and descends through left and right child links, as directed by a navigation function. If the desired node is found \(en i.e., the navigation function returns zero given some node \(en then .BI xyla_ tree _lookup returns a pointer to that node; otherwise, it returns null. If .I nav is null then the navigation function from the node class's order operations is used; it is an error if .I nav is null and either .I cls is null or does not have order operations. .PP The function .BI xyla_ tree _probe is similar, except that it also initializes a given .IR path . If the desired node was found, then .I path is initialized to target the node; otherwise, the .I path is initialized to target the gap in which the selected node would have been (or, more concretely, the gap corresponding to the nil subtree which terminated the search). .PP The function .BI xyla_ tree _insert inserts a new .I node into the gap targetted by an empty .IR path . It is an error to provide a full path to this function. The function returns .B XYLA_RC_OK normally, or .B XYLA_RC_HTCHG if the tree height increased by one. (Tree types which don't have a notion of height never return .BR XYLA_RC_HTCHG .) .PP The function .BI xyla_ tree _remove removes the node targetted by a full .I path from its tree. It is an error to provide an empty path to this function. The function returns .B XYLA_RC_OK normally, or .B XYLA_RC_HTCHG if the tree height decreased by one. (Tree types which don't have a notion of height never return .BR XYLA_RC_HTCHG .) . .SS "Joining and splitting" The function .BI xyla_ tree _join concatenates two trees, together with an optional separating node. The resulting tree consists of, in order, the nodes from the .I left tree; the .I mid node, if this is not null; and the nodes from the .I right tree. The root of the resulting tree is stored in .BI * root_out \fR; null pointers are stored in .BI * left and .BI * right \fR. It is permitted for .I left or .I right to be equal to .IR root_out . If the tree type has a notion of height, then there are additional arguments. the caller can supply the heights of the left and right trees as .I lht and .IR rht ; either or both of these may be \-1 to indicate that the relevant tree's height isn't known. The height of the output tree is stored in .BI * rootht_out if this is not a null pointer. (Note that the output height is always determined correctly, even if the input heights are unknown.) The function always returns .BR XYLA_RC_OK . .PP The function .BI xyla_ tree _split splits a tree at the position indicated by a path. In general, the result consists of three pieces: a (possibly nil) .I left tree containing the nodes before the indicated position; the path's target node, if the path is full; and a (possibly nil) .I right tree containing the nodes after the indicated position. The root of the left tree is stored in .BI * left_out \fR; the target node, or a null pointer if the path is empty, is stored in .BI * mid_out \fR; and the root of the right tree is stored in .BI * right_out \fR. If the tree type has a notion of height, then the heights of the left and right trees are stored in .BI * lht_out and .BI * rht_out \fR; these pointers can be null to discard the height information. On exit, the original root is set to a null pointer, unless the original root address is equal to one of the output pointers. The function always returns .BR XYLA_RC_OK . .PP The function .BI xyla_ tree _splitat splits a binary search tree, whose root pointer is at .BI * root \fR, at the position indicated by a search .IR key . In general, the result consists of three pieces: a (possibly nil) .I prefix tree containing the nodes whose keys order before the given key; the node whose key matches the given key, if any; and the path's target node, if the path is full; and a (possibly nil) .I suffix tree containing the nodes whose keys order after the given key. Calling this function is exactly the same as calling .BI xyla_ tree _probe to find the position of the matching node followed by .BI xyla_ tree _split to split the tree at that point. The function always returns .BR XYLA_RC_OK . .PP The function .BI xyla_ tree _splitroot splits the tree, whose root pointer is at .BI * root \fR, at its root. In general, the result consists of three pieces: the root's left subtree; the root node itself, if the tree was not initially nil; and the root's right subtree. (Both the left and right subtrees are nil if the input tree was nil.) The root of the left subtree is stored in .BI * left_out \fR; the root node, or a null pointer if the tree was nil, is stored in .BI * root_out \fR; and the root of the right subtree is stored in .BI * right_out \fR. If the tree type has a notion of height, then the input tree's height is provided as .IR ht ; this may be \-1 if the correct value is unknown. The heights of the left and right subtrees are stored in .BI * lht_out and .BI * rht_out \fR; these pointers can be null to discard the height information. On exit, the original root is set to a null pointer, unless the original root address is equal to one of the output pointers. The function always returns .BR XYLA_RC_OK . . .SS "Set operations" The functions .BI xyla_ tree _unisect and .BI xyla_ tree _diffsect operate on trees representing .I sets of items. Each function receives two input trees, named .I A and .IR B . Such a tree must be a binary search tree, strictly ordered according to the .I key and .I nav operations of the provided node class; the output trees will be ordered in the same way. We shall say that a key is .I common if both input trees contain a node matching that key, or that the key is .I A-only or .I B-only if only the .I A or .I B tree, respectively, contains a node matching that key. In the case of a common key, then we say that the two nodes bearing that key, one from each of the .I A and .I B trees, .IR match . .PP The function .BI xyla_ tree _unisect simultaneously computes the union and intersection of two sets represented as binary search trees .I A and .IR B . The input trees are destroyed: each individual node ends up in one or other of the output trees. Nodes with .IR A -only and .IR B -only keys end up in the union tree; one of each pair of matching nodes ends up in the union tree, and the other goes into the intersection tree. The addresses of the input tree roots are supplied as the .I aroot and .I broot arguments; the output tree roots are written to .BR * uni_out and .BR * isect_out \fR. For tree types with a notion of height, the heights of the input trees are provided as .I aht and .IR bht ; either or both of these may be \-1 if the caller doesn't know the correct value. The heights of the output trees are stored in .BR * uniht_out and .BR *isectht_out \fR; either or both of these may be null to discard the height information. The function always returns .BR XYLA_RC_OK . .PP The function .BI xyla_ tree _diffsect simultaneously computes the difference and intersection of two sets represented as binary search trees .I A and .IR B . The input tree .I A is destroyed: each node ends up in one or other of the output trees. The .I B tree is not modified. Nodes with .IR A -only keys end up in the difference tree; nodes with common keys end up in the intersection tree. The addresses of the input tree roots are supplied as the .I aroot and .I broot arguments; the output tree roots are written to .BR * diff_out and .BR * isect_out \fR. For tree types with a notion of height, the heights of the input .I A tree is provided as .IR aht ; this may be \-1 if the caller doesn't know the correct value. The heights of the output trees are stored in .BR * diffht_out and .BR *isectht_out \fR; either or both of these may be null to discard the height information. The function always returns .BR XYLA_RC_OK . . .SS "Checking" The function .BI xyla_ tree _check walks a tree and checks that all of the applicable invariants hold. The address of the tree root pointer is passed as .BI * root \fR. Diagnostics are printed to the .BR stdio (3) stream .IR fp ; commonly, this is .BR stderr , and it may be null to suppress output altogether. If the tree type has a notion of height, then .I expht should hold the application's understanding of the tree's height, to be checked against the true height; this may be \-1 if the height is unknown. If the node class .I cls provides checking operations then these will be called at the relevant points while the checking proceeds; the .I arg will be passed to these operations as the `user state' pointer. Note that checking will .I mutate the tree! It is not safe to check a tree while it is being accessed by another thread. See the section .B "CHECKING FRAMEWORK" below for full details. The function returns .B XYLA_RC_OK on success, .B XYLA_RC_BAD if problems were found, or some other nonzero value if checking was aborted. . .\"-------------------------------------------------------------------------- .SH "TREE IDIOSYNCRASIES" . .SS "AVL trees" An .I Adelson-Velsky\(en\&Landis .RI ( AVL ) tree is a height-balanced binary tree: the heights of the two subtrees of any node can differ by at most one. .PP AVL trees offer good deterministic performance, and are an excellent choice for general-purpose use. .PP AVL trees have a notion of `height', which is exactly the actual height of the tree, i.e., the maximum number of links which can be traversed starting from the root before a null link is encountered. .PP The structure of an AVL tree is adjusted when nodes are inserted or removed. This invalidates all iterators and paths referring to the tree. It is safe for multiple threads to search an AVL tree concurrently. .PP The additional control data associated with an AVL tree node consists of a flags word .IR f . The low two bits of the flags word are used by the implementation; the remaining bits are available for use by applications: Xyla will not alter these bits. The use of the low bits is subject to change in future versions, but, for diagnostic purposes, it might be useful to know that bit 0 is set if the node is `left-biased', i.e., the left subtree is higher (by one) than the right subtree; bit 1 is set if the node is `right-biased', i.e., the left subtree is shorter (by one) than the right subtee; both bits are clear if the node is `balanced', i.e., both subtrees have the same height. . .SS "Red-black trees" A red-black tree is a height-balanced binary tree, initially presented by Leonidas Guibas and Robert Sedgewick, as an encoding of Rudolf Bayer's 2-3-4 trees using binary trees. .PP Red-black trees are less well-balanced than AVL trees. They can perform better when trees are modified much more than they are searched. .PP Each node is assigned a colour, either red or black; the colour assignment must obey the following rules. .hP \*o The root is black. .hP \*o If a node is red, its children must be black. .hP \*o Every path from root to leaf must have the .I same number of black nodes, called the tree's .IR "black height" . .PP Red-black trees don't maintain such rigid balance as AVL. This may make update-heavy workloads somewhat more efficient, though the rebalancing algorithms aren't significantly simpler than those for AVL trees; search-heavy workloads are likely to perform worse. .PP The structure of a red-black tree is adjusted when nodes are inserted or removed. This invalidates all iterators and paths referring to the tree. It is safe for multiple threads to search a red-black tree concurrently. .PP Red-black trees have a notion of `height', which is the tree's black height. .PP The additional control data associated with a red-black tree node consists of a flags word .IR f . The least significant bit is used by the implementation; the remaining bits are available for use by the application: Xyla will not alter these bits. The meaning of the low bit is subject to change in future versions, but, for diagnostic purposes, it might be useful to know that bit 0 is clear if the node is black, or set if the node is red. . .SS "Splay trees" A .I "splay tree" is an adaptively restructured binary tree, designed by Daniel Sleator and Robert Tarjan. .PP Splay trees are generally very fast. However, splay trees are modified in response to search operations as well as insertions and removals, which make them less unsuitable when trees are shared between threads, or where their highly dynamic nature is otherwise undesirable. .PP Splay trees are restructured in response to search operations as well as insertions or removals. Iterators and paths .I "remain valid" when the tree is restructured. Of course, if a node is removed, then an iterator or path referring to that node becomes invalid. (An empty path may refer unpredictably to the node immediately to the left or right of the targetted gap.) As a result of the restructuring, it is .I not safe to access a splay tree from multiple threads concurrently, even if they are only searching the tree. .PP The additional control data associated with a splay tree node consists of a .I parent pointer: in the root node, this pointer is null. .PP The .B xyla_splay_rebalance function will forcibly rebalance an entire tree. The tree's root pointer is at .BI * root \fR; this pointer is updated to point to the new tree root. The resulting structure is not guaranteed, but the resulting tree's height will be minimal for the number of nodes. Iterators and paths remain valid. .PP The .B xyla_splay_splay function `splays' a tree at the place indicated by a .IR path . The result is that the target node (full path) or the node holding the null link to the target gap (empty path) is promoted to the tree root. Iterators and paths remain valid. . .SS "Treaps" A .I treap is a .I randomized binary tree, published by Seidel and Aragon. Treaps are balanced with high probability. .PP Every node is assigned a .IR weight ; a child node is not allowed to be lighter than its parent. If each node has a distinct weight, then the lightest node is the tree root, and the remaining nodes are divided into the root's left and right subtrees according to the ordering of the nodes; therefore the tree structure is uniquely determined by the node ordering and their weights. .PP Treaps perform very well on average. The maintenance algorithms are particularly simple and fast. However, the requirement for random weights can make them inconvenient for some applications. .PP Treaps do not have a notion of `height'. .PP The structure of a treap is adjusted when nodes are inserted or removed. This invalidates all iterators and paths referring to the treap. It is safe for multiple threads to search a treap concurrently. .PP The additional control data associated with a treap node is the weight .IR wt . This .I must be set by the caller before a node is inserted into the tree. A node's weight must not change while the node is linked into a treap. The replacement node passed to .B xyla_treap_replace must have the same weight as the node that it replacing. If the tree structure is to be well balanced, the weights must be .I independent of the node ordering. This is the caller's responsibility. For simple applications, where the node ordering comes from a trusted source, weights can be chosen without much ceremony, e.g., even .BR rand (3) is likely to be sufficient. If the node ordering is determined by an adversary, then weights should be determined using a cryptographically strong generator. .PP Because the tree structure is (nearly) uniquely determined by the node ordering and weights, rebalancing the tree is not possible. The treap iterator and path structures are allocated such that the probability that a treap of maximum size will be too tall with probability at most 2\*(ss128\*(se, assuming that weights are properly chosen. If the tree height is too great, Xyla will call the function .BR xyla__treap_boundfail . On hosted systems, this function is defined by the Xyla library: it simply prints a message on standard error and calls .BR abort (3). On freestanding systems, a suitable implementation of .B xyla__treap_boundfail must be provided by the application: it must not return to its caller. . .\"-------------------------------------------------------------------------- .SH "PRIMITIVES" . This section describes a number of utility functions and macros used to implement the trees described above. They are not intended for direct use by applications. . .SS "Iteration" The iteration machinery provided here can perform nonrecursive forward and reverse iteration over an arbitrary binary tree, i.e., using only the .B left and .B right links of .BR "struct xyla_bt_node" . .PP Iteration state is maintained in a stack, consisting of a vector .I stack of pointers to nodes, and a stack pointer .IR sp . The stack is initially empty, with .IR sp "\ =\ 0." The stack vector itself need not be initialized. During iteration, the topmost item on the stack, .IB stack [ sp "\ \-\ 1]" \fR, points to the next node, .IR n , to return; the lower entries contain those ancestors of .I n which have not already been returned by the iteration, from bottom (i.e., near the leaves of the tree) to top (i.e., near the root). .PP The macro .B XYLA_BT_STEPITER advances the state of an iteration. The .I rc argument is an lvalue to receive a return code; .I node is a pointer to an initial node; .I stack points to the stack vector; .I sp is an lvalue designating the stack pointer; .I max is the number of entries available in the stack vector; and .I back is either .B left for forward iteration, or .B right for reverse iteration. The behaviour is to push nodes onto the stack and follow .I back links starting from the provided .I node until a null link is encountered. If .I node is null, then nothing is done. On success, the macro sets .IB rc "\ =\ XYLA_RC_OK" \fR; if the stack vector is too small, then it sets .IB rc "\ =\ XYLA_RC_TALL" \fR, and the stack is left unchanged; no other return codes are possible. .PP An iteration state is established by calling .B XYLA_BT_STEPITER with .I node pointing to the root node of the tree. Once this is done, if .I sp is zero, then all nodes have been processed; otherwise, decrement .IR sp , process the .I node at .IB stack [ sp ] \fR, and call again with .IB node ->right (for forward iteration) or .IB node ->left (for reverse iteration). . .SS "Path operations" The path-management machinery can keep track of, and adjust the position of, a node or gap between nodes within an arbitrary binary tree, i.e., using only the .B left and .B right links of .BR "struct xyla_bt_node" . .PP The path state is maintained in a vector .I path of .IR "link addresses" , and an index .IR k . The first entry, .IB path [0] \fR, is always the address of the root link; each subsequent entry holds the address of the .B left or .B right link of the node addressed by the previous link. The topmost entry, .IB path [ k ] \fR, .RI ( not .IR k "\ \-\ 1)" is the link to the target node (a non-null link, indicating a full path) or gap (a null link, indicating an empty path). .PP Many of the macros below additionally take a .I htmax argument giving the maximum tree height which can be accommodated by the supplied .I path vector. A vector must therefore have space for .IR htmax "\ +\ 1" entries. .PP The macro .B XYLA_BT_CURRENT returns the node targetted by a full .IR path , or null if the .I path is empty, similar to the .BI xyla_ tree _current function. As mentioned above, this simply returns .BI * path [ k ] \fR. .PP The macro .B XYLA_BT_COPYPATH copies a .I path to a new vector .IR newpath , similar to the .BI xyla_ tree _copypath function. .PP The macro .B XYLA_BT_EXTRMPATH establishes a full path which targets the first or last node in a tree, similar to the .BI xyla_ tree _firstpath or .BI xyla_ tree _lastpath functions; if the tree is nil, then the path is empty. The .I rc argument is an lvalue to receive a return code; .I node is an lvalue to receive a pointer to the first or last node (or a null pointer, if the tree is nil); .I path is the path vector to fill in; .I k is an lvalue to receive the path length; .I root is the address of the tree root link; .I dir is .B left or .B right according to whether the first or last node, respectively, is sought; and .I htmax is the maximum tree height which can be accommodated by the .I path vector. On exit, .I rc is .B XYLA_RC_OK on success, or .B XYLA_RC_TALL if the tree's height exceeded the caller's limit; no other return codes are possible. .PP The macro .B XYLA_BT_STEPPATH advances a path one node left or right in the node ordering, similar to the .BI xyla_ tree _nextpath or .BI xyla_ tree _prevpath functions. If the path targets the gap at the extreme end, or the tree is simply nil, then it is left unchanged; if the path targets the extreme node, then it is updated to refer to the gap beyond; otherwise, the path is updated to refer to the next node in the appropriate direction. The .I rc argument is an lvalue to receive a return code; .I node is an lvalue to receive a pointer to the previous or next node (or a null pointer, if the path is already at or beyond the extreme node); .I path is the path vector to update; .I k is an lvalue holding the current path length, to be updated; .I dir and .I opp are .B left and .B right to advance the path towards the start of the sequence, or .I vice-versa to advance the path towards the end; and .I htmax is the maximum tree height which can be accommodated by the .I path vector. On exit, .I rc is .B XYLA_RC_OK on success, or .B XYLA_RC_TALL if the tree's height exceeded the caller's limit; no other return codes are possible. .PP The macro .B XYLA_BT_NUDGEPATH advances a .I full path to refer instead to the gap immediately before or after it, similar to the .BI xyla_ tree _beforepath or .BI xyla_ tree _afterpath functions. It is an error if the path is empty. The .I rc argument is an lvalue to receive a return code; .I path is the path vector to update; .I k is an lvalue holding current path length, to be updated; .I dir and .I opp are .B left and .B right to advance the path to the gap before the current target node, or .I vice-versa to advance the path to the gap after it; and .I htmax is the maximum tree height which can be accommodated by the .I path vector. On exit, .I rc is .B XYLA_RC_OK on success, or .B XYLA_RC_TALL if the tree's height exceeded the caller's limit; no other return codes are possible. .PP The macro .B XYLA_BT_ROOTPATH establishes a path targetting the root of a tree, similar to the .BI xyla_ tree _rootpath function. If the tree is nil, then the path will be empty; otherwise it will be full. The .I node argument is an lvalue to receive a pointer to the root node (or a null pointer, if the tree is nil); .I path is the path vector to fill in; .I k is an lvalue to receive the path length (which will be exactly 0); and .I root is the address of the tree root link. .PP The macro .B XYLA_BT_PARENTPATH updates a .I full path to target the current target node's parent, and reports the previous target node's position relative to its parent, similar to the .BI xyla_ tree _uppath function. If the path initially targets the tree root, then this is reported, but the path is not changed. The .I node argument is an lvalue to receive a pointer to the parent node, or null if the path already targetted the tree root; .I pos is an lvalue to receive the position indicator; .I path is the path vector to update; and .I k is an lvalue holding the current path length, to be updated. On exit, .I pos is set to .B XYLA_BTPOS_LEFT or .B XYLA_BTPOS_RIGHT if the original path target is the left or right child of its parent, respectively; or .B XYLA_BTPOS_ROOT if the original path target is the tree root. (Since this macro can only decrease the path length, it will always succeed and there is no need for a return code.) .PP The macro .B XYLA_BT_CHILDPATH updates a .I full path to target the current target node's left or right child, similar to the .BI xyla_ tree _leftpath or .BI xyla_ tree _rightpath functions. The path will thereby become full or empty according to whether the node has the relevant child. The .I rc argument is an lvalue to receive a return code; .I node is an lvalue to receive a pointer to the child node, or a null pointer if there is no such child; .I path is the path vector to update; .I k is an lvalue holding the current path length, to be updated; .I dir is .B left or .B right to select the left or right child, respectively;; and .I htmax is the maximum tree height which can be accommodated by the .I path vector. On exit, .I rc is .B XYLA_RC_OK on success, or .B XYLA_RC_TALL if the tree's height exceeded the caller's limit; no other return codes are possible. .PP The macro .B XYLA_BT_RIPPLE updates the ancestors of the target node of a .I full path, applying the update function of a node class, similar to the .BI xyla_ tree _ripple function. The .I cls argument is a pointer to the node class, which must not be null, and must have a non-null .B upd function pointer; .I path is the path vector; and .I k is the length of the path. .PP The macro .B XYLA_BT_ASCEND ascends the tree, allowing a caller-provided function to accumulate summary data from the ancestors of a node or gap targetted by a path, similar to the .BI xyla_ tree _ascend function. The .I rc argument is an lvalue to receive a return code; .I fn is a pointer to a .B xyla_bt_ascendfn to call on each ancestor node; .I path is the path vector; .I k is the path length; and .I arg is a pointer to be passed to .IR fn . See the description of .BI xyla_ tree _ascend for the full details. On exit, if .I fn returned a nonzero value, then .I rc is that nonzero value; otherwise .I rc is set to zero. . .SS "Search" The following macros assist with searching binary trees. .PP The macro .B XYLA_BT_LOOKUP searches a tree, guided by a navigation function, returning the node found, or null, similar to the .BI xyla_ tree _lookup function. The .I cls argument is a pointer to a node class, or null; .I node is an lvalue to receive a pointer to the found node, or null; .I root is a pointer to the root node (not the address of the root link); .I navfn is a caller-supplied navigation function, or null; and .I arg is a pointer to be passed to the navigation function. If .I navfn is not null, then it is used as the navigation function; otherwise .I cls must not be null, and must have a non-null .I nav function, which is used instead. .PP The macro .B XYLA_BT_PROBE searches a tree, as for .BR XYLA_BT_LOOKUP , and establishes a path to the node or gap found, similar to the .BI xyla_ tree _probe function. The .I cls argument is a pointer to a node class, or null; .I rc is an lvalue to receive a return code; .I node is an lvalue to receive a pointer to the found node, or null; .I root is a pointer to the root node (not the address of the root link); .I navfn is a caller-supplied navigation function, or null; .I arg is a pointer to be passed to the navigation function; .I path is the path vector to fill in; .I k is an lvalue to receive the path length; and .I htmax is the maximum tree height which can be accommodated by the .I path vector. The navigation function is determined in the same way as by .B XYLA_BT_LOOKUP above. On exit, .I rc is set to .B XYLA_RC_OK on success, or .B XYLA_RC_TALL .B XYLA_RC_TALL if the tree's height exceeded the caller's limit; no other return codes are possible. . .SS "Removal" The function .B xyla_bt_remove assists with removal of nodes from binary trees. It's easy to remove a leaf node, or a node with only one child; the usual approach for removing a node with both children is to locate the node's immediate successor \(en which is the leftmost node in the right subtree and therefore cannot itself have a left child \(en and exchange them. There is a .IR "deleted node" , which is the target node of the input path; there is a .IR "substitute node" ; and there is a .IR "replacement node" , with which the deleted node is replaced. In the case where the deleted node has zero or one child, the substitute node is null, and the replacement node is the deleted node's child, if any, or null; the link to the deleted node is replaced by a link to the replacement node. In the case where the deleted node has two children, then the substitute node is the deleted node's successor, and the replacement node is the right child of the substitute node; the link to the deleted node is replaced by a link to the substitute node, and the link to the substitute node is replaced by the replacement node. The arguments .IR del_out , .IR subst_out , and .IR replace_out give locations where the deleted node, substitute node, and replacement node are stored; .I path is a path vector initially holding a .I full path to the node to be deleted; .I k_inout is the address of the path length, which will be updated; .I htmax is the maximum tree height which can be accommodated by the .I path vector. On success, the function restructures the tree as described, updates the path to target to the replacement node, (i.e., where we pretend that the target node was all along) and returns .BR XYLA_RC_OK ; if the tree height is too great, the function returns .B XYLA_RC_TALL and the path and tree are not modified; no other return codes are possible. . .SS "Set operations" This section describes functions which implement set operations using `split' and `join' operations. .PP The necessary tree operations are supplied to the set functions as a .B "struct xyla_bt_treeops" structure. This contains three function pointers, .BR join , .BR splitat , and .BR splitroot , which behave as the functions .BI xyla_ tree _join \fR, .BI xyla_ tree _splitat \fR, and .BI xyla_ tree _splitroot \fR, described above. Note that the functions here include arguments for tracking tree heights, whether or not the tree structure has a notion of height: if the tree has no notion of height, then the heights passed in and returned from the functions are unimportant; it's conventional, but not necessary, to pass zero. .PP The algorithms used by the set functions are nonrecursive, but require additional storage in the form of a `stack' vector of type .B "struct xyla_bt_setopstack" passed in by the caller. This vector need not be initialized in any particular way, and does not require any kind of special disposal. .PP The function .B xyla_bt_unisect computes the union and intersection of two trees, similar to the .BI xyla_ tree _unisect function. The argument .I cls is a pointer to the node class; .I uni_out and .I isect_out are pointers to the output root links for the union and intersection, and .I uniht_out and .I isect_out are pointers to the output heights for the trees, or null to discard them; .I aroot and .I broot are the addresses of the root links for the input trees, and .I aht_inout and .I bht_inout are the addresses of their heights, or null; .I ops is a pointer to the .B "struct xyla_bt_treeops" structure; .I stack is a pointer to the stack vector; and .I htmax is the number of elements in the stack vector. The function returns .B XYLA_RC_OK on success: in this case, .BI * aroot and .BI * broot are cleared (unless they coincide with other outputs); .BI * aht_inout and .BI * bht_inout are also zeroed, unless the respective pointers are null, or they overlap with other outputs; .BI * uni_out and .BI * isect_out point to the root nodes of the union and intersection trees; and .BI * uniht_out and .BI * isectht_out hold the heights of the output trees. If the provided stack vector was insufficient, then .B XYLA_RC_TALL is returned; .BI * uni_out \fR, .BI * isect_out \fR, .BI * uniht_inout \fR, and .BI * isectht_inout \fR are unchanged; and the input trees at .BI * aroot and .BI * broot \fR, and their heights in .BI * aht_inout and .BI * bht_inout \fR, are .I changed in such a way that, if the call is retried with a larger stack vector, or the trees are rebalanced so as to reduce their maximum height, then the correct answer will be obtained. .PP The function .B xyla_bt_diffsect computes the difference and intersection of two trees, similar to the .BI xyla_ tree _diffsect function. The argument .I cls is a pointer to the node class; .I diff_out and .I isect_out are pointers to the output root links for the union and intersection, and .I diffht_out and .I isect_out are pointers to the output heights for the trees, or null to discard them; .I aroot and .I broot are the addresses of the root links for the input trees, and .I aht_inout is the addresses of the .I a tree's height, or null; .I ops is a pointer to the .B "struct xyla_bt_treeops" structure; .I stack is a pointer to the stack vector; and .I htmax is the number of elements in the stack vector. The function returns .B XYLA_RC_OK on success: in this case, .BI * aroot is cleared (unless it coincide with another output); .BI * aht_inout is also zeroed, unless the respective pointers are null, or it overlaps with another output; .BI * diff_out and .BI * isect_out point to the root nodes of the difference and intersection trees; and .BI * uniht_out and .BI * isectht_out hold the heights of the output trees. If the provided stack vector was insufficient, then .B XYLA_RC_TALL is returned; .BI * diff_out \fR and .BI * diffht_inout \fR are unchanged; and the nodes of the .I a tree are scattered in an arbitrary manner between .BI * aroot and .BI * isect_out \fR, with their heights in .BI * aht_inout and .BI * isectht_out \fR, so that, if these two trees are remerged (e.g., by .B xyla_bt_unisect \(en note that their intersection will be nil) then the original .I a tree will be recovered, up to differences in internal structure. . \"-------------------------------------------------------------------------- .SH "CHECKING FRAMEWORK" . The Xyla library includes a comprehensive and robust framework for checking tree structures. . .SS "Check process" Checking a tree involves a walk over the tree nodes. Checking is .IR destructive : links are modified in order to keep track of which nodes have been visited so far, in order to detect `tangled pointers'. .PP The walk will call .I checking functions on each node. There are potentially two such checking functions: the .I tree checker is provided by the tree implementation, to verify properties of the tree structure, e.g., the AVL balance property, or the red-black properties; and the .I user checker to verify properties determined by the application, e.g., node ordering properties, or summary data maintained by .B upd functions. Associated with each checking function are a single .I "state pointer" , and a .I "node information block" for each node in the tree. .PP A checking function is passed an .I op code, and a pointer to a .B "struct xyla_bt_check" structure. Checking functions are called multiple times for each node: the .I op argument explains how the current call fits into the overall sequence. The .B "struct xyla_bt_check" structure collects together the known facts about a node \(en its parent, its children, and the node information blocks for all of these \(en and global state for the tree walk. A checking function returns an integer code, which should be .B XYLA_RC_OK (zero) if the call was successful; .B XYLA_RC_BAD if the function detected a problem, but checking should continue, e.g., to find other problems; or some other code to cause a .I panic and end the checking procedure as soon as possible. .PP The checking procedure is as follows. The walk begins at the root and proceeds mostly recursively. (The actual implementation is nonrecursive, in the sense that it uses constant stack space.) .PP If the node is null, then the checking functions are called with .IR op "\ \=\ " \fB\&XYLA_CHKOP_NIL . Otherwise, the following steps are taken in order. .hP 1. An information block is allocated for each active checking function. .hP 2. The checking functions are called with .IR op "\ \=\ " \fB\&XYLA_CHKOP_SETUP to initialize their information blocks. .hP 3. The checking functions are called with .IR op "\ \=\ " \fB\&XYLA_CHKOP_BEFORE . .hP 4. The node's left link is visited recursively. .hP 5. The checking functions are called with .IR op "\ \=\ " \fB\&XYLA_CHKOP_MID . .hP 6. The node's right link is visited recursively. .hP 7. The checking functions are called with .IR op "\ \=\ " \fB\&XYLA_CHKOP_AFTER . .hP 8. Some time later, after the checking functions have been called on the .I parent node with .IR op "\ \=\ " \fB\&XYLA_CHKOP_AFTER , they are called on .I this node with .IR op "\ \=\ " \fB\&XYLA_CHKOP_TEARDOWN . .PP There's no clear technical distinction between .B XYLA_CHKOP_SETUP and .BR XYLA_CHKOP_BEFORE . The idea is that .B XYLA_CHKOP_SETUP should ensure that a subsequent .B XYLA_CHKOP_TEARDOWN call can run successfully, while .B XYLA_CHKOP_BEFORE should ensure that any information needed by the node's children is available prior to visiting them. .PP In the event of a panic, not all of the above calls will be issued. However, a call with .IR op "\ \=\ " \fB\&XYLA_CHKOP_TEARDOWN will be made for a node if and only if a call with .IR op "\ \=\ " \fB\&XYLA_CHKOP_SETUP was previously made for the same node, regardless of whether that call succeeded. .PP The .B "struct xyla_bt_check" structure contains the following members. .TP .B cls The node class for the tree, as passed to the .B xyla_bt_check function. .TP .B root The address of the tree's root link. This is mostly used for reporting the tree identity in diagnostics. .TP .B fp The output stream for diagnostic messages, as passed to the .B xyla_bt_check function. This may be null to suppress diagnostics. .TP .B f The flags word, as passed to the .B xyla_bt_check function. .BR parent " and " parent_info The parent node, and the checker's node information block for the parent. If the current node is the tree root, then these are both null pointers. .BR node ", " pos ", and " node_info The current node, its position relative to its parent, and the checker's node information block for the node. The .B pos member is .B XYLA_BTPOS_ROOT if the node is the tree root, or .B XYLA_BTPOS_LEFT or .B XYLA_BTPOS_RIGHT if it is the left or right child of its parent, respectively. When a null link is encountered, .IR op "\ \=\ " \fB\&XYLA_CHKOP_NIL , and both .B node and .B node_info are null; .B pos is still set indicating the position of the null link. .TP .BR left " and " left_info The left child node, and the checker's node information block for the child. Both of these will be null if the current node has no left child. The .B left_info pointer will be null if .I op is either .B XYLA_CHKOP_SETUP or .BR XYLA_CHKOP_BEFORE . .TP .BR right " and " right_info The right child node, and the checker's node information block for the child. Both of these will be null if the current node has no right child. The .B right_info pointer will be null if .I op is either .B XYLA_CHKOP_SETUP or .BR XYLA_CHKOP_BEFORE . .TP .B state The state pointer associated with the checking function. .PP A checking function is allowed to modify the .BR parent_info , .BR node_info , .BR left_info , .BR right_info , and .B state members of the structure, which may be useful when chaining to other checking functions. Other members must not be changed. . .SS "Node class checking operations" A node class can include operations used during checking. These are defined in .BR "struct xyla_bt_chkops" . A node class which includes checking operations must also include ordering operations (the .B nav and .B key functions) though these can be null pointers if they are not otherwise required. The checking operations are as follows. .TP .B chk The user checking function, as described above. .TP .B infosz The size of a node information block used by the user checking function. .TP .B id A function to print a description of a node. This is used by .B xyla_bt_printnode (described below); it may be null, in which case a default generic format is used. . .SS "Functions" The function .B xyla_bt_check is the main entry point into the checking machinery. It performs the checking procedure described above. The argument .I cls is a pointer to the node class, which may be null if no user checking is required; .I root is the address of the tree's root link, which is used in diagnostic messages; .I fp is the output string on which diagnostic messages should be printed, which may be null to suppress diagnostic output; .I tree_chkfn is the tree checking function, which may (unusually) be null if the tree structure has no special invariants; .I tree_infosz is the size of a node information block used by the tree checking function; .I tree_state is the state pointer to be passed to the tree checking function; and .I user_state is the state pointer to be passed to the user checking function. The function returns .B XYLA_RC_OK if the check succeeded with no problems; .B XYLA_RC_BAD if the check reported problems with the tree structure; or some other nonzero return code if a checking function caused a panic \(en the code returned is equal to that returned by the panicking checking function. .PP The function .B xyla_bt_bughdr begins printing a diagnostic message reporting a problem with a tree. The header printed has the form .IP .I token .I root .B BUG: .PP The argument .I token identifies the checking function which found the problem, e.g., .B XYLA-AVL for the AVL tree invariant checker; .I root is the root link address, which is printed to identify the faulty tree; and .I fp is the output stream on which the message is to be printed. The caller should continue to describe the problem using .BR fprintf (3) and .B xyla_bt_printnode (below), and finally end with a newline. .PP The function .B xyla_bt_printnode prints a description of a node. The argument .I cls is the node class, as provided to .BR xyla_bt_check ; .I node is the node address (which may be null); and .I fp is the stream on which the description is to be printed. If the node class includes an .B id function, then this is simply called, with the same arguments, to do the job. Otherwise, a generic description is printed, including the node address. .PP The function .B xyla_bt_chkorder is a checking function which verifies that the order of nodes in a tree matches that expected by the .B nav and .B key ordering operations in the node class. The function uses .B "struct xyla_bt_ordinfo" as its node information block, and requires no state. It cannot panic. . .\"-------------------------------------------------------------------------- .SH "EXAMPLES" . .SS "A search tree with string-valued keys" Our first example demonstrates a binary search tree, whose search keys are null-terminated strings. We shall use AVL trees in this example. The node structure must therefore be something like this. .VS .ne 5 .ta 2n struct node { struct xyla_avl_node avl; char *key; /*\fR other data \fP*/ }; .VE Next, we must establish the node class operations. We don't need an update function. Since this is a binary search tree, we should provide .IR "ordering operations" , as described above. Our search keys will be simple C-style character pointers to null-terminated strings. The navigation function can use .BR strcmp (3) to compare keys. .VS .ne 8 .ta 2n \w'\fBstatic int str_nav('u static int str_nav(const struct xyla_bt_nodecls *cls, const struct xyla_bt_node *node, void *arg) { const struct node *n = (const struct node *)node; const char *k = arg; return (strcmp(k, n->key)); } .VE The key-projection function is nearly trivial. .VS .ne 7 .ta 2n \w'\fBstatic int str_key('u static int str_key(const struct xyla_bt_nodecls *cls, const struct xyla_bt_node *node) { const struct node *n = (const struct node *)node; return (n->key); } .VE Finally, we should add checking operations. We can use .B xyla_bt_chkorder as the checking function. To make diagnostic output more readable, it will be best to provide our own node-identification function. .VS .ne 7 .ta 2n \w'\fBstatic void str_id('u static void str_id(const struct xyla_bt_nodecls *cls, const struct xyla_bt_node *node, FILE *fp) { const struct node *n = (const struct node *)node; fprintf(fp, "#", n->key); } .VE With all this done, we can define the node operations table. .VS .ne 4 .ta 2n static const struct xyla_bt_chkops str_ops = { { sizeof(str_ops), 0 }, { str_nav, str_key }, { xyla_bt_chkorder, sizeof(struct xyla_bt_ordinfo), str_id } }; .VE Our node class requires no additional state, so we can use .B struct xyla_bt_nodecls directly for our node class object. This can be initialized as follows. .VS .ne 3 struct xyla_bt_nodecls str_cls; str_cls.ops = &str_ops.node; .VE We can find the node in the tree with a particular key like this. .VS .ne 6 struct xyla_bt_node *root; struct node *node; const char *key; ... node = xyla_avl_lookup(&str_cls, &root, 0, key); .VE Commonly, applications want to find a node with a key, or insert a new node with that key. This can be achieved as follows. .VS .ne 16 .ta 2n +2n struct xyla_bt_node *root; struct xyla_bt_path path; struct node *node; const char *key; size_t kn; ... node = xyla_avl_probe(&str_cls, &root, 0, key, &path); if (!node) { node = malloc(sizeof(struct node)); if (!node) goto nomem; kn = strlen(key); node->key = malloc(kn + 1); if (!node->key) { free(node); goto nomem; } memcpy(node->key, key, kn + 1); /*\fR initialize the rest of the node \fP*/ xyla_avl_insert(&str_cls, &path, node); } /*\fR update an existing node \fP*/ .VE .SS "A tree where nodes are accessed by index" The next example shows how a tree can be used to maintain a sequence of items, with efficient access to nodes by .IR index . The primary `trick' here is to maintain, for each node, the size of that node's subtree. The index of the root node is simply the size of its left subtree; the index of an arbitrary node situated in the left subtree is equal to its index within the left subtree; and the index of a node situated in the right subtree is equal to its index within the right subtree .I plus the size of the left subtree, plus 1 for the root node. .PP We shall use a red-black tree for this example, in the expectation that insertions and removals will be frequent. (A relatively static sequence may well be better handled as a simple vector.) The node structure must be something like this. .VS .ne 5 .ta 2n struct node { struct xyla_rb_node rb; size_t n; /*\fR other data \fP*/ }; .VE The size member .B n needs to be updated when the tree is restructured. We therefore need to define an update function. .VS .ne 9 .ta 2n +2n \w'\fBstatic void idx_upd('u static void idx_upd(struct xyla_bt_nodecls *cls, struct xyla_bt_node *node) { struct node *n = (struct node *)node, *l = (struct node *)n->rb.bt.left, *r = (struct node *)n->rb.bt.right; n->n = 1 + (l ? l->n : 0) + (r ? r->n : 0); } .VE This isn't a search tree, so a key-projection function doesn't make any sense. But we shall still need a navigation function to find a node given its index. Unlike above, this navigation function will need to .I adjust its argument as the search descends the tree, maintaining the invariant that it holds the index of the node sought within the subtree rooted at the current node. .VS .ne 14 .ta 2n +2n \w'fBstatic int idx_nav(' static int idx_nav(struct xyla_bt_nodecls *cls, const struct xyla_bt_node *node, void *arg) { const struct node *n = (const struct node *)node, *l = (const struct node *)n->rb.bt.left; size_t *i_inout = arg; if (l) { if (i < l->n) return (\-1); else i -= l->n; } if (!i) return (0); else { *i_inout = i \- 1; return (+1); } } .VE Finally, we can define checking operations. We omit a node-identification function here, since that will depend on the content of a node. But the checking function is easy enough. .VS .ne 24 .ta \w'\fBstatic int idx_chk('u static int idx_chk(unsigned op, const struct xyla_bt_check *chk) .ta 2n +2n +2n +2n +\w'\fBfprintf('u { const struct node *node, *left, *right; size_t n; int rc = XYLA_RC_OK; if (op == XYLA_CHKOP_AFTER) { node = (const struct node *)chk->node; left = (const struct node *)chk->left; right = (const struct node *)chk->right; n = 1 + (left ? left->n : 0) + (right ? right->n : 0); if (node->n != n) { if (chk->fp) { xyla_bt_bughdr("EXAMPLE", chk->root, chk->fp); fputs("node ", chk->fp); fprintf(chk->fp, " count %lu /= %lu expected\n", (unsigned long)node->n, (unsigned long)n); } rc = XYLA_RC_BAD; } } return (rc); } .VE We can now define the node class operations table. .VS static const struct xyla_bt_chkops idx_ops = { { sizeof(idx_ops), idx_upd }, { 0, 0 }, { idx_chk, 0, 0 } }; .VE The node class can be established as in the previous example .VS .ne 3 struct xyla_bt_nodecls idx_cls; idx_cls.ops = &idx_ops.node; .VE The count .B n in the root node will always be equal to the total number of nodes in the tree. .VS .ne 4 struct xyla_bt_node *root; size_t n; ... n = root ? ((const struct node *)root)->n : 0; .VE To find the node at index .BR i , use code like the following. Note that .B i will be .I clobbered by the navigation function. .VS .ne 6 struct xyla_bt_node *root; struct node *node; size_t i; ... node = xyla_rb_lookup(&idx_cls, &root, idx_nav, &i); .VE The .B node pointer will be null if the index was out of range. .PP To insert a new node at an index .BR i , so that existing nodes with index .B i or greater are shifted along by one place, one might write code as follows. We assume that .B n holds the total number of nodes in the tree: this can either be maintained, or determined from the root node as shown above. .VS .ne 19 .ta 2n +2n struct xyla_bt_node *root; struct xyla_bt_path path; struct node *node; size_t i, n; ... if (i > n) goto out_of_range; else if (i == n) { xyla_rb_lastpath(&root, &path); xyla_rb_afterpath(&path); } else { node = xyla_rb_probe(&idx_cls, &root, idx_nav, &i, &path); assert(node); xyla_rb_beforepath(&path); } node = malloc(sizeof(struct node)); if (!node) goto nomem; /*\fR initialize the node \fP*/ xyla_rb_insert(&idx_cls, &path, node); .VE . .\"-------------------------------------------------------------------------- .SH "BUGS" . It would be really nice if there were some way to implement .IR "copy on write" . Then a node could be attached to multiple trees, with a reference count. When the node needs to be changed, either because the application changes the data held by the node, or because the node's links need to be changed to maintain the tree's balance properties, and the node is part of multiple trees, then, in principle, a fresh copy of the node can be allocated, leaving the other trees unchanged. I'd like to support this by adding a .B touch function to the node class: then the library will .B touch each node that it wants to change and allow the application to create a copy if necessary. This is hard for two reasons. .hP \*o Firstly, and most obviously, it introduces a failure point into middle of the tree balancing algorithms. If the .B touch function fails, most likely due to lack of memory, then the operation needs to be `rewound' somehow and an error reported. This adds significant complexity to the core tree maintenance algorithms. It gets even worse in the set functions, which may end up in a situation where a failure has occurred while the algorithm has lots of small fragments of tree, and it seems very likely that whatever caused the initial failure will also prevent the fragments from being reassembled. It's tempting to declare that .B touch must be infallible, but I'm not yet ready to bite that bullet. .hP \*o Secondly, not all tree structures can support copy-on-write in a useful way. Splay trees maintain parent pointers: I initially tried without, and it led to frequent rebalancing, which had terrible performance. As a result, if any node is replaced, the .I entire tree needs to be rewritten. (Essentially, if we replace some node .IR n , then every other node from which .I n can be reached also needs to replaced. In a tree without parent pointers, that's just the node's ancestors, and there are only logarithmically many of them. If we have parent pointers, though, that's the whole tree. .PP So currently there's no copy-on-write support, because it introduces failures which are really hard to deal with, and can't be implemented on all kinds of trees anyway. .PP There are some kinds of node annotations which are hard to maintain. For example, there's a trick for reversing the order of subranges: essentially, you just set a bit on a node to say `these nodes are in the opposite order from usual'. With that, and swapping a few left and right child links, you're done. Correctly maintaining the swap bits and state seems hard in the current structure. . .\"-------------------------------------------------------------------------- .SH "SEE ALSO" . .BR hsearch (3), .BR tsearch (3). .PP (Seriously, .BR hsearch (3) is comically bad: don't use it. The .BR tsearch (3) function is still quite bad, but at least usable.) . .\"-------------------------------------------------------------------------- .SH "AUTHOR" . Mark Wooding, . . .\"----- That's all, folks -------------------------------------------------- . .\" LocalWords: Adelson aht aq aroot ascendfn avl AVLBAL AVLF bht br broot .\" LocalWords: bsearch bt BTPOS bughdr CBOR chk CHKF chkfn CHKOP CHKRC cls .\" LocalWords: de del DER diffht ds dummyfn el enum ev expht EXTRMPATH fam .\" LocalWords: fB fBfprintf fBstatic fi fm fn fP fprintf fputs fR fu goto .\" LocalWords: Guibas hP hsearch HTCHG htmax IB idfn idx ie infosz inout .\" LocalWords: invariants isect isectht iter joinfn keyfn Landis LBIAS lht .\" LocalWords: lu lvalue malloc mem memcpy nav navfn ne nf nodecls NODEPFX .\" LocalWords: nodeu NODEUSFX nomem offsetof ord ordops ordopslots pos rb .\" LocalWords: RBF RBIAS rc rebalance rebalanced rebalancing remerged rht .\" LocalWords: rit riter rootht rsvd se Seidel sizeof Sleator sp spl .\" LocalWords: splitat splitatfn splitrootfn sqrt sr ss str strcmp .\" LocalWords: strerror strlen sz th TP treaps trp tsearch ue uniht upd .\" LocalWords: updfn uppath VE Velsky w'fBstatic xf xyla Xyla's