.\" -*-nroff-*-
.\"
.\" Manual for Xyla
.\"
.\" (c) 2024 Straylight/Edgeware
.\"
.
.\"----- Licensing notice ---------------------------------------------------
.\"
.\" This file is part of Xyla, a library of binary trees.
.\"
.\" Xyla is free software: you can redistribute it and/or modify it under
.\" the terms of the GNU Lesser General Public License as published by the
.\" Free Software Foundation; either version 3 of the License, or (at your
.\" option) any later version.
.\"
.\" Xyla is distributed in the hope that it will be useful, but WITHOUT
.\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
.\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
.\" License for more details.
.\"
.\" You should have received a copy of the GNU Lesser General Public
.\" License along with Xyla. If not, see .
.
.TH xyla 3 "9 September 2024" "Straylight/Edgeware" "Xyla binary trees library"
.
.SH NAME
xyla \- fast, hard-mode binary trees
.
.\"--------------------------------------------------------------------------
.\" Preliminary definitions.
.
.\" String definitions and font selection.
.ie t \{\
. ds o \(bu
. ds ss \s8\u
. ds se \d\s0
. ds us \s8\d
. ds ue \u\s0
. ds *d \(*d
. ds /= \(!=
. ds <= \(<=
. ds >= \(>=
. ds mu \(mu
. ds sr \(sr
. ds ' \(fm
. ds , \h'\w'\ 'u/2u'
. if \n(.g \{\
. fam P
. ev an-1
. fam P
. ev
. \}
.\}
.el \{\
. ds o o
. ds ss ^
. ds se
. ds us _
. ds ue
. ds mu *
. ds sr sqrt
. ds ' \(aq
. ds *d \,\fI\&delta\/\fP
. ds /= /=
. ds <= <=
. ds >= >=
. ds , \ \"
.\}
..
.
.\" hP TEXT -- start an indented paragraph with TEXT hanging off to the left
.de hP
. IP
. ft B
\h'-\w'\\$1\ 'u'\\$1\ \c
. ft P
..
.
.\" .VS ... .VE -- present a code sample; separate paragraphs with `.VP'
.de VS
. sp
. RS
. nf
. ft B
..
.ie t \{\
. de VP
. sp .4v
..
.\}
.el \{\
. de VP
. sp
..
.\}
.de VE
. ft R
. fi
. RE
. sp
..
.
.\"--------------------------------------------------------------------------
.SH SYNOPSIS
.
.nf
.ne 5
.B "#include "
.B "#include "
.B "#include "
.B "#include "
.B "#include "
.PP
.ne 3
.fi
.ta \w'\fB/'u
.B /*
In the following,
.I tree
is any of
.BR avl ,
.BR rb ,
.BR splay ,
or
.BR treap ;
.br
.B " * "
.I TREE
is the corresponding one of
.BR AVL ,
.BR RB ,
.BR SPLAY ,
or
.BR TREAP ;
and
.br
.B " * "
.I t
is the corresponding one of
.BR avl ,
.BR rb ,
.BR spl ,
or
.BR trp .
.br
.B " */"
.nf
.PP
.ne 11
.BR /* " Return codes. " */
.ta 2n
.B "enum {"
.B " XYLA_RC_OK = 0,"
.B " XYLA_RC_HTCHG = ...,"
.B " XYLA_RC_FAIL = ...,"
.B " XYLA_RC_TALL = ...,"
.B " XYLA_RC_BAD = ...,"
.B " XYLA_RC_NOMEM = ...,"
.B " XYLA_RC_LOSELIMIT = ...,"
.B " XYLA_RC_WINLIMIT = ..."
.B "};"
.PP
.BI "const char *xyla_strerror(int " rc );
.PP
.ne 6
.BR /* " Basic node structure. " */
.ta 2n
.B "struct xyla_bt_node {"
.B " struct xyla_bt_node *left, *right;"
.B "};"
.PP
.ne 4
.BR /* " Node class. " */
.ta 2n
.B "struct xyla_bt_nodecls {"
.B " const struct xyla_bt_nodeops *ops;"
.B "};"
.PP
.ne 8
.ta \w'\fB\&typedef void xyla_bt_updfn('u
.BI "typedef void xyla_bt_updfn(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node *" node );
.B "typedef void xyla_bt_dummyfn(void);"
.ta 2n
.B "struct xyla_bt_nodeops {"
.B " size_t sz;"
.B " xyla_bt_updfn *upd;"
.B " xyla_bt_dummyfn *_rsvd2, *_rsvd3";
.B "};"
.PP
.ne 13
.ta \w'\fB\&typedef int xyla_bt_navfn('u
.BI "typedef int xyla_bt_navfn(struct xyla_bt_nodecls *" cls ,
.BI " const struct xyla_bt_node *" node ,
.BI " void *" arg );
.ta \w'\fB\&typedef const void *xyla_bt_keyfn('u
.BI "typedef const void *xyla_bt_keyfn(struct xyla_bt_nodecls *" cls ,
.BI " const struct xyla_bt_node *" node );
.ta 2n
.B "struct xyla_bt_ordopslots {"
.B " xyla_bt_navfn *nav;"
.B " xyla_bt_keyfn *key;"
.B "};"
.B "struct xyla_bt_ordops {"
.B " struct xyla_bt_nodeops node;"
.B " struct xyla_bt_ordopslots ord;"
.B "};"
.PP
.ne 6
.ta 2n
.BR /* " Node positions. " */
.B "enum {"
.B " XYLA_BTPOS_ROOT,"
.B " XYLA_BTPOS_LEFT,"
.B " XYLA_BTPOS_RIGHT"
.B "};"
.PP
.ne 9
.BR /* " Iteration. " */
.BI "struct xyla_" tree "_iter { ...\& };"
.BI "struct xyla_" tree "_riter { ...\& };"
.ta \w'\fB\&void xyla_\fI\&tree\/\fB_inititer('u
.BI "void xyla_" tree "_inititer(const struct xyla_bt_node *" root ,
.BI " struct xyla_" tree "_iter *" it );
.BI "void *xyla_" tree "_next(struct xyla_" tree "_iter *" it );
.ta \w'\fB\&void xyla_\fI\&tree\/\fB_initriter('u
.BI "void xyla_" tree "_initriter(const struct xyla_bt_node *" root ,
.BI " struct xyla_" tree "_riter *" rit );
.BI "void *xyla_" tree "_prev(struct xyla_" tree "_riter *" rit );
.PP
.ne 6
.BR /* " Paths. " */
.BI "struct xyla_" tree "_path { ...\& };"
.ta \w'\fB\&typedef int xyla_bt_ascendfn('u
.BI "typedef int xyla_bt_ascendfn(struct xyla_bt_node *" node ,
.BI " struct xyla_bt_node *" parent ,
.BI " struct xyla_bt_node *" sibling ,
.BI " unsigned " pos ", void *" arg );
.ne 3
.BI "void *xyla_" tree "_current(const struct xyla_" tree "_path *" path );
.ta \w'\fB\&void xyla_\fI\&tree\/\fB_copypath('u
.BI "void xyla_" tree "_copypath(struct xyla_" tree "_path *" newpath ,
.BI " const struct xyla_" tree "_path *" path );
.ne 8
.ta \w'\fB\&void *xyla_\fI\&tree\/\fB_firstpath('u
.BI "void *xyla_" tree "_firstpath(struct xyla_bt_node **" root ,
.BI " struct xyla_" tree "_path *" path );
.ta \w'\fB\&void *xyla_\fI\&tree\/\fB_lastpath('u
.BI "void *xyla_" tree "_lastpath(struct xyla_bt_node **" root ,
.BI " struct xyla_" tree "_path *" path );
.BI "void *xyla_" tree "_nextpath(struct xyla_" tree "_path *" path );
.BI "void *xyla_" tree "_prevpath(struct xyla_" tree "_path *" path );
.BI "void xyla_" tree "_beforepath(struct xyla_" tree "_path *" path );
.BI "void xyla_" tree "_afterpath(struct xyla_" tree "_path *" path );
.ne 6
.ta \w'\fB\&void *xyla_\fI\&tree\/\fB_rootpath('u
.BI "void *xyla_" tree "_rootpath(struct xyla_" tree "_path *" path ,
.BI " struct xyla_bt_node **" root );
.ta \w'\fB\&void *xyla_\fI\&tree\/\fB_uppath('u
.BI "void *xyla_" tree "_uppath(unsigned *" pos_out ,
.BI " const struct xyla_" tree "_path *" path );
.BI "void *xyla_" tree "_leftpath(struct xyla_" tree "_path *" path );
.BI "void *xyla_" tree "_rightpath(struct xyla_" tree "_path *" path );
.ne 6
.ta \w'\fB\&void xyla_\fI\&tree\/\fB_replace('u
.BI "void xyla_" tree "_replace(const struct xyla_" tree "_path *" path ,
.BI " struct xyla_" tree "_node *" node );
.ta \w'\fB\&void xyla_\fI\&tree\/\fB_ripple('u
.BI "void xyla_" tree "_ripple(struct xyla_bt_nodecls *" cls ,
.BI " const struct xyla_" tree "_path *" path );
.ta \w'\fB\&int xyla_\fI\&tree\/\fB_ascend('u
.BI "int xyla_" tree "_ascend(xyla_bt_ascendfn *" fn ,
.BI " const struct xyla_" tree "_path *" path ", void *" arg );
.PP
.ne 7
.BR /* " Searching. " */
.ta \w'\fB\&void *xyla_\fI\&tree\/\fB_lookup('u
.BI "void *xyla_" tree "_lookup(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node **" root ,
.BI " xyla_bt_navfn *" nav ", void *" arg );
.ta \w'\fB\&void *xyla_\fI\&tree\/\fB_probe('u
.BI "void *xyla_" tree "_probe(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node **" root ,
.BI " xyla_bt_navfn *" nav ", void *" arg ,
.BI " struct xyla_" tree "_path *" path );
.PP
.ne 6
.BR /* " Insertion and removal. " */
.ta \w'\fB\&int xyla_\fI\&tree\/\fB_insert('u
.BI "int xyla_" tree "_insert(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_" tree "_path *" path ,
.BI " struct xyla_" tree "_node *" node );
.ta \w'\fB\&int xyla_\fI\&tree\/\fB_remove('u
.BI "int xyla_" tree "_remove(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_" tree "_path *" path );
.PP
.ne 6
.BR /* " Joining and splitting. " */
.ta \w'\fB\&int xyla_\fI\&tree\/\fB_join('u
.BI "int xyla_" tree "_join(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node **" root_out\fR[ ", int *" rootht_out\fR] ,
.BI " struct xyla_bt_node **" left\fR[ ", int " lht\fR] ,
.BI " struct xyla_bt_node *" mid ,
.BI " struct xyla_bt_node **" right\fR[ ", int " rht\fR] );
.ne 4
.ta \w'\fB\&int xyla_\fI\&tree\/\fB_split('u
.BI "int xyla_" tree "_split(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node **" left_out\fR[ ", int *" lht_out\fR] ,
.BI " struct xyla_bt_node **" mid_out ,
.BI " struct xyla_bt_node **" right_out\fR[ ", int *" rht_out\fR] ,
.BI " struct xyla_" tree "_path *" path );
.ne 7
.ta \w'\fB\&int xyla_\fI\&tree\/\fB_splitat('u +2n
.BI "int xyla_" tree "_splitat(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node **" left_out ,
.BI " " \fR[ "int *" lht_out ,\fR]
.BI " struct xyla_bt_node **" mid_out ,
.BI " struct xyla_bt_node **" right_out ,
.BI " " \fR[ "int *" lht_out ,\fR]
.BI " struct xyla_bt_node **" root ,
.BI " const void *" key );
.ne 6
.ta \w'\fB\&int xyla_\fI\&tree\/\fB_splitroot('u +2n
.BI "int xyla_" tree "_splitroot(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node **" left_out ,
.BI " " \fR[ "int *" lht_out ,\fR]
.BI " struct xyla_bt_node **" root_out ,
.BI " struct xyla_bt_node **" right_out ,
.BI " " \fR[ "int *" rht_out ,\fR]
.BI " struct xyla_bt_node **" root\fR[ ", int " ht\fR] );
.PP
.ne 8
.BR /* " Set operations. " */
.ta \w'\fB\&int xyla_\fI\&tree\/\fB_unisect('u +2n
.BI "int xyla_" tree "_unisect(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node **" uni_out ,
.BI " " \fR[ "int *" uniht_out ,\fR]
.BI " struct xyla_bt_node **" isect_out ,
.BI " " \fR[ "int *" isectht_out ,\fR]
.BI " struct xyla_bt_node **" aroot\fR[ ", int " aht\fR] ,
.BI " struct xyla_bt_node **" broot\fR[ ", int " bht\fR] );
.ne 7
.ta \w'\fB\&int xyla_\fI\&tree\/\fB_diffsect('u +2n
.BI "int xyla_" tree "_diffsect(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node **" diff_out ,
.BI " " \fR[ "int *" diffht_out ,\fR]
.BI " struct xyla_bt_node **" isect_out ,
.BI " " \fR[ "int *" isectht_out ,\fR]
.BI " struct xyla_bt_node **" aroot\fR[ ", int " aht\fR] ,
.BI " struct xyla_bt_node **" broot );
.PP
.ne 5
.BR /* " Checking. " */
.ta \w'\fB\&int xyla_\fI\&tree\/\fB_check('u
.BI "int xyla_" tree "_check(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node *const *" root ,
.BI " FILE *" fp ", unsigned " f ,
.BI " " \fR[ "int " expht "," \fR] " void *" arg );
.PP
.ne 8
.ta 2n
.BR /* " AVL tree node structure. " */
.B "#define XYLA_AVLF_BALMASK 3u"
.B "#define XYLA_AVLBAL_LBIAS 1u"
.B "#define XYLA_AVLBAL_EVEN 0u"
.B "#define XYLA_AVLBAL_RBIAS 2u"
.B "struct xyla_avl_nodeslots {"
.B " unsigned f;"
.B "};"
.PP
.ne 2
.BR /* " AVL tree operations. " */
.BI "int xyla_avl_height(const struct xyla_avl_node *" node );
.PP
.ta 2n
.ne 5
.BR /* " Red-black tree node structure. " */
.B "#define XYLA_RBF_RED 1u"
.B "struct xyla_rb_nodeslots {"
.B " unsigned f;"
.B "};"
.PP
.ne 2
.BR /* " Red-black tree operations. " */
.BI "int xyla_rb_height(const struct xyla_rb_node *" node );
.PP
.ne 4
.ta 2n
.BR /* " Splay tree node structure. " */
.B "struct xyla_splay_nodeslots {"
.B " struct xyla_splay_node *parent;"
.B "};"
.PP
.ne 5
.BR /* " Splay tree operations. " */
.ta \w'\fB\&void xyla_splay_splay('u
.BI "void xyla_splay_splay(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_splay_path *" path );
.ta \w'\fB\&void xyla_splay_rebalance('u
.BI "void xyla_splay_rebalance(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node **" root );
.PP
.ne 4
.ta 2n
.BR /* " Treap node structure. " */
.B "struct xyla_treap_nodeslots {"
.B " size_t wt;"
.B "};"
.PP
.ne 8
.ta 2n 64n
.BR /* " Common node structure machinery. " */
.B "#define XYLA_BT_NODEPFX struct xyla_bt_node node"
.BI "#define XYLA_" TREE "_NODEPFX \e"
.B " XYLA_BT_NODEPFX; \e"
.BI " struct xyla_" tree "_nodeslots " t
.BI "struct xyla_" tree "_node {"
.B " struct xyla_bt_node bt;"
.BI " struct xyla_" tree "_nodeslots " t ;
.B "};"
.ne 8
.ta 2n 64n
.B "#define XYLA_BT_NODEUSFX struct xyla_bt_node node"
.BI "#define XYLA_" TREE "_NODEUSFX \e"
.BI " struct xyla_" tree "_node " t "; \e"
.B " XYLA_BT_NODEUSFX"
.BI "union xyla_" tree "_nodeu {"
.BI " struct xyla_" tree "_node " t ;
.B " struct xyla_bt_node bt;"
.B "};"
.PP
.ne 5
.BR /* " Low-level iteration machinery. " */
.ta 2n
.ta \w'\fB\&void XYLA_BT_STEPITER('u
.BI "void XYLA_BT_STEPITER(int &" rc ", struct xyla_bt_node *&" node ,
.BI " struct xyla_bt_node **" stack ", int &" sp ", " back );
.BI "void *xyla_bt_severfirst(struct xyla_bt_node **" root );
.PP
.ne 4
.BR /* " Low-level path machinery. " */
.ta 2n +\w'\fB('u
.B "struct xyla_bt_node *XYLA_BT_CURRENT"
.BI " (struct xyla_bt_node **const *" path ,
.BI " int " k );
.ne 3
.ta \w'\fB\&void XYLA_BT_COPYPATH('u
.BI "void XYLA_BT_COPYPATH(struct xyla_bt_node ***" newpath ,
.BI " struct xyla_bt_node **const *" path ", int " k );
.BI " struct xyla_bt_node ***" path ", int &" k ,
.ne 9
.ta \w'\fB\&void XYLA_BT_EXTRMPATH('u
.BI "void XYLA_BT_EXTRMPATH(int &" rc ", struct xyla_bt_node *&" node ,
.BI " struct xyla_bt_node **" root ,
.BI " struct xyla_bt_node ***" path ", int &" k ,
.BI " " dir ", int " htmax );
.ta \w'\fB\&void XYLA_BT_STEPPATH('u
.BI "void XYLA_BT_STEPPATH(int &" rc ", struct xyla_bt_node *&" node ,
.BI " struct xyla_bt_node ***" path ", int &" k ,
.BI " " dir ", " opp ", int " htmax );
.ta \w'\fB\&void XYLA_BT_NUDGEPATH('u
.BI "void XYLA_BT_NUDGEPATH(int &" rc ,
.BI " struct xyla_bt_node ***" path ", int &" k ,
.BI " " dir ", " opp ", int " htmax );
.ne 8
.ta \w'\fB\&void XYLA_BT_ROOTPATH('u
.BI "void XYLA_BT_ROOTPATH(struct xyla_bt_node *&" node ,
.BI " struct xyla_bt_node **" root ,
.BI " struct xyla_bt_node ***" path ", int &" k );
.ta \w'\fB\&void XYLA_BT_PARENTPATH('u
.BI "void XYLA_BT_PARENTPATH(struct xyla_bt_node *&" node ", unsigned &" pos ,
.BI " struct xyla_bt_node ***" path ", int &" k );
.ta \w'\fB\&void XYLA_BT_CHILDPATH('u
.BI "void XYLA_BT_CHILDPATH(int &" rc ", struct xyla_bt_node *&" node ,
.BI " struct xyla_bt_node ***" path ", int &" k ,
.BI " " dir ", int " htmax );
.ne 5
.ta \w'\fB\&void XYLA_BT_RIPPLE('u
.BI "void XYLA_BT_RIPPLE(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node **const *" path ", int " k );
.ta \w'\fB\&void XYLA_BT_ASCEND('u
.BI "void XYLA_BT_ASCEND(int &" rc ", struct xyla_bt_ascendfn *" fn,
.BI " struct xyla_bt_node **const *" path ", int " k ,
.BI " void *" arg );
.PP
.ne 10
.BR /* " Low-level search machinery. " */
.ta \w'\fB\&void *XYLA_BT_LOOKUP('u
.BI "void *XYLA_BT_LOOKUP(struct xyla_bt_node *&" node ,
.BI " struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node *" root ,
.BI " xyla_bt_navfn *" navfn ", void *" arg );
.ta \w'\fB\&void *XYLA_BT_PROBE('u
.BI "void *XYLA_BT_PROBE(int &" rc ", struct xyla_bt_node *&" node ,
.BI " struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node *" root ,
.BI " xyla_bt_navfn *" navfn ", void *" arg ,
.BI " struct xyla_bt_node ***" path ", int &" k ", int " htmax );
.PP
.ne 6
.BR /* " Low-level removal machinery. " */
.ta \w'\fB\&int xyla_bt_remove('u +2n
.BI "int xyla_bt_remove(struct xyla_bt_node **" del_out ,
.BI " struct xyla_bt_node **" subst_out ,
.BI " struct xyla_bt_node **" replace_out ,
.BI " struct xyla_bt_node ***" path ", int *" k_inout ,
.BI " int " htmax );
.PP
.ne 6
.BR /* " Low-level set-operation machinery. " */
.ta \w'\fB\&typedef int xyla_bt_joinfn('u +2n
.BI "typedef int xyla_bt_joinfn(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node **" root_out ,
.BI " int *" rootht_out ,
.BI " struct xyla_bt_node **" left ", int " lht ,
.BI " struct xyla_bt_node *" mid ,
.BI " struct xyla_bt_node **" right ", int " rht );
.ne 7
.ta \w'\fB\&typedef int xyla_bt_splitatfn('u +2n
.BI "typedef int xyla_bt_splitatfn(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node **" left_out ,
.BI " int *" lht_out ,
.BI " struct xyla_bt_node **" mid_out ,
.BI " struct xyla_bt_node **" right_out ,
.BI " int *" lht_out ,
.BI " struct xyla_bt_node **" root ,
.BI " const void *" key );
.ne 7
.ta \w'\fB\&typedef int xyla_bt_splitrootfn('u +2n
.BI "typedef int xyla_bt_splitrootfn(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node **" left_out ,
.BI " int *" lht_out ,
.BI " struct xyla_bt_node **" root_out ,
.BI " struct xyla_bt_node **" right_out ,
.BI " int *" rht_out ,
.BI " struct xyla_bt_node **" root ,
.BI " int " ht );
.ne 6
.ta 2n
.B "struct xyla_bt_treeops {"
.B " xyla_bt_joinfn *join;"
.B " xyla_bt_splitatfn *splitat;"
.B " xyla_bt_splitrootfn *splitroot;"
.B "};"
.B "struct xyla_bt_setopstack { ...\& };"
.ne 9
.ta \w'\fB\&int xyla_bt_unisect('u +2n
.BI "int xyla_bt_unisect(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node **" uni_out ,
.BI " int *" uniht_out ,
.BI " struct xyla_bt_node **" isect_out ,
.BI " int *" isectht_out ,
.BI " struct xyla_bt_node **" aroot ", int " aht ,
.BI " struct xyla_bt_node **" broot ", int " bht ,
.BI " const struct xyla_bt_treeops *" ops ,
.BI " struct xyla_bt_setopstack *" stack ", int " htmax );
.ne 9
.ta \w'\fB\&int xyla_bt_diffsect('u +2n
.BI "int xyla_bt_diffsect(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node **" diff_out ,
.BI " int *" diffht_out ,
.BI " struct xyla_bt_node **" isect_out ,
.BI " int *" isectht_out ,
.BI " struct xyla_bt_node **" aroot ", int " aht ,
.BI " struct xyla_bt_node **" broot ,
.BI " const struct xyla_bt_treeops *" ops ,
.BI " struct xyla_bt_setopstack *" stack ", int " htmax );
.PP
.ta 2n
.ne 8
.BR /* " Low-level tree walk/check framework. " */
.B "enum {"
.B " XYLA_CHKOP_NIL,"
.B " XYLA_CHKOP_SETUP,"
.B " XYLA_CHKOP_BEFORE,"
.B " XYLA_CHKOP_MID,"
.B " XYLA_CHKOP_AFTER,"
.B " XYLA_CHKOP_TEARDOWN"
.B "};"
.ne 3
.B "#define XYLA_CHKF_COREMASK 0x003fu"
.B "#define XYLA_CHKF_TREEMASK 0x07c0u"
.B "#define XYLA_CHKF_APPMASK 0xf800u"
.PP
.ne 11
.ta 2n
.B "struct xyla_bt_check {"
.B " struct xyla_bt_nodecls *cls;"
.B " struct xyla_bt_node *const *root;"
.B " FILE *fp;"
.B " unsigned f;"
.B " struct xyla_bt_node *parent; void *parent_info;"
.B " struct xyla_bt_node *node; unsigned pos; void *node_info;"
.B " struct xyla_bt_node *left; void *left_info;"
.B " struct xyla_bt_node *right; void *right_info;"
.B " void *state;"
.B "};"
.PP
.ne 5
.ta \w'\fB\&typedef int xyla_bt_chkfn('u
.BI "typedef int xyla_bt_chkfn(unsigned " op ,
.BI " const struct xyla_bt_check *" chk );
.ta \w'\fB\&typedef void xyla_bt_idfn('u
.BI "typedef void xyla_bt_idfn(struct xyla_bt_nodecls *" cls ,
.BI " const struct xyla_bt_node *" node ,
.BI " FILE *" fp );
.ne 9
.ta 2n
.B "struct xyla_bt_chkopslots {"
.B " xyla_bt_chkfn *chk; size_t infosz;"
.B " xyla_bt_idfn *id;"
.B "};"
.B "struct xyla_bt_chkops {"
.B " struct xyla_bt_nodeops node;"
.B " struct xyla_bt_ordopslots ord;"
.B " struct xyla_bt_chkopslots chk;"
.B "};"
.PP
.ne 6
.ta \w'\fB\&void xyla_bt_bughdr('u
.BI "void xyla_bt_bughdr(const char *" token ,
.BI " struct xyla_bt_node *const *" root ", FILE *" fp );
.ta \w'\fB\&void xyla_bt_printnode('u
.BI "void xyla_bt_printnode(struct xyla_bt_nodecls *" cls ,
.BI " const struct xyla_bt_node *" root ", FILE *" fp );
.PP
.ne 6
.ta \w'\fB\&int xyla_bt_check('u +2n
.BI "int xyla_bt_check(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node *const *" root ,
.BI " FILE *" fp ", unsigned " f ,
.BI " xyla_bt_chkfn *" tree_chkfn ,
.BI " size_t " tree_infosz ", void *" tree_state ,
.BI " void *" user_state );
.PP
.ne 2
.B "struct xyla_bt_ordinfo { ...\& };"
.BI "xyla_bt_chkfn xyla_bt_chkorder;"
.fi
.
.\"--------------------------------------------------------------------------
.SH "OVERVIEW"
.
Xyla is a library which implements binary trees.
.
.SS "Binary trees"
Binary trees are versatile data structures
which maintain an
.I ordered sequence
of items.
Sometimes we think of the items arranged in space,
and say that items earlier in the sequence are on the
.I left
and items later in the sequence are on the
.IR right .
(Apologies to readers whose culture conventionally arranges things
from right to left.)
Elements can be searched for;
enumerated;
inserted between any adjacent pair of existing items, or at either end;
and removed.
Sequences can be split at any position, and concatenated.
If the sequences are thought of as representing mathematical sets,
then intersections, unions, and differences can be determined.
.PP
Technically, a binary tree is either
.I nil
or consists of a
.IR node ,
which carries an item of data,
and has links to
.I left
and
.I right
.IR children ,
each of which is a binary tree.
The node is the
.I parent
of its children.
The node is sometimes called the tree's
.IR root ;
if a node's children aren't nil,
then they are the roots of their own
.IR subtrees .
Computer scientists learned about trees from genealogists,
who are bad at botany and think that a tree has its root at the
.I top
and grows
.IR downwards .
.PP
A binary tree's
.I height
is the largest number of links you have to follow,
starting from the root,
before you get to a nil subtree.
Most operations on binary trees work by descending a tree,
starting from the root and following links to children;
or ascending, working from parent to child until reaching the root,
so the tree's height is the primary limit on its performance.
Xyla implements several different kinds of binary trees
which differ according to the methods used to limit tree height,
and the resulting performance tradeoffs.
For example,
AVL and red-black trees offer deterministic good
.RI ( O (log\ n ))
performance,
but have relatively complicated and expensive algorithms;
splay trees offer good
.I amortized
performance,
but restructure trees as a result of conceptually read-only operations,
and any single operation may require
.IR O ( n )
time; and
treaps are randomized, offering
.I expected
good performance,
but require a good source of random data,
and may deliver poor performance in any given instance,
though with low probability.
.
.SS "Intrusive data structures"
The trees implemented by Xyla are
.IR intrusive :
the storage used to maintain the tree structure
is provided by the caller as part of its data.
The `intrusion' into the caller's data consists of a small
(generally about three words)
structure to be included as part of the caller's data.
Each kind of tree uses a different intrusion type.
.PP
The intrusion is most conveniently placed first,
but circumstances may dictate other positions.
For example, it's possible that a node is part of
several trees at the same time:
they require distinct intrusions, and at most one can be placed first.
A macro like the following may be useful in such cases.
.VS
.ta 2n +\w'\fB('u 56n
#define CONTAINER(type, mem, p) \e
(!sizeof((p) == &((type *)0)->mem) + \e
(type *)((unsigned char *)(p) \- offsetof(type, mem)))
.VE
.
.SS "Iterators"
An
.I iterator
is a data object which keeps track of
the progress of an iteration.
Once initialized,
an iterator returns each node in the tree,
in sequence order:
either left to right, or right to left.
(Iteration is therefore
.I external
rather than
.IR internal .
External iteration doesn't pervert
the control structure of the calling program.)
.PP
Iterators are simple structures that hold no resources.
A tree isn't changed at all when an iterator is created,
or when an iterator is advanced:
any number of iterators can be active on a single tree at a time; and
it is safe for several threads to iterate over a single tree
using their own separate iterators.
It is safe to discard an iterator without ceremony
once it's no longer needed.
.PP
Iterators are invalidated by operations
which alter the structure of a tree:
insertions, removals, splitting, joining, and set operations.
(Splay tree iterators are an exception to this rule:
a splay tree iterator remains valid
as long as the
.I next
node in the sequence continues to exist.)
.PP
(The primary reason for supporting right-to-left iteration is
.IR serialization .
A number of serialization formats precede each item with a
.I length
or
.I count
field.
In some formats \(en
notably DER, MessagePack, and CBOR \(en
this field may itself have variable size,
depending on its value.
Writing such formats is much more easily done in reverse order.)
.
.SS "Paths"
A
.I path
is a data object which identifies a position in a binary tree.
A
.I full
path identifies the position of a
.I "target node"
in the tree;
an
.I empty
path identifies the position of the
.I gap
between two nodes,
or at one of the extreme ends of the tree.
Note that a nil tree has a single gap, and,
in general, a tree containing
.I k
nodes
has
.IR k "\ +\ 1"
gaps.
.PP
Paths are simple structures that hold no resources.
A tree isn't changed at all when a path is created,
or when a path is modified:
any number of paths can be active on a single tree at a time; and
it is safe for several threads to navigate a single tree
using their own separate paths.
It is safe to discard a path without ceremony
once it's no longer needed.
Paths can also be copied safely,
e.g., by assignment or by calling
.BR memcpy (3),
but they may be quite large and most of the space isn't used
except in very large trees.
Functions are provided to copy just the relevant parts of paths.
(Splay tree paths are small and cam be copied simply.
But a function is still provided, for the sake of consistency.)
.PP
Functions are provided
to establish a path referring to particular nodes,
and to adjust paths so as to refer to other related nodes and gaps.
These operations split into two categories.
.hP \*o
.I Logical
operations act on the sequence of items held in a tree.
There are operations to establish a path referring to
the first or last node,
to adjust a path so as to refer to
the following or preceding node,
or to the gap just before or after the current node.
.hP \*o
.I Structural
operations act on the structure of the tree.
There is an operation to establish a path referring to
the tree's root node,
and operations to adjust a path to as to refer to
the current node's parent,
or to its left or right child.
.PP
A path can also be established by
.I probing
the tree: see
.B Searching
below.
.PP
Like iterators, paths are invalidated by operations
which alter the structure of a tree:
insertions, removals, splitting, joining, and set operations.
(Splay tree paths are an exception to this rule:
a full splay tree path remains valid
as long as the node to which it refers continues to exist;
an empty splay tree path remains valid
as long as the
.I parent
node
\(en whose nil subtree denotes the gap to which the path refers \(en
exists.)
.
.SS "Searching"
Trees are searched under the control of a
.IR "navigation function" .
This is given a pointer to a node and an opaque argument pointer
(and also a pointer to a node class, discussed below),
and returns a negative, zero, or positive value,
according to whether the position sought is
before, equal to, or after the given node.
For traditional
.IR "binary search trees" ,
where the nodes ordering matches the order of some
.I search key
held as part of each node,
the navigation function works very much like
the comparison functions used by the
.BR bsearch (3)
and
.BR qsort (3)
functions in the standard C library,
except that the arguments are in the opposite order.
.PP
A
.I lookup
just returns a pointer to the matching node, or null.
A
.I probe
additionally fills in a path
describing the position of the node in the tree,
or, if no matching node was found,
the gap where a matching node would be had it been found.
.
.SS "Node classes"
Many of Xyla's functions expect a
.I node class
argument.
A node class is represented by a simple structure
holding a pointer to an
.I operations table
mostly containing pointers to functions.
Applications can include additional data
following the operations-table pointer,
which the node-class functions can make use of.
Simple uses don't require a node class,
and you can safely pass a null pointer
if you don't need the additional functionality.
.PP
Consistent with this, searching a binary search trees needs
only an appropriate navigation function.
However, invariant-checking and set operations require an
.IR "ordered node class" ,
which is a node class with an extended operations table
containing additional function pointers.
Specifically, an ordered node class provides
a navigation function for searching a tree for a given key,
and a
.I key-projection function
which returns a pointer to a node's search key,
suitable for passing to the navigation function,
given the address of the node itself.
.
.SS "Tree operations"
A node can be inserted in the gap indicated by an empty path;
and the node identified by a full path can be removed from the tree.
.PP
A tree can be
.I split
at the position indicated by a path.
The result consists of two trees,
one containing all of the nodes strictly
.I preceding
the position indicated by the path,
and one containing all of the nodes strictly
.I following
that position;
if the path is full,
then the indicated node is returned separately,
being part of neither tree.
.PP
A pair of trees can be
.IR joined ,
with an optional
.IR "middle node" :
the result is a single tree containing,
in order,
all the nodes of the first tree,
followed by the middle node, if present,
followed by all the nodes of the second tree.
.PP
Finally, binary search trees which have an ordered node class
can be processed as
.IR sets .
Trees are considered as representing sets,
with each node representing an element;
equality of elements is determined by the ordered node class
key-projection and navigation functions.
Functions are provided to determine
set unions, intersections, and differences.
In order not to lose any nodes,
the functions provided are a little strange.
.hP \*o
A
.I unisect
operation determines both the union and intersection of two given sets.
Since the union consists of one copy of each element
occurring in
.I either
set,
and the intersection consists of one copy of each element
occurring in
.I both
sets,
the union and intersection trees between them hold
every node of the input trees.
.hP \*o
A
.I diffsect
operation determines both the difference and intersection of two given sets.
Since the difference consists of the elements of the first input set
which
.I don't
occur in the second,
while the intersection consists of the elements of the first input set
which
.I do
occur in the second,
the difference and intersection trees between them hold every node
of the first input tree;
the second input tree is unchanged.
.
.SS "Summary data"
Xyla trees can maintain per-node
.IR "summary data" .
This can be any information that an application wants to keep track of
that can be determined efficiently
from a node itself, and its children \(en
ideally in constant time.
A common example of a summary would be
a count of the number of nodes
in the subtree rooted at a particular node:
a node's count is clearly equal to
the sum of its children's counts, plus one for itself,
with an nil subtree having count zero.
This can be used to find the
.IR k th
node in order efficiently, using an appropriate navigation function.
.PP
Summary data is maintained by an
.I update function
provided by a node class.
The update function is called every time a node's children are modified,
as a result of restructuring the tree.
An update can also be requested explicitly
if an application changes a node's summary data itself,
and needs to
.I ripple
the change to the node's ancestors.
.PP
Finally, functions are provided to
.I ascend
the tree, accumulating information about
the node and its ancestors.
If each node records a count of nodes in its subtree,
then ascending the tree can be used to determine
the node's position in the overall sequence.
(See the
.B EXAMPLES
below for how all of this can be done.)
.
.SS "Checking"
Binary trees are complex structures,
and it's easy for bugs to creep in.
Xyla provides a framework for walking a binary tree reliably,
and checking that it respects the required invariants.
Applications can supply additional checks via the node class,
to verify that summary data (described above) is computed correctly.
.PP
The core of the checking machinery is a nonrecursive tree walk
which is robust against tangled links,
and can associate additional caller-provided information with each node.
.
.SS "Heights"
For some kinds of trees
(specifically, AVL and red-black)
the
.I join
operation is faster if the caller already knows
some simple metric about the tree.
For AVL trees, this is literally the tree's height,
while for red-black trees, it's the
.IR black-height ,
i.e., the (invariant) number of black nodes on every path
starting from the root.
We call this measure the
.IR height ,
just because it's related to the height in the current examples.
The height of a nil tree is always zero.
The insertion and deletion operations on trees
for which height is significant
return a distinctive status code to indicate that
the tree's height has changed by one.
Callers uninterested in maintaining the tree height can safely ignore this.
Operations involving splitting and joining trees
(including the set operations)
have additional arguments
(compared to the versions for trees for which height is unimportant)
for the application to provide the heights of input trees
(or \-1 if the application neglected to keep track)
and/or receive the heights of the output trees.
The invariant-checking functions
for trees where height is relevant
have an additional argument so that,
if the application is tracking the tree height,
it can check that it has the correct value.
.
.\"--------------------------------------------------------------------------
.SH "FUNDAMENTALS"
.
.SS "Return codes"
A number of Xyla functions return an integer-valued
.IR "return code" .
Each possible return code has a name of the form
.BR XYLA_RC_ ...\&.
A nonnegative value indicates that the operation completed successfully;
a negative value indicates failure.
The most common success code is
.BR XYLA_RC_OK ,
which has the value zero, but others are possible.
.PP
Currently, Xyla has very few opportunities for failure.
Indeed, many functions are defined to return success in all cases.
A future version of Xyla will not return a code
that the current version does not return
to the same application.
.PP
The currently defined return codes are as follows.
.TP
.B XYLA_RC_OK
The operation completed successfully.
This code has the numeric value zero.
.TP
.B XYLA_RC_HTCHG
The operation \(en insertion or removal \(en completed successfully,
and, as a result, the tree's height has changed, by exactly one.
.TP
.B XYLA_RC_FAIL
The operation failed in an unspecific manner.
This code has the numeric value \-1.
Xyla itself will not originate this code:
it can only come from an application-provided function.
.TP
.B XYLA_RC_TALL
The operation failed because the tree height exceeded a predefined limit.
.TP
.B XYLA_RC_BAD
The tree structure was determined to be invalid.
.TP
.B XYLA_RC_NOMEM
The operation failed because memory could not be allocated.
.PP
Additionally, two constants
.B XYLA_RC_LOSELIMIT
and
.B XYLA_RC_WINLIMIT
are defined.
Every failure code has a value greater than or equal to
.B XYLA_RC_LOSELIMIT
and strictly less than zero;
and every success code has a value greater than or equal to zero,
and strictly less than
.BR XYLA_RC_WINLIMIT .
.
.SS "Node structures"
The application is responsible for defining the structure of a node.
Indeed,
with the exception of the checking framework,
Xyla works only with memory provided by the application;
but there must be some space for Xyla to maintain its tree structures,
which we call the
.IR "control data" .
At minimum, each node must have links to its left and right subtrees;
though, in fact, each kind of tree requires
some additional control data beyond the two links.
.PP
The left and right links are held in a simple structure named
.BR "struct xyla_bt_node" ;
the links themselves are members of this structure named
.B left
and
.BR right ,
in that order, and there are no other members.
The
.B left
and
.B right
links are each a pointer to the
.B "struct xyla_bt_node"
structure within the referenced node,
which must be explicitly converted to a containing type.
.PP
As mentioned, each kind of tree has its own additional control data.
The whole control data for node is packaged up in a structure named
.BI "struct xyla_" tree "_node" \fR.
The easy way to allocate space for the control data
in an application's node structure
is simply to include a member of type
.BI "struct xyla_" tree "_node"
somewhere in the node structure;
this member is called the
.IR intrusion .
It's often most convenient to place the intrusion
as the first member in the node structure:
this makes it easiest to convert between
pointers to the node as a whole and to the intrusion.
.PP
The cost of this simple approach is that
it can waste a little storage.
The additional control data for some trees is an
.B unsigned
integer field containing a small number of flags.
(The majority of this field is available for application use:
see
.B "TREE IDIOSYNCRASIES"
below.)
Since the
.B left
and
.B right
links are both pointers,
on a 64-bit target,
the overall intrusion structure ends up with
a four-byte `gap' at the end.
(There is no such gap on 32-bit targets.
Other targets may have different structure layout problems.)
.PP
This gap can be filled if the application defines
its node structure in a more complicated way.
Each tree type defines its additional control data as a
.I separate
structure, named
.BI "struct xyla_" tree "_nodeslots" \fR;
the
.BI "struct xyla_" tree "_node"
structure is then
.IP
.ne 4
.nf
.ta 2n
.BI "struct xyla_" tree "_node {"
.B " struct xyla_bt_node bt;"
.BI " struct xyla_" tree "_nodeslots " t ;
.B "};"
.fi
.PP
where
.I t
here is an abbreviated `nickname' for the tree type.
This sequence of members is defined in a macro
.BI XYLA_ TREE _NODEPFX
so that it can be inserted directly into a node structure.
There, it can be followed immediately by additional small members
in order to make use of the otherwise wasted space.
Of course, now we've lost the real
.BI "struct xyla_" tree "_node"
structure which is occasionally needed,
most notably for insertion.
The solution here is to define a
.I union
with one member having the type
.BI "struct xyla_" tree "_node"
for passing into the library,
while another contains the additional data
packed into the gap.
Again, a macro is provided:
.BI XYLA_ TREE _NODEUSFX
lists entire control-data structure types,
this time in decreasing inclusion order.
.PP
For example, one might define
.IP
.ne 10
.nf
.ta 2n +2n +2n
.B "struct node {"
.B " union {"
.B " struct {"
.B " XYLA_AVL_NODEPFX;"
.B " unsigned info;"
.BR " /* " "additional members here" " */"
.B " } node;"
.B " XYLA_AVL_NODEUSFX;"
.B " } u;"
.BR " /* " "additional members here" " */"
.B };
.fi
.PP
Other tricks are surely possible.
.
.SS "Trees"
A tree as a whole is represented by a pointer to its
.IR "root link" :
i.e., the address of a pointer to the root node.
The root link will be null
if and only if the tree is nil (empty);
a new tree is initialized simply by setting its root link to null.
.PP
In the Xyla programming interface,
the root link always has type
.BR "struct xyla_bt_node" ,
rather than the more specific node type
for a particular kind of node.
.
.SS "Navigation functions"
A
.I "navigation function"
is represented by the type
.IP
.nf
.ne 3
.ta \w'\fB\&typedef int xyla_bt_navfn('u
.BI "typedef int xyla_bt_navfn(struct xyla_bt_nodecls *" cls ,
.BI " const struct xyla_bt_node *" node ,
.BI " void *" arg );
.fi
.PP
The search functions are given a pointer to a navigation function
to guide them through the tree to find the intended node.
The
.I cls
and
.I arg
arguments are provided to the search function by the caller.
The
.I arg
pointer is not otherwise interpreted;
the
.I cls
is used to determine a default navigation function
if none is otherwise provided \(en see
.B "Ordering operations"
below.
The
.I node
argument points to a node within the tree.
The navigation function returns an integer, which must be
negative if the desired node is earlier in the sequence than the given
.IR node ,
positive if the desired node is later in the sequence, or
zero if the
.I node
is in fact the desired node.
.PP
A navigation function for a binary search tree
simply compares the search key against the given node,
and returns negative if the search key orders before the key in the node,
zero if they're equal,
or positive if the key orders after the key in the node.
.
.SS "Node classes"
A
.I node class
is represented by a pointer to a value of type
.BR "struct xyla_bt_nodecls" ,
which has a single member:
.VS
.ne 3
.ta 2n
struct xyla_bt_nodecls {
const struct xyla_bt_nodeops *ops;
};
.VE
The
.B "struct xyla_bt_nodeops"
structure will be explained in full below.
For now, it is mostly a table of function pointers,
together with other constant data.
The function pointers are each provided,
as an argument,
the original node-class pointer.
If the
.B "struct xyla_bt_nodecls"
structure is embedded in a larger structure
(usually, again, at the beginning, but this is not necessary)
then these functions can readily recover the larger structure,
which may contain additional constext information,
or maintain state which the functions must maintain.
.PP
The
.B "struct xyla_bt_nodeops"
structure may, in fact, be only the first part
of a larger table of information.
To allow for later compatible extensions,
first member of
.B "struct xyla_bt_nodeops"
simply holds the size of the overall structure, as allocated.
One might therefore write
.VS
.ta 2n
static const struct xyla_bt_nodeops my_nodeops =
{ sizeof(my_nodeops), ... };
.VE
to initialize such a table.
.
.SS "Node operations"
The basic operations are held directly in the
.B "struct xyla_bt_nodeops"
structure.
.VS
.ne 5
.ta 2
struct xyla_bt_nodeops {
size_t sz;
xyla_bt_updfn *upd;
xyla_bt_dummyfn *_rsvd2, *_rsvd3;
};
.VE
The
.B sz
member was described above.
.PP
The
.B upd
function is called after a node has been changed,
and is expected to update any
.I summary data
held by the node.
If either the node-class pointer or the function pointer is null,
then nothing happens.
(This slightly improves the speed of operations
like insertion and removal,
since they don't necessarily need to trace all the way to the tree root.)
.IP
.ne 2
.nf
.ta \w'\fB\&typedef void xyla_bt_updfn('u
.BI "typedef void xyla_bt_updfn(struct xyla_bt_nodecls *" cls ,
.BI " struct xyla_bt_node *" node );
.fi
.PP
The function is given the node-class pointer
.IR cls ,
and a pointer to the
.I node
that has been changed.
It is expected to find the node's children
by inspecting the
.B left
and
.B right
links directly.
.PP
The
.B _rsvd2
and
.B _rsvd3
members are reserved for future expansion,
and must be null for forward compatibility.
.
.SS "Ordering operations"
You don't need to to anything more
to make a binary search tree work in Xyla
than to write the appropriate navigation function
and pass it to the search function.
.PP
Ordering operations are additional functions
which can be provided by a node class
in order to compare two nodes,
rather than compare a node with a search key.
The additional operations are part of
.BR "struct xyla_bt_ordopslots" :
.IP
.nf
.ne 4
.ta 2n
.B "struct xyla_bt_ordopslots {"
.B " xyla_bt_navfn *nav;"
.B " xyla_bt_keyfn *key;"
.B "};"
.fi
.PP
This is bundled together with the basic
.B "struct xyla_bt_nodeops"
in
.BR "struct xyla_bt_ordops" :
.IP
.nf
.ne 4
.ta 2n
.B "struct xyla_bt_ordops {"
.B " struct xyla_bt_nodeops node;"
.B " struct xyla_bt_ordopslots ord;"
.B "};"
.fi
.PP
The application allocates and fills in a
.B "struct xyla_bt_ordops" ,
and then sets the
.B ops
pointer of the
.B "struct xyla_bt_nodecls"
to point to the
.B node
member.
.PP
The
.B nav
member is a navigation function,
as described above.
.PP
The
.B key
member is a pointer to a
.IR "key function" :
.IP
.nf
.ie t \{\
. ne 2
. ta \w'\fB\&typedef const void *xyla_bt_keyfn('u
. BI "typedef const void *xyla_bt_keyfn(struct xyla_bt_nodecls *" cls ,
. BI " const struct xyla_bt_node *" node );
.\}
.el \{\
. ne 3
. ta 2n +\w'\fB('u
. BI "typedef const void *xyla_bt_keyfn"
. BI " (struct xyla_bt_nodecls *" cls ,
. BI " const struct xyla_bt_node *" node );
.\}
.fi
.PP
Given a pointer to a tree node,
the key function must return a pointer to its search key.
The required property is that, if
.I node
is some node within a tree with root
.IR root ,
then
.IP
.BI "xyla_" tree "_lookup(" cls ", " root ,
.IB navfn ", " keyfn "(" cls ", " node ))
.PP
should find the original
.IR node .
.PP
The pointer that the key function returns must remain valid
for the duration of the current call into the Xyla library.
The key function is expected to be fast'
it should not need to allocate memory:
releasing any allocated memory later is the application's responsibility.
.PP
If it's possible to write a simple key function
which efficiently extracts a search key from a node
that works with the navigation function used for searching,
then that's probably the best approach.
The alternative is for the key function simply
to return the node pointer unchanged:
then the order-operation navigation function
compares the two nodes directly against each other
and returns the appropriate ordering.
Of course, this will probably have to be a
.I different
navigation function from the one used for general search.
.PP
The ordering operations are used in four contexts.
.hP \*o
The navigation function will be used as a fallback
by Xyla's tree search functions
if their
.I navfn
argument is null.
This indicates to a reader that
you're searching the tree in the usual way,
and saves a little typing,
in the (hopefully common) case where the
ordering-operations and search navigation functions are identical.
.hP \*o
The ordering operations are used by the
.BI xyla_ tree _splitat
function to find the split point within a tree.
.hP \*o
The
.B "Set operations"
described below use
.BI xyla_ tree _splitat \fR,
and therefore require the ordering operations.
.hP \*o
The
.B xyla_bt_chkorder
function can be used
as part of the checking machinery
to verify that a tree satisfies the ordering properties
required by the ordering operations.
See
.B "CHECKING FRAMEWORK"
below.
.
.\"--------------------------------------------------------------------------
.SH "MISCELLANEOUS FUNCTIONS"
.
This section describes functions of general utility
which aren't part of the implementation of a specific tree structure.
.PP
The
.B xyla_strerror
function returns a pointer to a human readable message string
corresponding to a return code
.IR rc .
If
.I rc
is not a defined error code,
the function returns a pointer to a generic `unknown error code' message.
The returned pointer will remain valid indefinitely.
The function is safe to call from multiple threads concurrently.
.PP
The
.B xyla_bt_severfirst
function detaches the first (leftmost) node from a tree,
and returns a pointer to it;
if the tree is nil, then a null pointer is returned.
The tree structure is modified:
a complete iteration completes in
.IR O ( n )
time, using only constant space.
The modified structure preserves the node ordering,
but is very unlikely to satisfy the usual tree invariants.
In particular, it does
.I not
call any application-provided update function.
Once this function has been called on a tree,
the only safe thing to do with the tree is
to continue severing nodes until the tree is nil.
.
.\"--------------------------------------------------------------------------
.SH "TREE OPERATIONS"
.
This section describes the main functions
provided to operate on binary trees.
Each kind of tree provides its own version
each of the following functions,
but they behave in very similar ways,
so they are all described here together.
In the following descriptions,
.I tree
(in italics)
stands for the name of one of the kinds of tree:
.BR avl ,
.BR rb ,
.BR splay ,
or
.BR treap .
.PP
Behaviour specific to particular kinds of tree
is described in the next section,
.BR "TREE IDIOSYNCRASIES" .
.
.SS "Iteration"
Iteration produces the nodes in the tree
in their proper order,
from start to end.
An iterator value has the type
.BI "struct xyla_" tree "_iter" \fR.
.PP
An iterator is initialized by calling
.BI xyla_ tree _inititer \fR,
passing it pointers
to the tree's root node, and
the iterator to initialize.
.PP
The
.BI xyla_ tree _next
function returns the next node in sequence
(so, for example it returns the first node
when called on a freshly initialized iterator).
Once there are no more nodes to return,
.BI xyla_ tree _next
returns null.
.PP
Iterators don't need any kind of explicit teardown.
.PP
Iterators become invalid when the tree is changed,
i.e., if a node is inserted or removed,
the tree is split, or
the tree is used as an operand to a join or set operation.
(Splay tree iterators remain valid in more cases than this:
see
.B "TREE IDIOSYNCRASIES"
below.)
It is guaranteed that continuing with iteration
will not access any node that has been previously returned.
Consequently, code such as
.VS
struct xyla_\fI\&tree\fP_iter it;
xyla_\fI\&tree\fP_inititer(&root, &it);
for (;;) {
node = xyla_\fI\&tree\fP_next(&it);
free(node);
}
.VE
is a valid way to free all of the nodes of a tree
which have been allocated using
.BR malloc (3).
However, the
.B xyla_bt_severfirst
function is probably better for this task,
.PP
.I "Reverse iteration"
produces the nodes in the tree in reverse order,
from end to start.
A reverse iterator has a distinct type,
.BI "struct xyla_" tree "_riter" \fR,
to prevent mix-ups.
It's not possible to `switch' iteration direction:
this kind of flexibility is possible using paths,
but not with iterators.
.PP
The functions
.BI xyla_ tree _initriter
and
.BI xyla_ tree _prev
work exactly like
.BI xyla_ tree _inititer
and
.BI xyla_ tree _next
described above,
except that they operate on reverse iterators,
and return the nodes in the opposite order.
.
.SS "Path operations"
A path describes the position of a
.I "target node"
in a tree
(a
.IR "full path" ),
or a
.I gap
between two nodes or at either end of the sequence
(an
.IR "empty path" ).
A path value has the type
.BI "struct xyla_" tree "_path" \fR.
.PP
Several functions initialize paths:
.BI xyla_ tree _copypath \fR,
.BI xyla_ tree _firstpath \fR,
.BI xyla_ tree _lastpath \fR,
and
.BI xyla_ tree _rootpath
are described later in this section;
.BI xyla_ tree _probe
is described under
.B "Search, insertion, and removal"
below.
.PP
Paths don't need any kind of explicit teardown.
.PP
Paths become invalid when the tree is changed,
i.e., if a node is inserted or removed,
the tree is split, or
the tree is used as an operand to a join or set operation.
(Splay tree paths remain valid in more cases than this:
see
.B "TREE IDIOSYNCRASIES"
below.)
.PP
The function
.BI xyla_ tree _current
is passed a pointer to a path.
if the path is full,
it returns a pointer to the target node;
if the path is empty,
then it returns null.
.PP
The function
.BI xyla_ tree _copypath
sets
.I newpath
to be a copy of
.IR path .
You can copy a path using ordinary assignment,
but the function knows how to copy
only the currently active parts of the path structure:
for most tree types,
a path structure contains a vector of pointers
(see under
.B PRIMITIVES
for more detail),
which is big enough for the largest possible tree.
Most of the vector is unused for most trees,
so copying it is a waste of time;
but a structure assignment doesn't know how to do that.
.PP
The next group of functions to be discussed
initializes or modifies a path according to the
.I logical
sequence represented by a tree.
.PP
The function
.BI xyla_ tree _firstpath
initializes a
.I path
to target the first node in the tree
whose root link is at
.I root ,
and returns a pointer to this first node.
If the tree is nil, then the path will be empty,
and the function returns null.
The function
.BI xyla_ tree _lastpath
is similar,
except that it targets and returns
the last node in the sequence
rather than the first.
.PP
The function
.BI xyla_ tree _nextpath
modifies an existing
.I path
so as to target the next node in sequence, if any.
If the
.I path
initially targets a node
other than the last in the sequence,
then it is modified to target the next node in sequence.
If the
.I path
initially targets a gap
other than the gap after the last node,
then it is modified to target the node following the gap.
If the
.I path
initially targets the last node in sequence,
then it is updated to target the gap after the last node.
If the
.I path
initially targets the gap after the last node,
then it is unchanged.
The function returns the next node, if there is one,
or null otherwise.
The function
.BI xyla_ tree _prevpath
is similar,
except that it targets and returns
the previous node in the sequence,
unless the
.I path
currently targets the first node or the gap before it,
in which case it targets the gap before the first node
and returns null.
.PP
The function
.BI xyla_ tree _beforepath
modifies an existing full
.I path
so as to target the gap
immediately before the current target node \(en
so following with a call to
.BI xyla_ tree _nextpath
would target and return the original target node again,
and following with a call to
.BI xyla_ tree _prevpath
would target and return the previous node,
as if
.BI xyla_ tree _beforepath
had not been called.
The function
.BI xyla_ tree _afterpath
is similar,
except that it targets the gap
immediately after the current target node.
It is an error to call
.BI xyla_ tree _beforepath
or
.BI xyla_ tree _afterpath
with an empty
.IR path .
.PP
The next group of functions to be discussed
initializes or modifies a path according to the
.I structure
of a tree.
.PP
The function
.BI xyla_ tree _rootpath
initializes a
.I path
to target the root node of the tree
whose root link is at
.IR root ,
and returns a pointer to the root node.
If the tree is nil,
then the
.I path
is initialized to target the single gap,
and the function returns null.
.PP
The function
.BI xyla_ tree _uppath
modifies an existing full
.I path
so as to target the parent of the current target node,
sets
.BI * pos_out
to
.B XYLA_BTPOS_LEFT
or
.B XYLA_BTPOS_RIGHT
respectively according to whether the current target node is
the left or right child of its parent,
and returns the parent node.
If the current target node is the tree root,
then the function leaves
.I path
unchanged,
sets
.BI * pos_out
to
.BR XYLA_BTPOS_ROOT ,
and returns null.
It is an error to call
.BI xyla_ tree _uppath
with an empty
.IR path .
.PP
The function
.BI xyla_ tree _leftpath
modifies an existing full
.I path
so as to target the current target node's left child,
and return this left child;
if the left child is nil,
then instead
the
.I path
is modified to target the gap immediately before the current target node,
and the function returns null.
The function
.BI xyla_ tree _rightpath
is similar,
except that it targets and returns the current target node's right child,
or the gap immediately after the current target node.
It is an error to call
.BI xyla_ tree _leftpath
or
.BI xyla_ tree _rightpath
on an empty
.IR path .
.PP
The function
.BI xyla_ tree _replace
replaces the current target node of a full
.I path
with a new
.IR node .
The previous target node is no longer part of the tree structure
and can be released or otherwise invalidated.
No update functions are called:
call
.BI xyla_ tree _ripple
if necessary.
It is an error to call
.BI xyla_ tree _replace
with an empty
.IR path .
.PP
The function
.BI xyla_ tree _ripple
is passed a node-class pointer
.I cls
and a full
.IR path ,
and updates the summary data in the target node's ancestor chain,
which is useful if the application has changed the node's summary data
so that the summaries held by the node's ancestors are now out of date.
The function calls the node class's
.B upd
function on the
parent of the
.IR path 's
current target node,
then the parent's parent,
and so on,
and finally it calls
.B upd
on the tree root.
The
.B upd
function is not called on the target node itself.
If the
.I path
targets the root node
then nothing happens.
In any event, the
.I path
is left unchanged and valid.
It is an error to call
.BI xyla_ tree _ripple
with an empty
.I path ,
with a null
.IR cls ,
or with a null
.B upd
function pointer in the node class operations table.
.PP
The function
.BI xyla_ tree _ascend
ascends a tree,
starting from a the target node or gap of a
.IR path ,
calling a function
.I fn
on each node as it goes.
It is used to compute information about a node or gap
using the summary data of the nodes along the path:
see the
.B EXAMPLES
below.
The
.I fn
is called with five arguments:
.IP
.nf
.ne 4
.ta \w'\fB\&typedef int xyla_bt_ascendfn('u
.BI "typedef int xyla_bt_ascendfn(struct xyla_bt_node *" node ,
.BI " struct xyla_bt_node *" parent ,
.BI " struct xyla_bt_node *" sibling ,
.BI " unsigned " pos ", void *" arg );
.fi
.PP
It is called at least once:
in the first call,
if the
.I path
is full, then
.I
node points to the
.IR path 's
target node;
otherwise
.I node
is null.
The
.I parent
argument is the
.IR node 's
parent,
or the node whose nil child link represents the gap
that is the
.IR path 's
target;
if the
.I node
is the root node, or the tree is nil, then
.I parent
is null
The
.I sibling
argument is the other child link of the
.IR parent ,
i.e., the one that is not
.IR node ;
this is null if
.I parent
is null.
The
.I pos
argument is
.B XYLA_BTPOS_ROOT
if the
.I node
is the root node,
or the tree is nil \(en
i.e., if and only if
.I parent
is null;
.I pos
is
.B XYLA_BTPOS_LEFT
or
.B XYLA_BTPOS_RIGHT
respectively if
.I node
is the left or right link of
.IR parent .
The
.I arg
argument to
.I fn
is the
.I arg
passed to
.BI xyla_ tree _ascend \fR.
If
.I fn
returns a nonzero value, then
.BI xyla_ tree _ascend
returns that nonzero value immediately;
otherwise, if the initial
.I node
was the tree root, or the tree is nil \(en
i.e., if the first call had
.I pos
equal to
.B XYLA_BTPOS_ROOT
\(en then
.BI xyla_ tree _ascend
returns zero.
Otherwise, it calls
.I fn
again,
this time with
.I node
equal to the previous
.IR parent ,
and the other arguments set appropriately.
This process continues until either
.I fn
returns a nonzero value,
which becomes the return value of
.BI xyla_ tree _ascend \fR,
or
.I fn
returns zero when passed the root node,
in which case
.BI xyla_ tree _ascend
returns zero.
.
.SS "Search, insertion, and removal"
The function
.BI xyla_ tree _lookup
searches a tree.
It is given a pointer
.I cls
to a node class, which may be null;
the address
.I root
of the tree's root link,
a navigation function
.IR nav ,
and an argument pointer
.I arg
to pass to the navigation function.
.PP
The search begins at the root
and descends through left and right child links,
as directed by a navigation function.
If the desired node is found \(en
i.e., the navigation function returns zero given some node \(en
then
.BI xyla_ tree _lookup
returns a pointer to that node;
otherwise, it returns null.
If
.I nav
is null then the navigation function
from the node class's order operations is used;
it is an error if
.I nav
is null
and either
.I cls
is null or
does not have order operations.
.PP
The function
.BI xyla_ tree _probe
is similar,
except that it also initializes a given
.IR path .
If the desired node was found, then
.I path
is initialized to target the node;
otherwise, the
.I path
is initialized to target the gap
in which the selected node would have been
(or, more concretely,
the gap corresponding to the nil subtree
which terminated the search).
.PP
The function
.BI xyla_ tree _insert
inserts a new
.I node
into the gap targetted by an empty
.IR path .
It is an error to provide a full path to this function.
The function returns
.B XYLA_RC_OK normally,
or
.B XYLA_RC_HTCHG
if the tree height increased by one.
(Tree types which don't have a notion of height
never return
.BR XYLA_RC_HTCHG .)
.PP
The function
.BI xyla_ tree _remove
removes the node targetted by a full
.I path
from its tree.
It is an error to provide an empty path to this function.
The function returns
.B XYLA_RC_OK normally,
or
.B XYLA_RC_HTCHG
if the tree height decreased by one.
(Tree types which don't have a notion of height
never return
.BR XYLA_RC_HTCHG .)
.
.SS "Joining and splitting"
The function
.BI xyla_ tree _join
concatenates two trees,
together with an optional separating node.
The resulting tree consists of, in order,
the nodes from the
.I left
tree;
the
.I mid
node, if this is not null; and
the nodes from the
.I right
tree.
The root of the resulting tree is stored in
.BI * root_out \fR;
null pointers are stored in
.BI * left
and
.BI * right \fR.
It is permitted for
.I left
or
.I right
to be equal to
.IR root_out .
If the tree type has a notion of height,
then there are additional arguments.
the caller can supply the heights of the left and right trees as
.I lht
and
.IR rht ;
either or both of these may be \-1
to indicate that the relevant tree's height isn't known.
The height of the output tree is stored in
.BI * rootht_out
if this is not a null pointer.
(Note that the output height is always determined correctly,
even if the input heights are unknown.)
The function always returns
.BR XYLA_RC_OK .
.PP
The function
.BI xyla_ tree _split
splits a tree at the position indicated by a path.
In general, the result consists of three pieces:
a (possibly nil)
.I left
tree containing the nodes before the indicated position;
the path's target node, if the path is full; and
a (possibly nil)
.I right
tree containing the nodes after the indicated position.
The root of the left tree is stored in
.BI * left_out \fR;
the target node, or a null pointer if the path is empty, is stored in
.BI * mid_out \fR;
and the root of the right tree is stored in
.BI * right_out \fR.
If the tree type has a notion of height,
then the heights of the left and right trees are stored in
.BI * lht_out
and
.BI * rht_out \fR;
these pointers can be null to discard the height information.
On exit, the original root is set to a null pointer,
unless the original root address is equal to one of the output pointers.
The function always returns
.BR XYLA_RC_OK .
.PP
The function
.BI xyla_ tree _splitat
splits a binary search tree,
whose root pointer is at
.BI * root \fR,
at the position indicated by a search
.IR key .
In general, the result consists of three pieces:
a (possibly nil)
.I prefix
tree containing the nodes whose keys order before the given key;
the node whose key matches the given key, if any; and
the path's target node, if the path is full; and
a (possibly nil)
.I suffix
tree containing the nodes whose keys order after the given key.
Calling this function is exactly the same as calling
.BI xyla_ tree _probe
to find the position of the matching node followed by
.BI xyla_ tree _split
to split the tree at that point.
The function always returns
.BR XYLA_RC_OK .
.PP
The function
.BI xyla_ tree _splitroot
splits the tree,
whose root pointer is at
.BI * root \fR,
at its root.
In general, the result consists of three pieces:
the root's left subtree;
the root node itself, if the tree was not initially nil; and
the root's right subtree.
(Both the left and right subtrees are nil if the input tree was nil.)
The root of the left subtree is stored in
.BI * left_out \fR;
the root node, or a null pointer if the tree was nil, is stored in
.BI * root_out \fR;
and the root of the right subtree is stored in
.BI * right_out \fR.
If the tree type has a notion of height,
then the input tree's height is provided as
.IR ht ;
this may be \-1 if the correct value is unknown.
The heights of the left and right subtrees are stored in
.BI * lht_out
and
.BI * rht_out \fR;
these pointers can be null to discard the height information.
On exit, the original root is set to a null pointer,
unless the original root address is equal to one of the output pointers.
The function always returns
.BR XYLA_RC_OK .
.
.SS "Set operations"
The functions
.BI xyla_ tree _unisect
and
.BI xyla_ tree _diffsect
operate on trees representing
.I sets
of items.
Each function receives two input trees,
named
.I A
and
.IR B .
Such a tree must be a binary search tree,
strictly ordered according to the
.I key
and
.I nav
operations of the provided node class;
the output trees will be ordered in the same way.
We shall say that a key is
.I common
if both input trees contain a node matching that key,
or that the key is
.I A-only
or
.I B-only
if only the
.I A
or
.I B
tree, respectively, contains a node matching that key.
In the case of a common key,
then we say that the two nodes bearing that key,
one from each of the
.I A
and
.I B
trees,
.IR match .
.PP
The function
.BI xyla_ tree _unisect
simultaneously computes
the union and intersection
of two sets represented as binary search trees
.I A
and
.IR B .
The input trees are destroyed:
each individual node ends up in one or other of the output trees.
Nodes with
.IR A -only
and
.IR B -only
keys end up in the union tree;
one of each pair of matching nodes ends up in the union tree,
and the other goes into the intersection tree.
The addresses of the input tree roots are supplied as the
.I aroot
and
.I broot
arguments;
the output tree roots are written to
.BR * uni_out
and
.BR * isect_out \fR.
For tree types with a notion of height,
the heights of the input trees are provided as
.I aht
and
.IR bht ;
either or both of these may be \-1
if the caller doesn't know the correct value.
The heights of the output trees are stored in
.BR * uniht_out
and
.BR *isectht_out \fR;
either or both of these may be null to discard the height information.
The function always returns
.BR XYLA_RC_OK .
.PP
The function
.BI xyla_ tree _diffsect
simultaneously computes
the difference and intersection
of two sets represented as binary search trees
.I A
and
.IR B .
The input tree
.I A
is destroyed:
each node ends up in one or other of the output trees.
The
.I B
tree is not modified.
Nodes with
.IR A -only
keys end up in the difference tree;
nodes with common keys
end up in the intersection tree.
The addresses of the input tree roots are supplied as the
.I aroot
and
.I broot
arguments;
the output tree roots are written to
.BR * diff_out
and
.BR * isect_out \fR.
For tree types with a notion of height,
the heights of the input
.I A
tree is provided as
.IR aht ;
this may be \-1 if the caller doesn't know the correct value.
The heights of the output trees are stored in
.BR * diffht_out
and
.BR *isectht_out \fR;
either or both of these may be null to discard the height information.
The function always returns
.BR XYLA_RC_OK .
.
.SS "Checking"
The function
.BI xyla_ tree _check
walks a tree and checks that all of the applicable invariants hold.
The address of the tree root pointer is passed as
.BI * root \fR.
Diagnostics are printed to the
.BR stdio (3)
stream
.IR fp ;
commonly, this is
.BR stderr ,
and it may be null to suppress output altogether.
If the tree type has a notion of height,
then
.I expht
should hold the application's understanding of the tree's height,
to be checked against the true height;
this may be \-1 if the height is unknown.
If the node class
.I cls
provides checking operations
then these will be called at the relevant points
while the checking proceeds;
the
.I arg
will be passed to these operations as the `user state' pointer.
Note that checking will
.I mutate
the tree!
It is not safe to check a tree
while it is being accessed by another thread.
See the section
.B "CHECKING FRAMEWORK"
below for full details.
The function returns
.B XYLA_RC_OK
on success,
.B XYLA_RC_BAD
if problems were found,
or some other nonzero value if checking was aborted.
.
.\"--------------------------------------------------------------------------
.SH "TREE IDIOSYNCRASIES"
.
.SS "AVL trees"
An
.I Adelson-Velsky\(en\&Landis
.RI ( AVL )
tree is a height-balanced binary tree:
the heights of the two subtrees of any node can differ by at most one.
.PP
AVL trees offer good deterministic performance,
and are an excellent choice for general-purpose use.
.PP
AVL trees have a notion of `height',
which is exactly the actual height of the tree,
i.e., the maximum number of links which can be traversed
starting from the root
before a null link is encountered.
.PP
The structure of an AVL tree is adjusted
when nodes are inserted or removed.
This invalidates all iterators and paths referring to the tree.
It is safe for multiple threads to search an AVL tree concurrently.
.PP
The additional control data associated with an AVL tree node
consists of a flags word
.IR f .
The low two bits of the flags word are used by the implementation;
the remaining bits are available for use by applications:
Xyla will not alter these bits.
The use of the low bits is subject to change in future versions,
but, for diagnostic purposes, it might be useful to know that
bit 0 is set if the node is `left-biased',
i.e., the left subtree is higher (by one) than the right subtree;
bit 1 is set if the node is `right-biased',
i.e., the left subtree is shorter (by one) than the right subtee;
both bits are clear if the node is `balanced',
i.e., both subtrees have the same height.
.
.SS "Red-black trees"
A red-black tree is a height-balanced binary tree,
initially presented by Leonidas Guibas and Robert Sedgewick,
as an encoding of Rudolf Bayer's 2-3-4 trees using binary trees.
.PP
Red-black trees are less well-balanced than AVL trees.
They can perform better
when trees are modified much more than they are searched.
.PP
Each node is assigned a colour, either red or black;
the colour assignment must obey the following rules.
.hP \*o
The root is black.
.hP \*o
If a node is red, its children must be black.
.hP \*o
Every path from root to leaf must have the
.I same
number of black nodes,
called the tree's
.IR "black height" .
.PP
Red-black trees don't maintain such rigid balance as AVL.
This may make update-heavy workloads somewhat more efficient,
though the rebalancing algorithms aren't significantly simpler
than those for AVL trees;
search-heavy workloads are likely to perform worse.
.PP
The structure of a red-black tree is adjusted
when nodes are inserted or removed.
This invalidates all iterators and paths referring to the tree.
It is safe for multiple threads to search a red-black tree concurrently.
.PP
Red-black trees have a notion of `height',
which is the tree's black height.
.PP
The additional control data associated with a red-black tree node
consists of a flags word
.IR f .
The least significant bit is used by the implementation;
the remaining bits are available for use by the application:
Xyla will not alter these bits.
The meaning of the low bit is subject to change in future versions,
but, for diagnostic purposes, it might be useful to know that
bit 0 is clear if the node is black,
or set if the node is red.
.
.SS "Splay trees"
A
.I "splay tree"
is an adaptively restructured binary tree,
designed by Daniel Sleator and Robert Tarjan.
.PP
Splay trees are generally very fast.
However, splay trees are modified
in response to search operations
as well as insertions and removals,
which make them less unsuitable when trees are shared between threads,
or where their highly dynamic nature is otherwise undesirable.
.PP
Splay trees are restructured in response to search operations
as well as insertions or removals.
Iterators and paths
.I "remain valid"
when the tree is restructured.
Of course, if a node is removed,
then an iterator or path referring to that node becomes invalid.
(An empty path may refer unpredictably to the node
immediately to the left or right of the targetted gap.)
As a result of the restructuring,
it is
.I not
safe to access a splay tree from multiple threads concurrently,
even if they are only searching the tree.
.PP
The additional control data associated with a splay tree node
consists of a
.I parent
pointer:
in the root node, this pointer is null.
.PP
The
.B xyla_splay_rebalance
function will forcibly rebalance an entire tree.
The tree's root pointer is at
.BI * root \fR;
this pointer is updated to point to the new tree root.
The resulting structure is not guaranteed,
but the resulting tree's height will be minimal for the number of nodes.
Iterators and paths remain valid.
.PP
The
.B xyla_splay_splay
function `splays' a tree at the place indicated by a
.IR path .
The result is that the target node (full path)
or the node holding the null link to the target gap (empty path)
is promoted to the tree root.
Iterators and paths remain valid.
.
.SS "Treaps"
A
.I treap
is a
.I randomized
binary tree,
published by Seidel and Aragon.
Treaps are balanced with high probability.
.PP
Every node is assigned a
.IR weight ;
a child node is not allowed to be lighter than its parent.
If each node has a distinct weight,
then the lightest node is the tree root, and
the remaining nodes are divided into the root's left and right subtrees
according to the ordering of the nodes;
therefore the tree structure is uniquely determined
by the node ordering and their weights.
.PP
Treaps perform very well on average.
The maintenance algorithms are particularly simple and fast.
However,
the requirement for random weights can make them inconvenient
for some applications.
.PP
Treaps do not have a notion of `height'.
.PP
The structure of a treap is adjusted
when nodes are inserted or removed.
This invalidates all iterators and paths referring to the treap.
It is safe for multiple threads to search a treap concurrently.
.PP
The additional control data associated with a treap node is the weight
.IR wt .
This
.I must
be set by the caller before a node is inserted into the tree.
A node's weight must not change while the node is linked into a treap.
The replacement node passed to
.B xyla_treap_replace
must have the same weight as the node that it replacing.
If the tree structure is to be well balanced,
the weights must be
.I independent
of the node ordering.
This is the caller's responsibility.
For simple applications,
where the node ordering comes from a trusted source,
weights can be chosen without much ceremony,
e.g., even
.BR rand (3)
is likely to be sufficient.
If the node ordering is determined by an adversary,
then weights should be determined using
a cryptographically strong generator.
.PP
Because the tree structure is (nearly) uniquely determined
by the node ordering and weights,
rebalancing the tree is not possible.
The treap iterator and path structures are allocated such that
the probability that
a treap of maximum size will be too tall
with probability at most 2\*(ss128\*(se,
assuming that weights are properly chosen.
If the tree height is too great,
Xyla will call the function
.BR xyla__treap_boundfail .
On hosted systems, this function is defined by the Xyla library:
it simply prints a message on standard error and calls
.BR abort (3).
On freestanding systems,
a suitable implementation of
.B xyla__treap_boundfail
must be provided by the application:
it must not return to its caller.
.
.\"--------------------------------------------------------------------------
.SH "PRIMITIVES"
.
This section describes a number of utility functions and macros
used to implement the trees described above.
They are not intended for direct use by applications.
.
.SS "Iteration"
The iteration machinery provided here
can perform nonrecursive forward and reverse iteration
over an arbitrary binary tree,
i.e., using only the
.B left
and
.B right
links of
.BR "struct xyla_bt_node" .
.PP
Iteration state is maintained in a stack,
consisting of a vector
.I stack
of pointers to nodes,
and a stack pointer
.IR sp .
The stack is initially empty,
with
.IR sp "\ =\ 0."
The stack vector itself need not be initialized.
During iteration,
the topmost item on the stack,
.IB stack [ sp "\ \-\ 1]" \fR,
points to the next node,
.IR n ,
to return;
the lower entries contain
those ancestors of
.I n
which have not already been returned by the iteration,
from bottom (i.e., near the leaves of the tree)
to top (i.e., near the root).
.PP
The macro
.B XYLA_BT_STEPITER
advances the state of an iteration.
The
.I rc
argument is an lvalue to receive a return code;
.I node
is a pointer to an initial node;
.I stack
points to the stack vector;
.I sp
is an lvalue designating the stack pointer;
.I max
is the number of entries available in the stack vector; and
.I back
is either
.B left
for forward iteration, or
.B right
for reverse iteration.
The behaviour is to push nodes onto the stack and follow
.I back
links starting from the provided
.I node
until a null link is encountered.
If
.I node
is null, then nothing is done.
On success, the macro sets
.IB rc "\ =\ XYLA_RC_OK" \fR;
if the stack vector is too small, then it sets
.IB rc "\ =\ XYLA_RC_TALL" \fR,
and the stack is left unchanged;
no other return codes are possible.
.PP
An iteration state is established by calling
.B XYLA_BT_STEPITER
with
.I node
pointing to the root node of the tree.
Once this is done, if
.I sp
is zero, then all nodes have been processed;
otherwise, decrement
.IR sp ,
process the
.I node
at
.IB stack [ sp ] \fR,
and call again with
.IB node ->right
(for forward iteration) or
.IB node ->left
(for reverse iteration).
.
.SS "Path operations"
The path-management machinery can keep track of,
and adjust the position of,
a node or gap between nodes
within an arbitrary binary tree,
i.e., using only the
.B left
and
.B right
links of
.BR "struct xyla_bt_node" .
.PP
The path state is maintained in a vector
.I path
of
.IR "link addresses" ,
and an index
.IR k .
The first entry,
.IB path [0] \fR,
is always the address of the root link;
each subsequent entry holds the address of the
.B left
or
.B right
link of the node addressed by the previous link.
The topmost entry,
.IB path [ k ] \fR,
.RI ( not
.IR k "\ \-\ 1)"
is the link to the target node (a non-null link, indicating a full path)
or gap (a null link, indicating an empty path).
.PP
Many of the macros below additionally take a
.I htmax
argument giving the maximum tree height
which can be accommodated by the supplied
.I path
vector.
A vector must therefore have space for
.IR htmax "\ +\ 1"
entries.
.PP
The macro
.B XYLA_BT_CURRENT
returns the node targetted by a full
.IR path ,
or null if the
.I path
is empty,
similar to the
.BI xyla_ tree _current
function.
As mentioned above, this simply returns
.BI * path [ k ] \fR.
.PP
The macro
.B XYLA_BT_COPYPATH
copies a
.I path
to a new vector
.IR newpath ,
similar to the
.BI xyla_ tree _copypath
function.
.PP
The macro
.B XYLA_BT_EXTRMPATH
establishes a full path which targets the first or last node in a tree,
similar to the
.BI xyla_ tree _firstpath
or
.BI xyla_ tree _lastpath
functions;
if the tree is nil, then the path is empty.
The
.I rc
argument is an lvalue to receive a return code;
.I node
is an lvalue to receive a pointer to the first or last node
(or a null pointer, if the tree is nil);
.I path
is the path vector to fill in;
.I k
is an lvalue to receive the path length;
.I root
is the address of the tree root link;
.I dir
is
.B left
or
.B right
according to whether the first or last node, respectively, is sought; and
.I htmax
is the maximum tree height which can be accommodated by the
.I path
vector.
On exit,
.I rc
is
.B XYLA_RC_OK
on success, or
.B XYLA_RC_TALL
if the tree's height exceeded the caller's limit;
no other return codes are possible.
.PP
The macro
.B XYLA_BT_STEPPATH
advances a path one node left or right in the node ordering,
similar to the
.BI xyla_ tree _nextpath
or
.BI xyla_ tree _prevpath
functions.
If the path targets the gap at the extreme end,
or the tree is simply nil,
then it is left unchanged;
if the path targets the extreme node,
then it is updated to refer to the gap beyond;
otherwise, the path is updated to refer to
the next node in the appropriate direction.
The
.I rc
argument is an lvalue to receive a return code;
.I node
is an lvalue to receive a pointer to the previous or next node
(or a null pointer,
if the path is already at or beyond the extreme node);
.I path
is the path vector to update;
.I k
is an lvalue holding the current path length, to be updated;
.I dir
and
.I opp
are
.B left
and
.B right
to advance the path towards the start of the sequence, or
.I vice-versa
to advance the path towards the end; and
.I htmax
is the maximum tree height which can be accommodated by the
.I path
vector.
On exit,
.I rc
is
.B XYLA_RC_OK
on success, or
.B XYLA_RC_TALL
if the tree's height exceeded the caller's limit;
no other return codes are possible.
.PP
The macro
.B XYLA_BT_NUDGEPATH
advances a
.I full
path to refer instead to the gap immediately before or after it,
similar to the
.BI xyla_ tree _beforepath
or
.BI xyla_ tree _afterpath
functions.
It is an error if the path is empty.
The
.I rc
argument is an lvalue to receive a return code;
.I path
is the path vector to update;
.I k
is an lvalue holding current path length, to be updated;
.I dir
and
.I opp
are
.B left
and
.B right
to advance the path to the gap before the current target node, or
.I vice-versa
to advance the path to the gap after it; and
.I htmax
is the maximum tree height which can be accommodated by the
.I path
vector.
On exit,
.I rc
is
.B XYLA_RC_OK
on success, or
.B XYLA_RC_TALL
if the tree's height exceeded the caller's limit;
no other return codes are possible.
.PP
The macro
.B XYLA_BT_ROOTPATH
establishes a path targetting the root of a tree,
similar to the
.BI xyla_ tree _rootpath
function.
If the tree is nil, then the path will be empty;
otherwise it will be full.
The
.I node
argument is an lvalue to receive a pointer to the root node
(or a null pointer, if the tree is nil);
.I path
is the path vector to fill in;
.I k
is an lvalue to receive the path length (which will be exactly 0); and
.I root
is the address of the tree root link.
.PP
The macro
.B XYLA_BT_PARENTPATH
updates a
.I full
path to target the current target node's parent,
and reports the previous target node's position relative to its parent,
similar to the
.BI xyla_ tree _uppath
function.
If the path initially targets the tree root,
then this is reported, but the path is not changed.
The
.I node
argument is an lvalue to receive a pointer to the parent node,
or null if the path already targetted the tree root;
.I pos
is an lvalue to receive the position indicator;
.I path
is the path vector to update; and
.I k
is an lvalue holding the current path length, to be updated.
On exit,
.I pos
is set to
.B XYLA_BTPOS_LEFT
or
.B XYLA_BTPOS_RIGHT
if the original path target is
the left or right child of its parent, respectively; or
.B XYLA_BTPOS_ROOT
if the original path target is the tree root.
(Since this macro can only decrease the path length,
it will always succeed and there is no need for a return code.)
.PP
The macro
.B XYLA_BT_CHILDPATH
updates a
.I full
path to target the current target node's left or right child,
similar to the
.BI xyla_ tree _leftpath
or
.BI xyla_ tree _rightpath
functions.
The path will thereby become full or empty
according to whether the node has the relevant child.
The
.I rc
argument is an lvalue to receive a return code;
.I node
is an lvalue to receive a pointer to the child node,
or a null pointer if there is no such child;
.I path
is the path vector to update;
.I k
is an lvalue holding the current path length, to be updated;
.I dir
is
.B left
or
.B right
to select the left or right child, respectively;; and
.I htmax
is the maximum tree height which can be accommodated by the
.I path
vector.
On exit,
.I rc
is
.B XYLA_RC_OK
on success, or
.B XYLA_RC_TALL
if the tree's height exceeded the caller's limit;
no other return codes are possible.
.PP
The macro
.B XYLA_BT_RIPPLE
updates the ancestors of
the target node of a
.I full
path,
applying the update function of a node class,
similar to the
.BI xyla_ tree _ripple
function.
The
.I cls
argument is a pointer to the node class,
which must not be null,
and must have a non-null
.B upd
function pointer;
.I path
is the path vector; and
.I k
is the length of the path.
.PP
The macro
.B XYLA_BT_ASCEND
ascends the tree,
allowing a caller-provided function to accumulate summary data
from the ancestors of a node or gap targetted by a path,
similar to the
.BI xyla_ tree _ascend
function.
The
.I rc
argument is an lvalue to receive a return code;
.I fn
is a pointer to a
.B xyla_bt_ascendfn
to call on each ancestor node;
.I path
is the path vector;
.I k
is the path length; and
.I arg
is a pointer to be passed to
.IR fn .
See the description of
.BI xyla_ tree _ascend
for the full details.
On exit,
if
.I fn
returned a nonzero value,
then
.I rc
is that nonzero value;
otherwise
.I rc
is set to zero.
.
.SS "Search"
The following macros assist with searching binary trees.
.PP
The macro
.B XYLA_BT_LOOKUP
searches a tree,
guided by a navigation function,
returning the node found, or null,
similar to the
.BI xyla_ tree _lookup
function.
The
.I cls
argument is a pointer to a node class, or null;
.I node
is an lvalue to receive a pointer to the found node, or null;
.I root
is a pointer to the root node (not the address of the root link);
.I navfn
is a caller-supplied navigation function, or null; and
.I arg
is a pointer to be passed to the navigation function.
If
.I navfn
is not null, then it is used as the navigation function;
otherwise
.I cls
must not be null, and must have a non-null
.I nav
function,
which is used instead.
.PP
The macro
.B XYLA_BT_PROBE
searches a tree,
as for
.BR XYLA_BT_LOOKUP ,
and establishes a path to the node or gap found,
similar to the
.BI xyla_ tree _probe
function.
The
.I cls
argument is a pointer to a node class, or null;
.I rc
is an lvalue to receive a return code;
.I node
is an lvalue to receive a pointer to the found node, or null;
.I root
is a pointer to the root node (not the address of the root link);
.I navfn
is a caller-supplied navigation function, or null;
.I arg
is a pointer to be passed to the navigation function;
.I path
is the path vector to fill in;
.I k
is an lvalue to receive the path length; and
.I htmax
is the maximum tree height which can be accommodated by the
.I path
vector.
The navigation function is determined in the same way as by
.B XYLA_BT_LOOKUP
above.
On exit,
.I rc
is set to
.B XYLA_RC_OK
on success, or
.B XYLA_RC_TALL
.B XYLA_RC_TALL
if the tree's height exceeded the caller's limit;
no other return codes are possible.
.
.SS "Removal"
The function
.B xyla_bt_remove
assists with removal of nodes from binary trees.
It's easy to remove a leaf node,
or a node with only one child;
the usual approach for removing a node with both children
is to locate the node's immediate successor \(en
which is the leftmost node in the right subtree
and therefore cannot itself have a left child \(en
and exchange them.
There is a
.IR "deleted node" ,
which is the target node of the input path;
there is a
.IR "substitute node" ;
and there is a
.IR "replacement node" ,
with which the deleted node is replaced.
In the case where the deleted node has zero or one child,
the substitute node is null,
and the replacement node is the deleted node's child, if any, or null;
the link to the deleted node is replaced by a link to the replacement node.
In the case where the deleted node has two children,
then the substitute node is the deleted node's successor,
and the replacement node is the right child of the substitute node;
the link to the deleted node is replaced by a link to the substitute node,
and the link to the substitute node is replaced by the replacement node.
The arguments
.IR del_out ,
.IR subst_out ,
and
.IR replace_out
give locations where the deleted node,
substitute node, and
replacement node
are stored;
.I path
is a path vector initially holding a
.I full
path to the node to be deleted;
.I k_inout
is the address of the path length, which will be updated;
.I htmax
is the maximum tree height which can be accommodated by the
.I path
vector.
On success, the function restructures the tree as described,
updates the path to target to the replacement node,
(i.e., where we pretend that the target node was all along)
and returns
.BR XYLA_RC_OK ;
if the tree height is too great,
the function returns
.B XYLA_RC_TALL
and the path and tree are not modified;
no other return codes are possible.
.
.SS "Set operations"
This section describes functions which implement set operations
using `split' and `join' operations.
.PP
The necessary tree operations are supplied to the set functions
as a
.B "struct xyla_bt_treeops"
structure.
This contains three function pointers,
.BR join ,
.BR splitat ,
and
.BR splitroot ,
which behave as the functions
.BI xyla_ tree _join \fR,
.BI xyla_ tree _splitat \fR,
and
.BI xyla_ tree _splitroot \fR,
described above.
Note that the functions here include arguments for tracking tree heights,
whether or not the tree structure has a notion of height:
if the tree has no notion of height,
then the heights passed in and returned from the functions
are unimportant;
it's conventional, but not necessary, to pass zero.
.PP
The algorithms used by the set functions are nonrecursive,
but require additional storage
in the form of a `stack' vector of type
.B "struct xyla_bt_setopstack"
passed in by the caller.
This vector need not be initialized in any particular way,
and does not require any kind of special disposal.
.PP
The function
.B xyla_bt_unisect
computes the union and intersection of two trees,
similar to the
.BI xyla_ tree _unisect
function.
The argument
.I cls
is a pointer to the node class;
.I uni_out
and
.I isect_out
are pointers to the output root links for the union and intersection, and
.I uniht_out
and
.I isect_out
are pointers to the output heights for the trees, or null to discard them;
.I aroot
and
.I broot
are the addresses of the root links for the input trees, and
.I aht_inout
and
.I bht_inout
are the addresses of their heights, or null;
.I ops
is a pointer to the
.B "struct xyla_bt_treeops"
structure;
.I stack
is a pointer to the stack vector; and
.I htmax
is the number of elements in the stack vector.
The function returns
.B XYLA_RC_OK
on success:
in this case,
.BI * aroot
and
.BI * broot
are cleared (unless they coincide with other outputs);
.BI * aht_inout
and
.BI * bht_inout
are also zeroed, unless the respective pointers are null,
or they overlap with other outputs;
.BI * uni_out
and
.BI * isect_out
point to the root nodes of the union and intersection trees; and
.BI * uniht_out
and
.BI * isectht_out
hold the heights of the output trees.
If the provided stack vector was insufficient, then
.B XYLA_RC_TALL
is returned;
.BI * uni_out \fR,
.BI * isect_out \fR,
.BI * uniht_inout \fR,
and
.BI * isectht_inout \fR
are unchanged;
and the input trees at
.BI * aroot
and
.BI * broot \fR,
and their heights in
.BI * aht_inout
and
.BI * bht_inout \fR,
are
.I changed
in such a way that,
if the call is retried with a larger stack vector,
or the trees are rebalanced so as to reduce their maximum height,
then the correct answer will be obtained.
.PP
The function
.B xyla_bt_diffsect
computes the difference and intersection of two trees,
similar to the
.BI xyla_ tree _diffsect
function.
The argument
.I cls
is a pointer to the node class;
.I diff_out
and
.I isect_out
are pointers to the output root links for the union and intersection, and
.I diffht_out
and
.I isect_out
are pointers to the output heights for the trees, or null to discard them;
.I aroot
and
.I broot
are the addresses of the root links for the input trees, and
.I aht_inout
is the addresses of the
.I a
tree's height, or null;
.I ops
is a pointer to the
.B "struct xyla_bt_treeops"
structure;
.I stack
is a pointer to the stack vector; and
.I htmax
is the number of elements in the stack vector.
The function returns
.B XYLA_RC_OK
on success:
in this case,
.BI * aroot
is cleared (unless it coincide with another output);
.BI * aht_inout
is also zeroed, unless the respective pointers are null,
or it overlaps with another output;
.BI * diff_out
and
.BI * isect_out
point to the root nodes of the difference and intersection trees; and
.BI * uniht_out
and
.BI * isectht_out
hold the heights of the output trees.
If the provided stack vector was insufficient, then
.B XYLA_RC_TALL
is returned;
.BI * diff_out \fR
and
.BI * diffht_inout \fR
are unchanged; and
the nodes of the
.I a
tree are scattered in an arbitrary manner
between
.BI * aroot
and
.BI * isect_out \fR,
with their heights in
.BI * aht_inout
and
.BI * isectht_out \fR,
so that,
if these two trees are remerged
(e.g., by
.B xyla_bt_unisect
\(en note that their intersection will be nil)
then the original
.I a
tree will be recovered,
up to differences in internal structure.
.
\"--------------------------------------------------------------------------
.SH "CHECKING FRAMEWORK"
.
The Xyla library includes a comprehensive and robust framework
for checking tree structures.
.
.SS "Check process"
Checking a tree involves a walk over the tree nodes.
Checking is
.IR destructive :
links are modified in order to keep track of
which nodes have been visited so far,
in order to detect `tangled pointers'.
.PP
The walk will call
.I checking functions
on each node.
There are potentially two such checking functions: the
.I tree checker
is provided by the tree implementation,
to verify properties of the tree structure,
e.g., the AVL balance property, or
the red-black properties; and the
.I user checker
to verify properties determined by the application,
e.g., node ordering properties, or
summary data maintained by
.B upd
functions.
Associated with each checking function are a single
.I "state pointer" ,
and a
.I "node information block"
for each node in the tree.
.PP
A checking function is passed an
.I op
code, and
a pointer to a
.B "struct xyla_bt_check"
structure.
Checking functions are called multiple times for each node: the
.I op
argument explains how the current call fits into the overall sequence.
The
.B "struct xyla_bt_check"
structure collects together the known facts about a node \(en
its parent,
its children,
and the node information blocks for all of these \(en
and global state for the tree walk.
A checking function returns an integer code, which should be
.B XYLA_RC_OK
(zero) if the call was successful;
.B XYLA_RC_BAD
if the function detected a problem,
but checking should continue,
e.g., to find other problems; or
some other code to cause a
.I panic
and end the checking procedure as soon as possible.
.PP
The checking procedure is as follows.
The walk begins at the root and proceeds mostly recursively.
(The actual implementation is nonrecursive,
in the sense that it uses constant stack space.)
.PP
If the node is null, then the checking functions are called with
.IR op "\ \=\ " \fB\&XYLA_CHKOP_NIL .
Otherwise, the following steps are taken in order.
.hP 1.
An information block is allocated for each active checking function.
.hP 2.
The checking functions are called with
.IR op "\ \=\ " \fB\&XYLA_CHKOP_SETUP
to initialize their information blocks.
.hP 3.
The checking functions are called with
.IR op "\ \=\ " \fB\&XYLA_CHKOP_BEFORE .
.hP 4.
The node's left link is visited recursively.
.hP 5.
The checking functions are called with
.IR op "\ \=\ " \fB\&XYLA_CHKOP_MID .
.hP 6.
The node's right link is visited recursively.
.hP 7.
The checking functions are called with
.IR op "\ \=\ " \fB\&XYLA_CHKOP_AFTER .
.hP 8.
Some time later,
after the checking functions have been called on the
.I parent
node with
.IR op "\ \=\ " \fB\&XYLA_CHKOP_AFTER ,
they are called on
.I this
node with
.IR op "\ \=\ " \fB\&XYLA_CHKOP_TEARDOWN .
.PP
There's no clear technical distinction between
.B XYLA_CHKOP_SETUP
and
.BR XYLA_CHKOP_BEFORE .
The idea is that
.B XYLA_CHKOP_SETUP
should ensure that a subsequent
.B XYLA_CHKOP_TEARDOWN
call can run successfully, while
.B XYLA_CHKOP_BEFORE
should ensure that any information
needed by the node's children
is available prior to visiting them.
.PP
In the event of a panic,
not all of the above calls will be issued.
However, a call with
.IR op "\ \=\ " \fB\&XYLA_CHKOP_TEARDOWN
will be made for a node if and only if a call with
.IR op "\ \=\ " \fB\&XYLA_CHKOP_SETUP
was previously made for the same node,
regardless of whether that call succeeded.
.PP
The
.B "struct xyla_bt_check"
structure contains the following members.
.TP
.B cls
The node class for the tree,
as passed to the
.B xyla_bt_check
function.
.TP
.B root
The address of the tree's root link.
This is mostly used for reporting the tree identity in diagnostics.
.TP
.B fp
The output stream for diagnostic messages,
as passed to the
.B xyla_bt_check
function.
This may be null to suppress diagnostics.
.TP
.B f
The flags word,
as passed to the
.B xyla_bt_check
function.
.BR parent " and " parent_info
The parent node,
and the checker's node information block for the parent.
If the current node is the tree root,
then these are both null pointers.
.BR node ", " pos ", and " node_info
The current node,
its position relative to its parent,
and the checker's node information block for the node.
The
.B pos
member is
.B XYLA_BTPOS_ROOT
if the node is the tree root, or
.B XYLA_BTPOS_LEFT
or
.B XYLA_BTPOS_RIGHT
if it is the left or right child of its parent, respectively.
When a null link is encountered,
.IR op "\ \=\ " \fB\&XYLA_CHKOP_NIL ,
and both
.B node
and
.B node_info
are null;
.B pos
is still set indicating the position of the null link.
.TP
.BR left " and " left_info
The left child node,
and the checker's node information block for the child.
Both of these will be null if the current node has no left child.
The
.B left_info
pointer will be null if
.I op
is either
.B XYLA_CHKOP_SETUP
or
.BR XYLA_CHKOP_BEFORE .
.TP
.BR right " and " right_info
The right child node,
and the checker's node information block for the child.
Both of these will be null if the current node has no right child.
The
.B right_info
pointer will be null if
.I op
is either
.B XYLA_CHKOP_SETUP
or
.BR XYLA_CHKOP_BEFORE .
.TP
.B state
The state pointer associated with the checking function.
.PP
A checking function is allowed to modify the
.BR parent_info ,
.BR node_info ,
.BR left_info ,
.BR right_info ,
and
.B state
members of the structure,
which may be useful when chaining to other checking functions.
Other members must not be changed.
.
.SS "Node class checking operations"
A node class can include operations used during checking.
These are defined in
.BR "struct xyla_bt_chkops" .
A node class which includes checking operations
must also include ordering operations
(the
.B nav
and
.B key
functions)
though these can be null pointers if they are not otherwise required.
The checking operations are as follows.
.TP
.B chk
The user checking function, as described above.
.TP
.B infosz
The size of a node information block used by the user checking function.
.TP
.B id
A function to print a description of a node.
This is used by
.B xyla_bt_printnode
(described below);
it may be null, in which case a default generic format is used.
.
.SS "Functions"
The function
.B xyla_bt_check
is the main entry point into the checking machinery.
It performs the checking procedure described above.
The argument
.I cls
is a pointer to the node class,
which may be null if no user checking is required;
.I root
is the address of the tree's root link,
which is used in diagnostic messages;
.I fp
is the output string on which diagnostic messages should be printed,
which may be null to suppress diagnostic output;
.I tree_chkfn
is the tree checking function,
which may (unusually) be null
if the tree structure has no special invariants;
.I tree_infosz
is the size of a node information block used by the tree checking function;
.I tree_state
is the state pointer to be passed to the tree checking function; and
.I user_state
is the state pointer to be passed to the user checking function.
The function returns
.B XYLA_RC_OK
if the check succeeded with no problems;
.B XYLA_RC_BAD
if the check reported problems with the tree structure; or
some other nonzero return code if a checking function caused a panic \(en
the code returned is equal to that
returned by the panicking checking function.
.PP
The function
.B xyla_bt_bughdr
begins printing a diagnostic message reporting a problem with a tree.
The header printed has the form
.IP
.I token
.I root
.B BUG:
.PP
The argument
.I token
identifies the checking function which found the problem,
e.g.,
.B XYLA-AVL
for the AVL tree invariant checker;
.I root
is the root link address,
which is printed to identify the faulty tree; and
.I fp
is the output stream on which the message is to be printed.
The caller should continue to describe the problem
using
.BR fprintf (3)
and
.B xyla_bt_printnode
(below),
and finally end with a newline.
.PP
The function
.B xyla_bt_printnode
prints a description of a node.
The argument
.I cls
is the node class,
as provided to
.BR xyla_bt_check ;
.I node
is the node address (which may be null); and
.I fp
is the stream on which the description is to be printed.
If the node class includes an
.B id
function, then this is simply called,
with the same arguments,
to do the job.
Otherwise, a generic description is printed,
including the node address.
.PP
The function
.B xyla_bt_chkorder
is a checking function which verifies that
the order of nodes in a tree
matches that expected by the
.B nav
and
.B key
ordering operations in the node class.
The function uses
.B "struct xyla_bt_ordinfo"
as its node information block,
and requires no state.
It cannot panic.
.
.\"--------------------------------------------------------------------------
.SH "EXAMPLES"
.
.SS "A search tree with string-valued keys"
Our first example demonstrates a binary search tree,
whose search keys are null-terminated strings.
We shall use AVL trees in this example.
The node structure must therefore be something like this.
.VS
.ne 5
.ta 2n
struct node {
struct xyla_avl_node avl;
char *key;
/*\fR other data \fP*/
};
.VE
Next, we must establish the node class operations.
We don't need an update function.
Since this is a binary search tree,
we should provide
.IR "ordering operations" ,
as described above.
Our search keys will be simple C-style character pointers
to null-terminated strings.
The navigation function can use
.BR strcmp (3)
to compare keys.
.VS
.ne 8
.ta 2n \w'\fBstatic int str_nav('u
static int str_nav(const struct xyla_bt_nodecls *cls,
const struct xyla_bt_node *node, void *arg)
{
const struct node *n = (const struct node *)node;
const char *k = arg;
return (strcmp(k, n->key));
}
.VE
The key-projection function is nearly trivial.
.VS
.ne 7
.ta 2n \w'\fBstatic int str_key('u
static int str_key(const struct xyla_bt_nodecls *cls,
const struct xyla_bt_node *node)
{
const struct node *n = (const struct node *)node;
return (n->key);
}
.VE
Finally, we should add checking operations.
We can use
.B xyla_bt_chkorder
as the checking function.
To make diagnostic output more readable,
it will be best to provide our own node-identification function.
.VS
.ne 7
.ta 2n \w'\fBstatic void str_id('u
static void str_id(const struct xyla_bt_nodecls *cls,
const struct xyla_bt_node *node, FILE *fp)
{
const struct node *n = (const struct node *)node;
fprintf(fp, "#", n->key);
}
.VE
With all this done, we can define the node operations table.
.VS
.ne 4
.ta 2n
static const struct xyla_bt_chkops str_ops = {
{ sizeof(str_ops), 0 },
{ str_nav, str_key },
{ xyla_bt_chkorder, sizeof(struct xyla_bt_ordinfo), str_id }
};
.VE
Our node class requires no additional state,
so we can use
.B struct xyla_bt_nodecls
directly for our node class object.
This can be initialized as follows.
.VS
.ne 3
struct xyla_bt_nodecls str_cls;
str_cls.ops = &str_ops.node;
.VE
We can find the node in the tree with a particular key like this.
.VS
.ne 6
struct xyla_bt_node *root;
struct node *node;
const char *key;
...
node = xyla_avl_lookup(&str_cls, &root, 0, key);
.VE
Commonly, applications want to find a node with a key,
or insert a new node with that key.
This can be achieved as follows.
.VS
.ne 16
.ta 2n +2n
struct xyla_bt_node *root;
struct xyla_bt_path path;
struct node *node;
const char *key; size_t kn;
...
node = xyla_avl_probe(&str_cls, &root, 0, key, &path);
if (!node) {
node = malloc(sizeof(struct node)); if (!node) goto nomem;
kn = strlen(key);
node->key = malloc(kn + 1);
if (!node->key) { free(node); goto nomem; }
memcpy(node->key, key, kn + 1);
/*\fR initialize the rest of the node \fP*/
xyla_avl_insert(&str_cls, &path, node);
}
/*\fR update an existing node \fP*/
.VE
.SS "A tree where nodes are accessed by index"
The next example shows how a tree can be used to
maintain a sequence of items,
with efficient access to nodes by
.IR index .
The primary `trick' here is to maintain, for each node,
the size of that node's subtree.
The index of the root node is simply the size of its left subtree;
the index of an arbitrary node situated in the left subtree
is equal to its index within the left subtree; and
the index of a node situated in the right subtree
is equal to its index within the right subtree
.I plus
the size of the left subtree,
plus 1 for the root node.
.PP
We shall use a red-black tree for this example,
in the expectation that insertions and removals will be frequent.
(A relatively static sequence may well be better handled
as a simple vector.)
The node structure must be something like this.
.VS
.ne 5
.ta 2n
struct node {
struct xyla_rb_node rb;
size_t n;
/*\fR other data \fP*/
};
.VE
The size member
.B n
needs to be updated when the tree is restructured.
We therefore need to define an update function.
.VS
.ne 9
.ta 2n +2n \w'\fBstatic void idx_upd('u
static void idx_upd(struct xyla_bt_nodecls *cls,
struct xyla_bt_node *node)
{
struct node *n = (struct node *)node,
*l = (struct node *)n->rb.bt.left,
*r = (struct node *)n->rb.bt.right;
n->n = 1 + (l ? l->n : 0) + (r ? r->n : 0);
}
.VE
This isn't a search tree,
so a key-projection function doesn't make any sense.
But we shall still need a navigation function
to find a node given its index.
Unlike above,
this navigation function will need to
.I adjust
its argument as the search descends the tree,
maintaining the invariant that it holds the index of the node sought
within the subtree rooted at the current node.
.VS
.ne 14
.ta 2n +2n \w'fBstatic int idx_nav('
static int idx_nav(struct xyla_bt_nodecls *cls,
const struct xyla_bt_node *node, void *arg)
{
const struct node *n = (const struct node *)node,
*l = (const struct node *)n->rb.bt.left;
size_t *i_inout = arg;
if (l) {
if (i < l->n) return (\-1);
else i -= l->n;
}
if (!i) return (0);
else { *i_inout = i \- 1; return (+1); }
}
.VE
Finally, we can define checking operations.
We omit a node-identification function here,
since that will depend on the content of a node.
But the checking function is easy enough.
.VS
.ne 24
.ta \w'\fBstatic int idx_chk('u
static int idx_chk(unsigned op,
const struct xyla_bt_check *chk)
.ta 2n +2n +2n +2n +\w'\fBfprintf('u
{
const struct node *node, *left, *right;
size_t n;
int rc = XYLA_RC_OK;
if (op == XYLA_CHKOP_AFTER) {
node = (const struct node *)chk->node;
left = (const struct node *)chk->left;
right = (const struct node *)chk->right;
n = 1 + (left ? left->n : 0) + (right ? right->n : 0);
if (node->n != n) {
if (chk->fp) {
xyla_bt_bughdr("EXAMPLE", chk->root, chk->fp);
fputs("node ", chk->fp);
fprintf(chk->fp, " count %lu /= %lu expected\n",
(unsigned long)node->n, (unsigned long)n);
}
rc = XYLA_RC_BAD;
}
}
return (rc);
}
.VE
We can now define the node class operations table.
.VS
static const struct xyla_bt_chkops idx_ops = {
{ sizeof(idx_ops), idx_upd },
{ 0, 0 },
{ idx_chk, 0, 0 }
};
.VE
The node class can be established as in the previous example
.VS
.ne 3
struct xyla_bt_nodecls idx_cls;
idx_cls.ops = &idx_ops.node;
.VE
The count
.B n
in the root node will always be equal to
the total number of nodes in the tree.
.VS
.ne 4
struct xyla_bt_node *root;
size_t n;
...
n = root ? ((const struct node *)root)->n : 0;
.VE
To find the node at index
.BR i ,
use code like the following.
Note that
.B i
will be
.I clobbered
by the navigation function.
.VS
.ne 6
struct xyla_bt_node *root;
struct node *node;
size_t i;
...
node = xyla_rb_lookup(&idx_cls, &root, idx_nav, &i);
.VE
The
.B node
pointer will be null if the index was out of range.
.PP
To insert a new node at an index
.BR i ,
so that existing nodes with index
.B i
or greater are shifted along by one place,
one might write code as follows.
We assume that
.B n
holds the total number of nodes in the tree:
this can either be maintained,
or determined from the root node as shown above.
.VS
.ne 19
.ta 2n +2n
struct xyla_bt_node *root;
struct xyla_bt_path path;
struct node *node;
size_t i, n;
...
if (i > n)
goto out_of_range;
else if (i == n) {
xyla_rb_lastpath(&root, &path);
xyla_rb_afterpath(&path);
} else {
node = xyla_rb_probe(&idx_cls, &root, idx_nav, &i, &path);
assert(node);
xyla_rb_beforepath(&path);
}
node = malloc(sizeof(struct node)); if (!node) goto nomem;
/*\fR initialize the node \fP*/
xyla_rb_insert(&idx_cls, &path, node);
.VE
.
.\"--------------------------------------------------------------------------
.SH "BUGS"
.
It would be really nice if there were some way to implement
.IR "copy on write" .
Then a node could be attached to multiple trees,
with a reference count.
When the node needs to be changed,
either because the application changes the data held by the node,
or because the node's links need to be changed
to maintain the tree's balance properties,
and the node is part of multiple trees,
then, in principle, a fresh copy of the node can be allocated,
leaving the other trees unchanged.
I'd like to support this by adding a
.B touch
function to the node class:
then the library will
.B touch
each node that it wants to change
and allow the application to create a copy if necessary.
This is hard for two reasons.
.hP \*o
Firstly, and most obviously,
it introduces a failure point into middle of the tree balancing algorithms.
If the
.B touch
function fails,
most likely due to lack of memory,
then the operation needs to be `rewound' somehow and an error reported.
This adds significant complexity to the core tree maintenance algorithms.
It gets even worse in the set functions,
which may end up in a situation where a failure has occurred
while the algorithm has lots of small fragments of tree,
and it seems very likely that whatever caused the initial failure
will also prevent the fragments from being reassembled.
It's tempting to declare that
.B touch
must be infallible,
but I'm not yet ready to bite that bullet.
.hP \*o
Secondly, not all tree structures can support copy-on-write in a useful way.
Splay trees maintain parent pointers:
I initially tried without, and it led to frequent rebalancing,
which had terrible performance.
As a result, if any node is replaced,
the
.I entire
tree needs to be rewritten.
(Essentially, if we replace some node
.IR n ,
then every other node from which
.I n
can be reached also needs to replaced.
In a tree without parent pointers,
that's just the node's ancestors,
and there are only logarithmically many of them.
If we have parent pointers, though,
that's the whole tree.
.PP
So currently there's no copy-on-write support,
because it introduces failures which are really hard to deal with,
and can't be implemented on all kinds of trees anyway.
.PP
There are some kinds of node annotations which are hard to maintain.
For example, there's a trick for reversing the order of subranges:
essentially, you just set a bit on a node to say
`these nodes are in the opposite order from usual'.
With that, and swapping a few left and right child links, you're done.
Correctly maintaining the swap bits and state
seems hard in the current structure.
.
.\"--------------------------------------------------------------------------
.SH "SEE ALSO"
.
.BR hsearch (3),
.BR tsearch (3).
.PP
(Seriously,
.BR hsearch (3)
is comically bad:
don't use it.
The
.BR tsearch (3)
function is still quite bad, but at least usable.)
.
.\"--------------------------------------------------------------------------
.SH "AUTHOR"
.
Mark Wooding,
.
.
.\"----- That's all, folks --------------------------------------------------
.
.\" LocalWords: Adelson aht aq aroot ascendfn avl AVLBAL AVLF bht br broot
.\" LocalWords: bsearch bt BTPOS bughdr CBOR chk CHKF chkfn CHKOP CHKRC cls
.\" LocalWords: de del DER diffht ds dummyfn el enum ev expht EXTRMPATH fam
.\" LocalWords: fB fBfprintf fBstatic fi fm fn fP fprintf fputs fR fu goto
.\" LocalWords: Guibas hP hsearch HTCHG htmax IB idfn idx ie infosz inout
.\" LocalWords: invariants isect isectht iter joinfn keyfn Landis LBIAS lht
.\" LocalWords: lu lvalue malloc mem memcpy nav navfn ne nf nodecls NODEPFX
.\" LocalWords: nodeu NODEUSFX nomem offsetof ord ordops ordopslots pos rb
.\" LocalWords: RBF RBIAS rc rebalance rebalanced rebalancing remerged rht
.\" LocalWords: rit riter rootht rsvd se Seidel sizeof Sleator sp spl
.\" LocalWords: splitat splitatfn splitrootfn sqrt sr ss str strcmp
.\" LocalWords: strerror strlen sz th TP treaps trp tsearch ue uniht upd
.\" LocalWords: updfn uppath VE Velsky w'fBstatic xf xyla Xyla's