chiark - git - mdw - qpweb/blob - qp.w

   1 %%% -*- mode: latex; mmm-classes: cweb -*-
   2
   3 \documentclass[baseclass = strayman]{cweb}
   4
   5 \usepackage{enumerate}
   6 \usepackage[T1]{fontenc}
   7 \usepackage[utf8]{inputenc}
   8 \usepackage[palatino, helvetica, courier, maths = cmr]{mdwfonts}
   9 \let\saveCwebN=\N
  10 \usepackage{mdwmath}
  11 \usepackage{crypto}
  12 \usepackage{tikz}
  13   \usetikzlibrary{arrows.meta}
  14   \usetikzlibrary{shapes.geometric}
  15   \usetikzlibrary{shapes.misc}
  16 \let\N=\saveCwebN
  17
  18 \def\bitor#1!{\begingroup\let\MOD=\OR#1\endgroup}
  19 \def\email#1{\href{mailto:#1}{\nolinkurl{#1}}}
  20 \defop\hwt{hwt}
  21 \defbrk\parens{(}{)}
  22 \defbrk\index{[}{]}
  23
  24 \def\card{\#}
  25 \let\NULL=\Lambda
  26
  27 \def\fixme#1{\marginpar{\textbf{FIXME!}}\fbox{\textbf{FIXME:} #1}}
  28
  29 \makeatletter
  30 \def\CwebNumber#1{%             % octal, hex, or decimal constant
  31     \hbox{%
  32         $%
  33             \def\?{\kern.2em}%
  34             \def\$##1{\egroup\sb{\rm##1}\bgroup}% suffix to constant
  35             \def\_{\cdot 10^{\aftergroup}}% power of ten (via dirty trick)
  36             \let\~\cwbb@@oct \let\^\cwbb@@hex
  37             {#1}%
  38         $}%
  39     }
  40 \def\CwebSectLayout{\normalfont\sffamily\bfseries}
  41 \def\CwebPointer{\mathord{\rightarrow}}
  42
  43 \tikzset
  44   {> = {Computer Modern Rightarrow[length = 5pt, width = 7pt]},
  45    algstep/.style = {draw, align = center, inner sep = 6pt},
  46    algcond/.style = {algstep, shape = rounded rectangle, aspect = 2}}
  47
  48 \title{QP-tries}
  49 \author{Tony Finch \and Mark Wooding}
  50
  51 \begin{document}
  52
  53 \maketitle
  54 \tableofcontents
  55
  56 %% hack formatting for various identifiers
  57 @s uint64_t int
  58 @s uint32_t int
  59 @s xor anything
  60
  61 %%%--------------------------------------------------------------------------
  62 @* Introduction.
  63
  64 Some blurb here?
  65
  66 The code here is all Tony's.  I haven't even reformatted it, though
  67 \texttt{CWEAVE} applies its own formatting.
  68
  69 %%%--------------------------------------------------------------------------
  70 @* Preliminaries.
  71
  72 @*1 Copyright notice.
  73
  74 More strictly, this is a notice about a lack of copyright.  The qp-trie code
  75 is, to the maximum extent permitted, not protected by copyright.  Some
  76 jurisdictions allow authors to abandon their copyrights explicitly; others
  77 don't.
  78
  79 @<Copyright notice@>=
  80 /* Written by Tony Finch \email{dot@@dotat.at} \\
  81    You may do anything with this. It has no warranty. \\
  82    \url{http://creativecommons.org/publicdomain/zero/1.0/}
  83  */
  84
  85
  86 @*1 Tables.
  87
  88 A \emph{table} is an abstract data type which maintains a dynamic mapping
  89 from null-terminated strings to non-null |void *| pointers.
  90
  91 The caller is responsible for managing the the key and value storage.  Value
  92 pointers must be word-aligned: our tables will make use of low-order pointer
  93 bits to store tags.  General-purpose allocators like |malloc| will return
  94 suitably aligned memory on conventional target platforms; modifications will
  95 need to be made to support word-addressed machines which lack space for tag
  96 bits.
  97
  98 The interface is defined in a header file \texttt{Tbl.h}.
  99
 100 @(Tbl.h@>=
 101 // Tbl.h: an abstract API for tables with string keys.
 102 @<Copyright ...@>
 103 @#
 104 #ifndef Tbl_h
 105 #define Tbl_h
 106 @#
 107 @<\texttt{Tbl.h} declarations@>
 108 @#
 109 #endif
 110
 111 @ The interface includes a number of convenience functions which can be
 112 implemented in terms of other functions, rather than by each individual table
 113 data structure.  These common implementations are given in \texttt{Tbl.c}.
 114
 115 @(Tbl.c@>=
 116 // Tbl.c: simpler wrappers for core table functions
 117 @<Copyright ...@>
 118 @#
 119 #include <stdbool.h>
 120 #include <stdlib.h>
 121 #include <string.h>
 122 @#
 123 #include "Tbl.h"
 124 @#
 125 @<Common table utilities@>
 126
 127 @ A table is represented by a pointer to the incomplete |struct| type |Tbl|.
 128 There's no function to initialize a table: an empty table is simply
 129 represented as a null pointer.  Table pointers are passed by value; functions
 130 such as |Tset| which update the table return the new pointer.
 131
 132 @<\texttt{Tbl.h} declarations@>=
 133 typedef struct Tbl Tbl;
 134
 135 @ The |Tget|\ldots\ functions retrieve the information assoicated with a key.
 136 The simplest function is |Tget|, which takes pointers to a table and a
 137 null-terminated key string, and returns the associated value pointer; if
 138 there is no entry for the given key, then |Tget| returns null.  It's not
 139 possible to associate a key with a null pointer, so this is not ambiguous.
 140
 141 If you already have the string's length lying around for some reason, you can
 142 call |Tgetl| instead, passing the length along.  The length does not include
 143 the null terminator, which must still be present.
 144
 145 @<\texttt{Tbl.h} declarations@>=
 146 void *Tgetl(Tbl *tbl, const char *key, size_t klen);
 147 void *Tget(Tbl *tbl, const char *key);
 148
 149 @ These functions are both implemented in terms of the primitive |Tgetkv|.
 150 If an entry for the key string is found in the table, then |Tgetkv| stores
 151 pointers to the original key string and the associated value in |*rkey| and
 152 |*rval|, respectively, and returns |true|; otherwise, it returns |false|
 153 leaving |*rkey| and |*rval| unchanged.
 154
 155 @<\texttt{Tbl.h} declarations@>=
 156 bool Tgetkv(Tbl *tbl, const char *key, size_t klen, const char **rkey, void **rval);
 157
 158 @ The implementations of |Tget| and |Tgetl| are trivial.
 159 @<Common table utilities@>=
 160 void *
 161 Tgetl(Tbl *tbl, const char *key, size_t len) {
 162         const char *rkey = NULL;
 163         void *rval = NULL;
 164         if(Tgetkv(tbl, key, len, &rkey, &rval))
 165                 return(rval);
 166         else
 167                 return(NULL);
 168 }
 169
 170 void *
 171 Tget(Tbl *tbl, const char *key) {
 172         return(Tgetl(tbl, key, strlen(key)));
 173 }
 174
 175 @ The |Tset|\ldots\ functions add or modify entries in the table.  They
 176 return the (possibly modified) table pointer.  The |Tset| function takes a
 177 pointer to a null-terminated key string and the |void *| value pointer to
 178 associate with it.  If the table already contains an entry for the key, then
 179 the entry is updated to associate the new value with the key; the old value
 180 pointer is lost.  Otherwise, a new entry is added to the table, associating
 181 the key with the value.  If the value pointer is null, then any existing
 182 entry for the key is removed, as if |Tdel| had been called.
 183
 184 The |Tsetl| function is the same, except that it also expects to be given the
 185 length of the key string.  As for |Tgetl| above, the length doesn't include
 186 the null terminator, which must still be present.
 187
 188 Updating the table can fail.  Failures are reported by setting the global
 189 variable |errno| to a nonzero value and returning a null pointer.  The
 190 possible |errno| values are as follows.
 191 \begin{description}
 192 \item[|EINVAL|] The value pointer is not word-aligned.
 193 \item[|ENOMEM|] There was insufficient memory to extend the table.
 194 \end{description}
 195
 196 This means that the |Tset|\ldots\ functions can return null in two different
 197 cases.
 198 \begin{itemize}
 199 \item If the value pointer is not null, then they return null if updating the
 200   table fails.  In this case, they set |errno| to a nonzero value to indicate
 201   the problem.
 202 \item If the value pointer is null, then they return null if the table is now
 203   empty.  This is a successful outcome; |errno| is not changed.
 204 \end{itemize}
 205
 206 @<\texttt{Tbl.h} declarations@>=
 207 Tbl *Tsetl(Tbl *tbl, const char *key, size_t klen, void *value);
 208 Tbl *Tset(Tbl *tbl, const char *key, void *value);
 209
 210 @ The |Tset| function is implemented in terms of |Tsetl| in the obvious way.
 211 The |Tsetl| function is primitive, implemented by the data structure.
 212
 213 @<Common table utilities@>=
 214 Tbl *
 215 Tset(Tbl *tbl, const char *key, void *value) {
 216         return(Tsetl(tbl, key, strlen(key), value));
 217 }
 218
 219 @ The |Tdel|\ldots\ functions remove entries from the table.  They return the
 220 new table pointer, which will be null if the table is now empty.  Following
 221 the now-familiar pattern, the |Tdel| function takes pointers to a table and a
 222 null-terminated key string; the |Tdell| function is additionally passed the
 223 length of the string.
 224
 225 @<\texttt{Tbl.h} declarations@>=
 226 Tbl *Tdell(Tbl *tbl, const char *key, size_t klen);
 227 Tbl *Tdel(Tbl *tbl, const char *key);
 228
 229 @ The |Tdel| and |Tdell| functions are implemented in terms of the primitive
 230 |Tdelkv|, which additionally saves pointers to the entry's original key and
 231 value in |*rkey| and |*rvalue| so that their storage can be reclaimed.
 232
 233 @<\texttt{Tbl.h} declarations@>=
 234 Tbl *Tdelkv(Tbl *tbl, const char *key, size_t klen, const char **rkey, void **rval);
 235
 236 @ The implementations of |Tdel| and |Tdell| are straightforward.
 237
 238 @<Common table utilities@>=
 239 Tbl *
 240 Tdell(Tbl *tbl, const char *key, size_t len) {
 241         const char *rkey = NULL;
 242         void *rval = NULL;
 243         return(Tdelkv(tbl, key, len, &rkey, &rval));
 244 }
 245
 246 Tbl *
 247 Tdel(Tbl *tbl, const char *key) {
 248         return(Tdell(tbl, key, strlen(key)));
 249 }
 250
 251 @ We next turn to enumerating the entries in a table.  The |Tnext| function
 252 is given a pointer to a table, and the addresses |pkey| and |pvalue| of
 253 key-string and value pointers.  On entry, |*pkey| must point to a key string
 254 for which an entry is present in the table; on exit, |*pkey| and |*pvalue|
 255 are updated to point to the next key, in lexicographic order, for which there
 256 is an entry in the table, |*pval| is set to the associated value pointer, and
 257 |true| is returned.  If no such entry exists, then |*pkey| is set to null and
 258 |false| is returned.  As a special case, if |*pkey| is null on entry, then
 259 the key and value for the table entry whose key is lexicographically first is
 260 found; if the table is entry, then, again, |false| is returned.  The initial
 261 value of |*pvalue| is ignored.
 262
 263 The |Tnextl| function is similar, except that it also stores the length of
 264 the key string in |*pklen|.  The |Tnxt| function has a simpler interface,
 265 intended for debugging: it receives a key string pointer, rather than its
 266 address, as an argument, and returns a pointer to the lexicographically next
 267 key string present in the table, or null if no such string exists; it doesn't
 268 report the associated value pointer at all.
 269
 270 @<\texttt{Tbl.h} declarations@>=
 271 bool Tnextl(Tbl *tbl, const char **pkey, size_t *pklen, void **pvalue);
 272 bool Tnext(Tbl *tbl, const char **pkey, void **pvalue);
 273 const char *Tnxt(Tbl *tbl, const char *key);
 274
 275 @ The |Tnext| and |Tnxt| functions are implemented in terms of |Tnextl| in
 276 the obvious way.
 277
 278 @<Common table utilities@>=
 279 bool
 280 Tnext(Tbl *tbl, const char **pkey, void **pvalue) {
 281         size_t len = *pkey == NULL ? 0 : strlen(*pkey);
 282         return(Tnextl(tbl, pkey, &len, pvalue));
 283 }
 284
 285 const char *
 286 Tnxt(Tbl *tbl, const char *key) {
 287         void *value = NULL;
 288         Tnext(tbl, &key, &value);
 289         return(key);
 290 }
 291
 292 @ Finally, we define some diagnostic tools.
 293
 294 The |Tsize| function reports statistics about a table.  Specifically, it
 295 stores information in the addresses given as its arguments:
 296 \begin{description}
 297 \item[|*rtype|] A statically allocated string describing the table
 298   implementation.
 299 \item[|*size|] The total memory allocated by the table.  This doesn't include
 300   keys or values, because they're allocated by the caller.
 301 \item[|*rdepth|] The total depth of the search structure.
 302   \fixme{why is this an interesting thing to know?}
 303 \item[|*rbranch|] The number of branch nodes in the search structure.
 304 \item[|*rleaves|] The number of leaf nodes in the search structure.
 305 \end{description}
 306
 307 The |Tdump| function prints a diagnostic dump of a table to standard output.
 308
 309 Both of these functions are primitive, implemented separately for each data
 310 structure.
 311
 312 @<\texttt{Tbl.h} declarations@>=
 313 void Tdump(Tbl *tbl);
 314 void Tsize(Tbl *tbl, const char **rtype, @|
 315     size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves);
 316
 317 @ \relax
 318
 319 %%%--------------------------------------------------------------------------
 320 @* QP-Tries.
 321
 322 @*1 Theory lesson.
 323
 324 QP-tries are a data structure for keeping track of a dynamic set of
 325 \emph{strings}.  It'll be best if we don't think of these strings as being
 326 made up of bytes or characters: those units are too large to be convenient.
 327 Instead, we'll work with a smaller set~$\Sigma$ of \emph{symbols}, which will
 328 usually be small fixed-size groups of bits, say four or five.  The limiting
 329 factor will be that the number of symbols $\card\Sigma$ must be at most equal
 330 to the machine's word size $w$.
 331
 332 There's another constraint, which is that the set of strings that we work
 333 with must be \emph{prefix-free}.  This usually most easily arranged by ending
 334 each string with a \emph{terminating symbol}.  For C~strings, the obvious
 335 choice is a null character, since C has a preference for null-terminated
 336 strings anyway.
 337
 338 @ The trie \emph{root} points at a node.  The trie structure consists of two
 339 kinds of nodes called \emph{branch} nodes and \emph{leaf} nodes.  Both kinds
 340 of nodes are the same size.  Both kinds of notes have a \emph{tag bit} which
 341 is clear in leaf nodes and set in branch nodes.  (The opposite convention is
 342 possible, but doesn't work as well in practice.)
 343
 344 In broad outline, searching QP-trie for a key proceeds as follows.  We start
 345 at the root.  A branch node has children corresponding to the symbols in
 346 $\Sigma$: if we encounter one, we move to the child selected by one of the
 347 symbols of the search key.  A leaf node holds a key and its associated value:
 348 if we encounter one, then we compare the search key against the key in the
 349 leaf, and return the associated value if keys match, or report failure if
 350 they don't.
 351
 352 The trie is compressed in two ways.
 353 \begin{itemize}
 354 \item Branch nodes which would have fewer than two children are elided.
 355   Branch nodes contain an \emph{offset} to indicate which of the search key's
 356   symbols to check.
 357 \item Children which don't have leaves are omitted.  A branch node contains a
 358   \emph{bitmap}, indexed by symbols in some convenient ordering: the bit
 359   corresponding to a symbol is set if the node has a corresponding child.  A
 360   branch node also holds a pointer to a \emph{twigs} vector, which simply
 361   holds the node's remaining children, in order.
 362 \end{itemize}
 363 The size of a branch node's twigs vector is equal to the Hamming weight --
 364 i.e., the number of set bits -- of its bitmap~$B$.  Moreover, the index of
 365 the child corresponding to a symbol $s \in \Sigma$, if one exists, is equal
 366 to the number of present children corresponding to symbols preceding~$s$ in
 367 the ordering, which can conveniently be computed by
 368 \[ \hwt\bigparens{B \bitand \bigparens{(1 \lsl s) - 1}} \mpunct, \]
 369 where $\hwt(\cdot)$ denotes Hamming weight.
 370
 371 This leads to the first algorithm, for lookup in a QP-trie.
 372
 373 \begin{list}
 374   {\textbf{Algorithm L} \emph{(Lookup in a QP-trie.)}}
 375   {\leftmargin=0pt \rightmargin=0pt \labelwidth=-\labelsep}
 376 \item
 377 Given a QP-trie root pointer $\texttt{ROOT}$ and a key string~$k$ of
 378 length~$n$, find the value associated with~$k$ in the trie.
 379
 380 Every node~$\texttt{N}$ has a tag bit $\texttt{TAG(N)}$.  If
 381 $\texttt{TAG(N)} = 0$ then $\texttt{N}$ points to a leaf node, which has
 382 $\texttt{KEY}$ and $\texttt{VALUE}$ fields.  If $\texttt{TAG(N)} = 1$ then
 383 $\texttt{N}$ points to a branch node, which has $\texttt{OFFSET}$,
 384 $\texttt{BITMAP}$, and $\texttt{TWIGS}$ fields.
 385
 386 \begin{enumerate}[\bfseries L1.]
 387 \item{} [Empty tree?]
 388   If $\texttt{ROOT} = \NULL$, then return failure.
 389 \item{} [Initialize.]
 390   Set $\texttt{N} \gets \texttt{ROOT}$.
 391 \item{} [Check tag.] \label{en:qp.lookup.chk-tag}
 392   If $\texttt{TAG(N)} = 0$ then then $\texttt{N}$ points to a leaf node: go to
 393   step~L\ref{en:qp.lookup.cmp-key}.  Otherwise, it points to a branch node: go
 394   to step~L\ref{en:qp.lookup.chk-len}.
 395 \item{} [Check length.] \label{en:qp.lookup.chk-len}
 396   If $\texttt{OFFSET(N)} \ge n$ then return failure.  Otherwise, continue
 397   with the next step.
 398 \item{} [Check symbol.]
 399   Let $s \gets k[\texttt{OFFSET(N)}]$.  If $\texttt{BITMAP(N)} \bitand (1 \lsl
 400   s) = 0$ then return failure.  Otherwise, set $\texttt{N}$ to be the address
 401   of $\texttt{TWIGS(N)}[\hwt(\texttt{BITMAP(N)} \bitand ((1 \lsl s) - 1))]$
 402   and go to step~L\ref{en:qp.lookup.chk-tag}.
 403 \item{} [Compare key.] \label{en:qp.lookup.cmp-key}
 404   If the strings $k$ and $\texttt{KEY(N)}$ are equal then return
 405   $\texttt{VALUE(N)}$ successfully; otherwise return failure.
 406   \qed
 407 \end{enumerate}
 408 \end{list}
 409
 410 \begin{figure}
 411   \centering
 412   \begin{tikzpicture}
 413       [x = 48mm, y = 10mm]
 414     \node[algcond] (emptyp) at (0, 8) {L1. Empty tree?};
 415     \node[algstep] (init) at (0, 6) {L2. Initialize};
 416     \node[algcond] (leafp) at (0, 4) {L3. Check tag};
 417     \node[algcond] (len) at (1, 5.5) {L4. Check length};
 418     \node[algcond] (sym) at (1, 3) {L5. Check symbol};
 419     \node[algcond] (key) at (0, 2) {L6. Compare key};
 420     \node (lose) at (2, 4) {\texttt{FAILURE}};
 421     \node (win) at (0, 0) {\texttt{SUCCESS}};
 422     \path[->]
 423       (emptyp)
 424         edge node[left] {$\texttt{ROOT} \ne \NULL$} (init)
 425         edge [out = 0, in = 140]
 426           node[above, sloped] {$\texttt{ROOT} = \NULL$} (lose)
 427       (init)
 428         edge (leafp)
 429       (leafp)
 430         edge node[left] {$\texttt{TAG(N)} = 0$} (key)
 431         edge node[sloped, above] {$\texttt{TAG(N)} = 1$} (len)
 432       (len)
 433         edge node[left] {$\texttt{OFFSET(N)} < n$} (sym)
 434         edge node[above, sloped] {$\texttt{OFFSET(N)} \ge n$} (lose)
 435       (sym)
 436         edge node[below, sloped, align = center]
 437           {bit $s$ set in \\ $\texttt{BITMAP(N)}$} (leafp)
 438         edge node[below, sloped, align = center]
 439             {bit $s$ clear in \\ $\texttt{BITMAP(N)}$} (lose)
 440       (key)
 441         edge node[left] {$k$ matches $\texttt{KEY(N)}$} (win)
 442         edge [out = 0, in = -125]
 443           node[below] {$k$ doesn't match $\texttt{KEY(N)}$} (lose);
 444   \end{tikzpicture}
 445   \caption{QP-trie lookup algorithm} \label{fig:qp.lookup}
 446 \end{figure}
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471 @* {qp.h}.
 472
 473 @(qp.h@>=
 474 // qp.h: tables implemented with quadbit popcount patricia tries.
 475 //
 476 // Written by Tony Finch <dot@@dotat.at>
 477 // You may do anything with this. It has no warranty.
 478 // <http://creativecommons.org/publicdomain/zero/1.0/>
 479
 480 // In a trie, keys are divided into digits depending on some radix
 481 // e.g. base 2 for binary tries, base 256 for byte-indexed tries.
 482 // When searching the trie, successive digits in the key, from most to
 483 // least significant, are used to select branches from successive
 484 // nodes in the trie, like:
 485 //      |for(i = 0; isbranch(node); i++) node = node->branch[key[i]];|
 486 // All of the keys in a subtrie have identical prefixes. Tries do not
 487 // need to store keys since they are implicit in the structure.
 488 //
 489 // A patricia trie or crit-bit trie is a binary trie which omits nodes that
 490 // have only one child. Nodes are annotated with the index of the bit that
 491 // is used to select the branch; indexes always increase as you go further
 492 // into the trie. Each leaf has a copy of its key so that when you find a
 493 // leaf you can verify that the untested bits match.
 494 //
 495 // The popcount() function counts the number of bits that are set in
 496 // a word. It's also known as the Hamming weight; Knuth calls it
 497 // "sideways add". https://en.wikipedia.org/wiki/popcount
 498 //
 499 // You can use popcount() to implement a sparse array of length N
 500 // containing M < N members using bitmap of length N and a packed
 501 // vector of M elements. A member i is present in the array if bit
 502 // i is set, so M == popcount(bitmap). The index of member i in
 503 // the packed vector is the popcount of the bits preceding i.
 504 //      |mask = 1 << i;|
 505 //      |if(bitmap & mask)|
 506 //              |member = vector[popcount(bitmap & mask-1)]|
 507 //
 508 // See "Hacker's Delight" by Hank Warren, section 5-1 "Counting 1
 509 // bits", subsection "applications". http://www.hackersdelight.org
 510 //
 511 // Phil Bagwell's hashed array-mapped tries (HAMT) use popcount for
 512 // compact trie nodes. String keys are hashed, and the hash is used
 513 // as the index to the trie, with radix $2^{32}$ or $2^{64}$.
 514 // http://infoscience.epfl.ch/record/64394/files/triesearches.pdf
 515 // http://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf
 516 //
 517 // A qp trie uses its keys a quadbit (or nibble or half-byte) at a
 518 // time. It is a radix $2^4$ patricia trie, so each node can have between
 519 // 2 and 16 children. It uses a 16 bit word to mark which children are
 520 // present and popcount to index them. The aim is to improve on crit-bit
 521 // tries by reducing memory usage and the number of indirections
 522 // required to look up a key.
 523 //
 524 // The worst case for a qp trie is when each branch has 2 children;
 525 // then it is the same shape as a crit-bit trie. In this case there
 526 // are n-1 internal branch nodes of two words each, so it is equally
 527 // efficient as a crit-bit trie. If the key space is denser then
 528 // branches have more children but the same overhead, so the memory
 529 // usage is less. For maximally dense tries the overhead is:
 530 //
 531 // key length (bytes)    $n$
 532 // number of leaves      $256^n$
 533 // crit-bit branches     $256^n - 1$
 534 // qp branches           $1 + 16^{2n-1} = 1 + 256^n / 16$
 535 // crit-bit depth        $8 n$
 536 // qp depth              $2 n$
 537 //
 538 // In practice, qp averages about 3.3 words per leaf vs. crit-bit's 4
 539 // words per leaf, and qp has about half the depth.
 540
 541 typedef unsigned char byte;
 542 typedef unsigned int uint;
 543
 544 typedef uint Tbitmap;
 545
 546 #if defined(HAVE_NARROW_CPU) || defined(HAVE_SLOW_POPCOUNT)
 547
 548 // NOTE: 16 bits only
 549
 550 static inline uint
 551 popcount(Tbitmap w) {
 552         w -= (w >> 1) & 0x5555;
 553         w = (w & 0x3333) + ((w >> 2) & 0x3333);
 554         w = (w + (w >> 4)) & 0x0F0F;
 555         w = (w + (w >> 8)) & 0x00FF;
 556         return(w);
 557 }
 558
 559 #else
 560
 561 static inline uint
 562 popcount(Tbitmap w) {
 563         return((uint)__builtin_popcount(w));
 564 }
 565
 566 #endif
 567
 568 // Parallel popcount of the top and bottom 16 bits in a 32 bit word. This
 569 // is probably only a win if your CPU is short of registers and/or integer
 570 // units. NOTE: The caller needs to extract the results by masking with
 571 // 0x00FF0000 and 0x000000FF for the top and bottom halves.
 572
 573 static inline uint
 574 popcount16x2(uint w) {
 575         w -= (w >> 1) & 0x55555555;
 576         w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
 577         w = (w + (w >> 4)) & 0x0F0F0F0F;
 578         w = w + (w >> 8);
 579         return(w);
 580 }
 581
 582 // A trie node is two words on 64 bit machines, or three on 32 bit
 583 // machines. A node can be a leaf or a branch. In a leaf, the value
 584 // pointer must be word-aligned to allow for the tag bits.
 585
 586 typedef struct Tleaf {
 587         const char *key;
 588         void *val;
 589 } Tleaf;
 590
 591 // Branch nodes are distinguished from leaf nodes using a couple
 592 // of flag bits which act as a dynamic type tag. They can be:
 593 //
 594 // 0 -> node is a leaf
 595 // 1 -> node is a branch, testing upper nibble
 596 // 2 -> node is a branch, testing lower nibble
 597 //
 598 // A branch node is laid out so that the flag bits correspond to the
 599 // least significant bits bits of one of the leaf node pointers. In a
 600 // leaf node, that pointer must be word-aligned so that its flag bits
 601 // are zero. We have chosen to place this restriction on the value
 602 // pointer.
 603 //
 604 // A branch contains the index of the byte that it tests. The combined
 605 // value \bitor|index << 2 % flags|! increases along the key in big-endian
 606 // lexicographic order, and increases as you go deeper into the trie.
 607 // All the keys below a branch are identical up to the nibble
 608 // identified by the branch.
 609 //
 610 // A branch has a bitmap of which subtries ("twigs") are present. The
 611 // flags, index, and bitmap are packed into one word. The other word
 612 // is a pointer to an array of trie nodes, one for each twig that is
 613 // present.
 614
 615 // XXX We hope that the compiler will not punish us for abusing unions.
 616
 617 // XXX This currently assumes a 64 bit little endian machine.
 618 // On a 32 bit machine we could perhaps fit a branch in to two words
 619 // without restricting the key length by making the index relative
 620 // instead of absolute. If the gap between nodes is larger than a 16
 621 // bit offset allows, we can insert a stepping-stone branch with only
 622 // one twig. This would make the code a bit more complicated...
 623
 624 typedef struct Tbranch {
 625         union Trie *twigs;
 626         uint64_t
 627                 flags : 2,
 628                 index : 46,
 629                 bitmap : 16;
 630 } Tbranch;
 631
 632 typedef union Trie {
 633         struct Tleaf   leaf;
 634         struct Tbranch branch;
 635 } Trie;
 636
 637 struct Tbl {
 638         union Trie root;
 639 };
 640
 641 // Test flags to determine type of this node.
 642
 643 static inline bool
 644 isbranch(Trie *t) {
 645         return(t->branch.flags != 0);
 646 }
 647
 648 // Make a bitmask for testing a branch bitmap.
 649 //
 650 // mask:
 651 // 1 -> 0xffff -> 0xfff0 -> 0xf0
 652 // 2 -> 0x0000 -> 0x000f -> 0x0f
 653 //
 654 // shift:
 655 // 1 -> 1 -> 4
 656 // 2 -> 0 -> 0
 657
 658 static inline Tbitmap
 659 nibbit(byte k, uint flags) {
 660         uint mask = ((flags - 2) ^ 0x0f) & 0xff;
 661         uint shift = (2 - flags) << 2;
 662         return(1 << ((k & mask) >> shift));
 663 }
 664
 665 // Extract a nibble from a key and turn it into a bitmask.
 666
 667 static inline Tbitmap
 668 twigbit(Trie *t, const char *key, size_t len) {
 669         uint64_t i = t->branch.index;
 670         if(i >= len) return(1);
 671         return(nibbit((byte)key[i], t->branch.flags));
 672 }
 673
 674 static inline bool
 675 hastwig(Trie *t, Tbitmap bit) {
 676         return(t->branch.bitmap & bit);
 677 }
 678
 679 static inline uint
 680 twigoff(Trie *t, Tbitmap b) {
 681         return(popcount(t->branch.bitmap & (b-1)));
 682 }
 683
 684 static inline Trie *
 685 twig(Trie *t, uint i) {
 686         return(&t->branch.twigs[i]);
 687 }
 688
 689 #ifdef HAVE_NARROW_CPU
 690
 691 #define TWIGOFFMAX(off, max, t, b) do {                         \
 692                 Tbitmap bitmap = t->branch.bitmap;              \
 693                 uint word = (bitmap << 16) | (bitmap & (b-1));  \
 694                 uint counts = popcount16x2(word);               \
 695                 off = counts & 0xFF;                            \
 696                 max = (counts >> 16) & 0xFF;                    \
 697         } while(0)
 698
 699 #else
 700
 701 #define TWIGOFFMAX(off, max, t, b) do {                 \
 702                 off = twigoff(t, b);                    \
 703                 max = popcount(t->branch.bitmap);       \
 704         } while(0)
 705
 706 #endif
 707
 708 @* {qp.c}.
 709
 710 @(qp.c@>=
 711 // qp.c: tables implemented with quadbit popcount patricia tries.
 712 //
 713 // Written by Tony Finch <dot@@dotat.at>
 714 // You may do anything with this. It has no warranty.
 715 // <http://creativecommons.org/publicdomain/zero/1.0/>
 716
 717 #include <assert.h>
 718 #include <errno.h>
 719 #include <stdbool.h>
 720 #include <stdint.h>
 721 #include <stdlib.h>
 722 #include <string.h>
 723
 724 #include "Tbl.h"
 725 #include "qp.h"
 726
 727 bool
 728 Tgetkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) {
 729         if(tbl == NULL)
 730                 return(false);
 731         Trie *t = &tbl->root;
 732         while(isbranch(t)) {
 733                 __builtin_prefetch(t->branch.twigs);
 734                 Tbitmap b = twigbit(t, key, len);
 735                 if(!hastwig(t, b))
 736                         return(false);
 737                 t = twig(t, twigoff(t, b));
 738         }
 739         if(strcmp(key, t->leaf.key) != 0)
 740                 return(false);
 741         *pkey = t->leaf.key;
 742         *pval = t->leaf.val;
 743         return(true);
 744 }
 745
 746 static bool
 747 next_rec(Trie *t, const char **pkey, size_t *plen, void **pval) {
 748         if(isbranch(t)) {
 749                 // Recurse to find either this leaf (*pkey != NULL)
 750                 // or the next one (*pkey == NULL).
 751                 Tbitmap b = twigbit(t, *pkey, *plen);
 752                 uint s, m; TWIGOFFMAX(s, m, t, b);
 753                 for(; s < m; s++)
 754                         if(next_rec(twig(t, s), pkey, plen, pval))
 755                                 return(true);
 756                 return(false);
 757         }
 758         // We have found the next leaf.
 759         if(*pkey == NULL) {
 760                 *pkey = t->leaf.key;
 761                 *plen = strlen(*pkey);
 762                 *pval = t->leaf.val;
 763                 return(true);
 764         }
 765         // We have found this leaf, so start looking for the next one.
 766         if(strcmp(*pkey, t->leaf.key) == 0) {
 767                 *pkey = NULL;
 768                 *plen = 0;
 769                 return(false);
 770         }
 771         // No match.
 772         return(false);
 773 }
 774
 775 bool
 776 Tnextl(Tbl *tbl, const char **pkey, size_t *plen, void **pval) {
 777         if(tbl == NULL) {
 778                 *pkey = NULL;
 779                 *plen = 0;
 780                 return(NULL);
 781         }
 782         return(next_rec(&tbl->root, pkey, plen, pval));
 783 }
 784
 785 Tbl *
 786 Tdelkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) {
 787         if(tbl == NULL)
 788                 return(NULL);
 789         Trie *t = &tbl->root, *p = NULL;
 790         Tbitmap b = 0;
 791         while(isbranch(t)) {
 792                 __builtin_prefetch(t->branch.twigs);
 793                 b = twigbit(t, key, len);
 794                 if(!hastwig(t, b))
 795                         return(tbl);
 796                 p = t; t = twig(t, twigoff(t, b));
 797         }
 798         if(strcmp(key, t->leaf.key) != 0)
 799                 return(tbl);
 800         *pkey = t->leaf.key;
 801         *pval = t->leaf.val;
 802         if(p == NULL) {
 803                 free(tbl);
 804                 return(NULL);
 805         }
 806         t = p; p = NULL; // Becuase t is the usual name
 807         uint s, m; TWIGOFFMAX(s, m, t, b);
 808         if(m == 2) {
 809                 // Move the other twig to the parent branch.
 810                 Trie *twigs = t->branch.twigs;
 811                 *t = *twig(t, !s);
 812                 free(twigs);
 813                 return(tbl);
 814         }
 815         memmove(t->branch.twigs+s, t->branch.twigs+s+1, sizeof(Trie) * (m - s - 1));
 816         t->branch.bitmap &= ~b;
 817         // We have now correctly removed the twig from the trie, so if
 818         // realloc() fails we can ignore it and continue to use the
 819         // slightly oversized twig array.
 820         Trie *twigs = realloc(t->branch.twigs, sizeof(Trie) * (m - 1));
 821         if(twigs != NULL) t->branch.twigs = twigs;
 822         return(tbl);
 823 }
 824
 825 Tbl *
 826 Tsetl(Tbl *tbl, const char *key, size_t len, void *val) {
 827         // Ensure flag bits are zero.
 828         if(((uint64_t)val & 3) != 0) {
 829                 errno = EINVAL;
 830                 return(NULL);
 831         }
 832         if(val == NULL)
 833                 return(Tdell(tbl, key, len));
 834         // First leaf in an empty tbl?
 835         if(tbl == NULL) {
 836                 tbl = malloc(sizeof(*tbl));
 837                 if(tbl == NULL) return(NULL);
 838                 tbl->root.leaf.key = key;
 839                 tbl->root.leaf.val = val;
 840                 return(tbl);
 841         }
 842         Trie *t = &tbl->root;
 843         // Find the most similar leaf node in the trie. We will compare
 844         // its key with our new key to find the first differing nibble,
 845         // which can be at a lower index than the point at which we
 846         // detect a difference.
 847         while(isbranch(t)) {
 848                 __builtin_prefetch(t->branch.twigs);
 849                 Tbitmap b = twigbit(t, key, len);
 850                 // Even if our key is missing from this branch we need to
 851                 // keep iterating down to a leaf. It doesn't matter which
 852                 // twig we choose since the keys are all the same up to this
 853                 // index. Note that blindly using twigoff(t, b) can cause
 854                 // an out-of-bounds index if it equals twigmax(t).
 855                 uint i = hastwig(t, b) ? twigoff(t, b) : 0;
 856                 t = twig(t, i);
 857         }
 858         // Do the keys differ, and if so, where?
 859         size_t i;
 860         for(i = 0; i <= len; i++) {
 861                 if(key[i] != t->leaf.key[i])
 862                         goto newkey;
 863         }
 864         t->leaf.val = val;
 865         return(tbl);
 866 newkey:; // We have the branch's index; what are its flags?
 867         byte k1 = (byte)key[i], k2 = (byte)t->leaf.key[i];
 868         uint f =  k1 ^ k2;
 869         f = (f & 0xf0) ? 1 : 2;
 870         // Prepare the new leaf.
 871         Tbitmap b1 = nibbit(k1, f);
 872         Trie t1 = { .leaf = { .key = key, .val = val } };
 873         // Find where to insert a branch or grow an existing branch.
 874         t = &tbl->root;
 875         while(isbranch(t)) {
 876                 __builtin_prefetch(t->branch.twigs);
 877                 if(i == t->branch.index && f == t->branch.flags)
 878                         goto growbranch;
 879                 if(i == t->branch.index && f < t->branch.flags)
 880                         goto newbranch;
 881                 if(i < t->branch.index)
 882                         goto newbranch;
 883                 Tbitmap b = twigbit(t, key, len);
 884                 assert(hastwig(t, b));
 885                 t = twig(t, twigoff(t, b));
 886         }
 887 newbranch:;
 888         Trie *twigs = malloc(sizeof(Trie) * 2);
 889         if(twigs == NULL) return(NULL);
 890         Trie t2 = *t; // Save before overwriting.
 891         Tbitmap b2 = nibbit(k2, f);
 892         t->branch.twigs = twigs;
 893         t->branch.flags = f;
 894         t->branch.index = i;
 895         t->branch.bitmap = b1 | b2;
 896         *twig(t, twigoff(t, b1)) = t1;
 897         *twig(t, twigoff(t, b2)) = t2;
 898         return(tbl);
 899 growbranch:;
 900         assert(!hastwig(t, b1));
 901         uint s, m; TWIGOFFMAX(s, m, t, b1);
 902         twigs = realloc(t->branch.twigs, sizeof(Trie) * (m + 1));
 903         if(twigs == NULL) return(NULL);
 904         memmove(twigs+s+1, twigs+s, sizeof(Trie) * (m - s));
 905         memmove(twigs+s, &t1, sizeof(Trie));
 906         t->branch.twigs = twigs;
 907         t->branch.bitmap |= b1;
 908         return(tbl);
 909 }
 910
 911 @* {fn.h}.
 912
 913 @(fn.h@>=
 914 // fn.h: quintet bit popcount patricia tries, new version
 915 //
 916 // This version uses somewhat different terminology than older
 917 // variants. The location of a quintet in the key is now called its
 918 // "offset", and the whole word containing the offset, bitmap, and tag
 919 // bit is called the "index word" (by analogy with a database index).
 920 // The precise quintet location is represented as a byte offset and a
 921 // shift. Previously a flags field contained the isbranch tag and shift,
 922 // but these are now separate.
 923 //
 924 // Instead of trying to use bit fields, this code uses accessor
 925 // functions to split up a pair of words into their constituent parts.
 926 // This should improve portability to machines with varying endianness
 927 // and/or word size.
 928 //
 929 // Written by Tony Finch <dot@@dotat.at>
 930 // You may do anything with this. It has no warranty.
 931 // <http://creativecommons.org/publicdomain/zero/1.0/>
 932
 933 typedef unsigned char byte;
 934 typedef unsigned int uint;
 935
 936 typedef uint32_t Tbitmap;
 937 typedef uint64_t Tindex;
 938
 939 const char *dump_bitmap(Tbitmap w);
 940
 941 static inline uint
 942 byte_me(char c) {
 943         return(c & 0xFF);
 944 }
 945
 946 static inline uint
 947 word_up(const char *p) {
 948         uint w = byte_me(p[0]) << 8;
 949         if(w) w |= byte_me(p[1]);
 950         return(w);
 951 }
 952
 953 #if defined(HAVE_SLOW_POPCOUNT)
 954
 955 static inline uint
 956 popcount(Tbitmap w) {
 957         w -= (w >> 1) & 0x55555555;
 958         w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
 959         w = (w + (w >> 4)) & 0x0F0F0F0F;
 960         w = (w * 0x01010101) >> 24;
 961         return(w);
 962 }
 963
 964 #else
 965
 966 static inline uint
 967 popcount(Tbitmap w) {
 968         return((uint)__builtin_popcount(w));
 969 }
 970
 971 #endif
 972
 973 typedef struct Tbl {
 974         Tindex index;
 975         void *ptr;
 976 } Trie;
 977
 978 // accessor functions, except for the index word
 979
 980 #define Tset_field(cast, elem, type, field)     \
 981         static inline void                      \
 982         Tset_##field(Trie *t, type field) {     \
 983                 t->elem = cast field;           \
 984         }                                       \
 985         struct dummy @;
 986
 987 Tset_field((void *),           ptr,   Trie *,       twigs);
 988 Tset_field((Tindex),           index, Tindex,       index);
 989 Tset_field((void *)(uint64_t), ptr,   const char *, key);
 990 Tset_field((Tindex),           index, void *,       val);
 991
 992 static inline bool Tindex_branch(Tindex i);
 993
 994 static inline bool isbranch(Trie *t) {
 995         return(Tindex_branch(t->index));
 996 }
 997
 998 #ifdef WITH_EXTRA_CHECKS
 999 #define Tbranch(t) assert(isbranch(t))
1000 #define Tleaf(t)  assert(!isbranch(t))
1001 #else
1002 #define Tbranch(t)
1003 #define Tleaf(t)
1004 #endif
1005
1006 #define Tcheck_get(type, tag, field, expr)      \
1007         static inline type                      \
1008         tag##_##field(Trie *t) {                \
1009                 tag(t);                         \
1010                 return(expr);                   \
1011         }                                       \
1012         struct dummy @;
1013
1014 Tcheck_get(Trie *,       Tbranch, twigs, t->ptr);
1015 Tcheck_get(const char *, Tleaf,   key,   t->ptr);
1016 Tcheck_get(void *,       Tleaf,   val,   (void*)t->index);
1017
1018 // index word layout
1019
1020 #define Tix_width_branch 1
1021 #define Tix_width_shift  3
1022 #define Tix_width_offset 28
1023 #define Tix_width_bitmap 32
1024
1025 #define Tix_base_branch 0
1026 #define Tix_base_shift  (Tix_base_branch + Tix_width_branch)
1027 #define Tix_base_offset (Tix_base_shift  + Tix_width_shift)
1028 #define Tix_base_bitmap (Tix_base_offset + Tix_width_offset)
1029
1030 #define Tix_place(field) ((Tindex)(field) << Tix_base_##field)
1031
1032 #define Tix_mask(field) ((1ULL << Tix_width_##field) - 1ULL)
1033
1034 #define Tunmask(field,index) ((uint)(((index) >> Tix_base_##field)      \
1035                                      & Tix_mask(field)))
1036
1037 #define Tmaxlen Tix_mask(offset)
1038
1039 // index word accessor functions
1040
1041 #define Tindex_get(type, field)                                 \
1042         static inline type                                      \
1043         Tindex_##field(Tindex i) {                              \
1044                 return(Tunmask(field, i));                      \
1045         }                                                       \
1046         struct dummy @;
1047
1048 Tindex_get(bool, branch);
1049 Tindex_get(uint, shift);
1050 Tindex_get(uint, offset);
1051 Tindex_get(Tbitmap, bitmap);
1052
1053 static inline Tindex
1054 Tindex_new(uint shift, uint offset, Tbitmap bitmap) {
1055         uint branch = 1;
1056         return( Tix_place(branch) |
1057                 Tix_place(shift)  |
1058                 Tix_place(offset) |
1059                 Tix_place(bitmap) );
1060 }
1061
1062 static inline Tindex
1063 Tbitmap_add(Tindex i, Tbitmap bitmap) {
1064         return(i | Tix_place(bitmap));
1065 }
1066
1067 static inline Tindex
1068 Tbitmap_del(Tindex i, Tbitmap bitmap) {
1069         return(i & ~Tix_place(bitmap));
1070 }
1071
1072 // sanity checks!
1073
1074 #ifndef static_assert
1075 #define static_assert_cat(a,b) a##b
1076 #define static_assert_name(line) static_assert_cat(static_assert_,line)
1077 #define static_assert(must_be_true,message)                             \
1078         static const void *static_assert_name(__LINE__)                 \
1079                 [must_be_true ? 2 : -1] = {                             \
1080                         message,                                        \
1081                         &static_assert_name(__LINE__) }
1082 #endif
1083
1084 static_assert(Tix_base_bitmap + Tix_width_bitmap == 64,
1085               "index fields must fill a 64 bit word");
1086
1087 static_assert(Tunmask(bitmap,0x1234567800000000ULL) == 0x12345678,
1088               "extracting the bitmap works");
1089
1090 static_assert(Tunmask(offset,0x0420ULL) == 0x42,
1091               "extracting the offset works");
1092
1093 static_assert(Tunmask(shift,0xFEDCBAULL) == 5,
1094               "extracting the shift works");
1095
1096 @=//  ..key[o%5==0].. ..key[o%5==1].. ..key[o%5==2].. ..key[o%5==3].. ..key[o%5==4]..@>
1097 @=// |               |               |               |               |               |@>
1098 @=//  7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0@>
1099 @=// |         |         |         |         |         |         |         |         |@>
1100 @=//  shift=0   shift=5   shift=2   shift=7   shift=4   shift=1   shift=6   shift=3@>
1101
1102 static inline byte
1103 knybble(const char *key, uint off, uint shift) {
1104         uint word = word_up(key+off);
1105         uint right = 16 - 5 - shift;
1106         return((word >> right) & 0x1FU);
1107 }
1108
1109 static inline byte
1110 nibble(Tindex i, const char *key, size_t len) {
1111         uint off = Tindex_offset(i);
1112         if(off >= len) return(0);
1113         else return(knybble(key, off, Tindex_shift(i)));
1114 }
1115
1116 static inline Tbitmap
1117 twigbit(Tindex i, const char *key, size_t len) {
1118         return(1U << nibble(i, key, len));
1119 }
1120
1121 static inline bool
1122 hastwig(Tindex i, Tbitmap bit) {
1123         return(Tindex_bitmap(i) & bit);
1124 }
1125
1126 static inline uint
1127 twigoff(Tindex i, Tbitmap bit) {
1128         return(popcount(Tindex_bitmap(i) & (bit-1)));
1129 }
1130
1131 #define TWIGOFFMAX(off, max, i, b) do {                 \
1132                 off = twigoff(i, b);                    \
1133                 max = popcount(Tindex_bitmap(i));       \
1134         } while(0)
1135
1136 @* {fn.c}.
1137
1138 @(fn.c@>=
1139 // fn.h: quintet bit popcount patricia tries, new version
1140 //
1141 // Written by Tony Finch <dot@@dotat.at>
1142 // You may do anything with this. It has no warranty.
1143 // <http://creativecommons.org/publicdomain/zero/1.0/>
1144
1145 #include <assert.h>
1146 #include <errno.h>
1147 #include <stdbool.h>
1148 #include <stdint.h>
1149 #include <stdlib.h>
1150 #include <string.h>
1151
1152 #include "Tbl.h"
1153 #include "fn.h"
1154
1155 bool
1156 Tgetkv(Tbl *t, const char *key, size_t len, const char **pkey, void **pval) {
1157         if(t == NULL)
1158                 return(false);
1159         while(isbranch(t)) {
1160                 __builtin_prefetch(t->ptr);
1161                 Tindex i = t->index;
1162                 Tbitmap b = twigbit(i, key, len);
1163                 if(!hastwig(i, b))
1164                         return(false);
1165                 t = Tbranch_twigs(t) + twigoff(i, b);
1166         }
1167         if(strcmp(key, Tleaf_key(t)) != 0)
1168                 return(false);
1169         *pkey = Tleaf_key(t);
1170         *pval = Tleaf_val(t);
1171         return(true);
1172 }
1173
1174 static bool
1175 next_rec(Trie *t, const char **pkey, size_t *plen, void **pval) {
1176         Tindex i = t->index;
1177         if(Tindex_branch(i)) {
1178                 // Recurse to find either this leaf (*pkey != NULL)
1179                 // or the next one (*pkey == NULL).
1180                 Tbitmap b = twigbit(i, *pkey, *plen);
1181                 uint s, m; TWIGOFFMAX(s, m, i, b);
1182                 for(; s < m; s++)
1183                         if(next_rec(Tbranch_twigs(t)+s, pkey, plen, pval))
1184                                 return(true);
1185                 return(false);
1186         }
1187         // We have found the next leaf.
1188         if(*pkey == NULL) {
1189                 *pkey = Tleaf_key(t);
1190                 *plen = strlen(*pkey);
1191                 *pval = Tleaf_val(t);
1192                 return(true);
1193         }
1194         // We have found this leaf, so start looking for the next one.
1195         if(strcmp(*pkey, Tleaf_key(t)) == 0) {
1196                 *pkey = NULL;
1197                 *plen = 0;
1198                 return(false);
1199         }
1200         // No match.
1201         return(false);
1202 }
1203
1204 bool
1205 Tnextl(Tbl *tbl, const char **pkey, size_t *plen, void **pval) {
1206         if(tbl == NULL) {
1207                 *pkey = NULL;
1208                 *plen = 0;
1209                 return(NULL);
1210         }
1211         return(next_rec(tbl, pkey, plen, pval));
1212 }
1213
1214 Tbl *
1215 Tdelkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) {
1216         if(tbl == NULL)
1217                 return(NULL);
1218         Trie *t = tbl, *p = NULL;
1219         Tindex i = 0;
1220         Tbitmap b = 0;
1221         while(isbranch(t)) {
1222                 __builtin_prefetch(t->ptr);
1223                 i = t->index;
1224                 b = twigbit(i, key, len);
1225                 if(!hastwig(i, b))
1226                         return(tbl);
1227                 p = t; t = Tbranch_twigs(t) + twigoff(i, b);
1228         }
1229         if(strcmp(key, Tleaf_key(t)) != 0)
1230                 return(tbl);
1231         *pkey = Tleaf_key(t);
1232         *pval = Tleaf_val(t);
1233         if(p == NULL) {
1234                 free(tbl);
1235                 return(NULL);
1236         }
1237         Trie *twigs = Tbranch_twigs(p);
1238         uint m = popcount(Tindex_bitmap(i));
1239         assert(twigs <= t && t < twigs+m);
1240         if(m == 2) {
1241                 // Move the other twig to the parent branch.
1242                 *p = twigs[twigs == t];
1243                 free(twigs);
1244                 return(tbl);
1245         }
1246         memmove(t, t+1, ((twigs + m) - (t + 1)) * sizeof(Trie));
1247         p->index = Tbitmap_del(i, b);
1248         // We have now correctly removed the twig from the trie, so if
1249         // realloc() fails we can ignore it and continue to use the
1250         // slightly oversized twig array.
1251         twigs = realloc(twigs, sizeof(Trie) * (m - 1));
1252         if(twigs != NULL) Tset_twigs(p, twigs);
1253         return(tbl);
1254 }
1255
1256 Tbl *
1257 Tsetl(Tbl *tbl, const char *key, size_t len, void *val) {
1258         if(Tindex_branch((Tindex)val) || len > Tmaxlen) {
1259                 errno = EINVAL;
1260                 return(NULL);
1261         }
1262         if(val == NULL)
1263                 return(Tdell(tbl, key, len));
1264         // First leaf in an empty tbl?
1265         if(tbl == NULL) {
1266                 tbl = malloc(sizeof(*tbl));
1267                 if(tbl == NULL) return(NULL);
1268                 Tset_key(tbl, key);
1269                 Tset_val(tbl, val);
1270                 return(tbl);
1271         }
1272         Trie *t = tbl;
1273         // Find the most similar leaf node in the trie. We will compare
1274         // its key with our new key to find the first differing nibble,
1275         // which can be at a lower index than the point at which we
1276         // detect a difference.
1277         while(isbranch(t)) {
1278                 __builtin_prefetch(t->ptr);
1279                 Tindex i = t->index;
1280                 Tbitmap b = twigbit(i, key, len);
1281                 // Even if our key is missing from this branch we need to
1282                 // keep iterating down to a leaf. It doesn't matter which
1283                 // twig we choose since the keys are all the same up to this
1284                 // index. Note that blindly using twigoff(t, b) can cause
1285                 // an out-of-bounds index if it equals twigmax(t).
1286                 uint s = hastwig(i, b) ? twigoff(i, b) : 0;
1287                 t = Tbranch_twigs(t) + s;
1288         }
1289         // Do the keys differ, and if so, where?
1290         uint off, xor, shf;
1291         const char *tkey = Tleaf_key(t);
1292         for(off = 0; off <= len; off++) {
1293                 xor = (byte)key[off] ^ (byte)tkey[off];
1294                 if(xor != 0) goto newkey;
1295         }
1296         Tset_val(t, val);
1297         return(tbl);
1298 newkey:; // We have the branch's byte index; what is its chunk index?
1299         uint bit = off * 8 + (uint)__builtin_clz(xor) + 8 - sizeof(uint) * 8;
1300         uint qo = bit / 5;
1301         off = qo * 5 / 8;
1302         shf = qo * 5 % 8;
1303         // re-index keys with adjusted offset
1304         Tbitmap nb = 1U << knybble(key,off,shf);
1305         Tbitmap tb = 1U << knybble(tkey,off,shf);
1306         // Prepare the new leaf.
1307         Trie nt;
1308         Tset_key(&nt, key);
1309         Tset_val(&nt, val);
1310         // Find where to insert a branch or grow an existing branch.
1311         t = tbl;
1312         Tindex i = 0;
1313         while(isbranch(t)) {
1314                 __builtin_prefetch(t->ptr);
1315                 i = t->index;
1316                 if(off == Tindex_offset(i) && shf == Tindex_shift(i))
1317                         goto growbranch;
1318                 if(off == Tindex_offset(i) && shf < Tindex_shift(i))
1319                         goto newbranch;
1320                 if(off < Tindex_offset(i))
1321                         goto newbranch;
1322                 Tbitmap b = twigbit(i, key, len);
1323                 assert(hastwig(i, b));
1324                 t = Tbranch_twigs(t) + twigoff(i, b);
1325         }
1326 newbranch:;
1327         Trie *twigs = malloc(sizeof(Trie) * 2);
1328         if(twigs == NULL) return(NULL);
1329         i = Tindex_new(shf, off, nb | tb);
1330         twigs[twigoff(i, nb)] = nt;
1331         twigs[twigoff(i, tb)] = *t;
1332         Tset_twigs(t, twigs);
1333         Tset_index(t, i);
1334         return(tbl);
1335 growbranch:;
1336         assert(!hastwig(i, nb));
1337         uint s, m; TWIGOFFMAX(s, m, i, nb);
1338         twigs = realloc(Tbranch_twigs(t), sizeof(Trie) * (m + 1));
1339         if(twigs == NULL) return(NULL);
1340         memmove(twigs+s+1, twigs+s, sizeof(Trie) * (m - s));
1341         memmove(twigs+s, &nt, sizeof(Trie));
1342         Tset_twigs(t, twigs);
1343         Tset_index(t, Tbitmap_add(i, nb));
1344         return(tbl);
1345 }
1346
1347 @* {qp-debug.c}.
1348
1349 @(qp-debug.c@>=
1350 // qp-debug.c: qp trie debug support
1351 //
1352 // Written by Tony Finch <dot@@dotat.at>
1353 // You may do anything with this. It has no warranty.
1354 // <http://creativecommons.org/publicdomain/zero/1.0/>
1355
1356 #include <assert.h>
1357 #include <stdbool.h>
1358 #include <stdint.h>
1359 #include <stdio.h>
1360 #include <stdlib.h>
1361
1362 #include "Tbl.h"
1363 #include "qp.h"
1364
1365 static void
1366 dump_rec(Trie *t, int d) {
1367         if(isbranch(t)) {
1368                 printf("Tdump%*s branch %p %zu %d\n", d, "", t,
1369                     (size_t)t->branch.index, t->branch.flags);
1370                 int dd = 2 + t->branch.index * 4 + (t->branch.flags - 1) * 2;
1371                 assert(dd > d);
1372                 for(uint i = 0; i < 16; i++) {
1373                         uint b = 1 << i;
1374                         if(hastwig(t, b)) {
1375                                 printf("Tdump%*s twig %d\n", d, "", i);
1376                                 dump_rec(twig(t, twigoff(t, b)), dd);
1377                         }
1378                 }
1379         } else {
1380                 printf("Tdump%*s leaf %p\n", d, "", t);
1381                 printf("Tdump%*s leaf key %p %s\n", d, "",
1382                        t->leaf.key, t->leaf.key);
1383                 printf("Tdump%*s leaf val %p\n", d, "",
1384                        t->leaf.val);
1385         }
1386 }
1387
1388 void
1389 Tdump(Tbl *tbl) {
1390         printf("Tdump root %p\n", tbl);
1391         if(tbl != NULL)
1392                 dump_rec(&tbl->root, 0);
1393 }
1394
1395 static void
1396 size_rec(Trie *t, uint d,
1397     size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) {
1398         *rsize += sizeof(*t);
1399         if(isbranch(t)) {
1400                 *rbranches += 1;
1401                 for(uint i = 0; i < 16; i++) {
1402                         Tbitmap b = 1 << i;
1403                         if(hastwig(t, b))
1404                                 size_rec(twig(t, twigoff(t, b)),
1405                                     d+1, rsize, rdepth, rbranches, rleaves);
1406                 }
1407         } else {
1408                 *rleaves += 1;
1409                 *rdepth += d;
1410         }
1411 }
1412
1413 void
1414 Tsize(Tbl *tbl, const char **rtype,
1415     size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) {
1416         *rtype = "qp";
1417         *rsize = *rdepth = *rbranches = *rleaves = 0;
1418         if(tbl != NULL)
1419                 size_rec(&tbl->root, 0, rsize, rdepth, rbranches, rleaves);
1420 }
1421
1422 @* {fn-debug.c}.
1423
1424 @(fn-debug.c@>=
1425 // fn-debug.c: fn trie debug support
1426 //
1427 // Written by Tony Finch <dot@@dotat.at>
1428 // You may do anything with this. It has no warranty.
1429 // <http://creativecommons.org/publicdomain/zero/1.0/>
1430
1431 #include <assert.h>
1432 #include <stdbool.h>
1433 #include <stdint.h>
1434 #include <stdio.h>
1435 #include <stdlib.h>
1436
1437 #include "Tbl.h"
1438 #include "fn.h"
1439
1440 const char *
1441 dump_bitmap(Tbitmap w) {
1442         static char buf[32*3];
1443         int size = (int)sizeof(buf), n = 0;
1444         n += snprintf(buf+n, size-n, "(");
1445         for(uint s = 0; s < 32; s++) {
1446                 Tbitmap b = 1 << s;
1447                 if(w & b)
1448                         n += snprintf(buf+n, size-n, "%u,", s);
1449         }
1450         if(n > 1)
1451                 buf[n-1] = ')';
1452         return buf;
1453 }
1454
1455 static void
1456 dump_rec(Trie *t, uint d) {
1457         Tindex i = t->index;
1458         if(Tindex_branch(i)) {
1459                 printf("Tdump%*s branch %p %s %zu %d\n", d, "", (void*)t,
1460                        dump_bitmap(Tindex_bitmap(i)),
1461                        (size_t)Tindex_offset(i), Tindex_shift(i));
1462                 uint dd = 1 + Tindex_offset(i) * 8 + Tindex_shift(i);
1463                 assert(dd > d);
1464                 for(uint s = 0; s < 32; s++) {
1465                         Tbitmap b = 1 << s;
1466                         if(hastwig(i, b)) {
1467                                 printf("Tdump%*s twig %d\n", d, "", s);
1468                                 dump_rec(Tbranch_twigs(t) + twigoff(i, b), dd);
1469                         }
1470                 }
1471         } else {
1472                 printf("Tdump%*s leaf %p\n", d, "",
1473                        (void *)t);
1474                 printf("Tdump%*s leaf key %p %s\n", d, "",
1475                        (const void *)Tleaf_key(t), Tleaf_key(t));
1476                 printf("Tdump%*s leaf val %p\n", d, "",
1477                        (void *)Tleaf_val(t));
1478         }
1479 }
1480
1481 void
1482 Tdump(Tbl *tbl) {
1483         printf("Tdump root %p\n", (void*)tbl);
1484         if(tbl != NULL)
1485                 dump_rec(tbl, 0);
1486 }
1487
1488 static void
1489 size_rec(Trie *t, uint d,
1490     size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) {
1491         *rsize += sizeof(*t);
1492         Tindex i = t->index;
1493         if(Tindex_branch(i)) {
1494                 *rbranches += 1;
1495                 for(uint s = 0; s < 32; s++) {
1496                         Tbitmap b = 1U << s;
1497                         if(hastwig(i, b))
1498                                 size_rec(Tbranch_twigs(t) + twigoff(i, b),
1499                                     d+1, rsize, rdepth, rbranches, rleaves);
1500                 }
1501         } else {
1502                 *rleaves += 1;
1503                 *rdepth += d;
1504         }
1505 }
1506
1507 void
1508 Tsize(Tbl *tbl, const char **rtype,
1509     size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) {
1510         *rtype = "fn";
1511         *rsize = *rdepth = *rbranches = *rleaves = 0;
1512         if(tbl != NULL)
1513                 size_rec(tbl, 0, rsize, rdepth, rbranches, rleaves);
1514 }
1515
1516 @* {toy.c}.
1517
1518 @(toy.c@>=
1519 #include <assert.h>
1520 #include <errno.h>
1521 #include <stdarg.h>
1522 #include <stdbool.h>
1523 #include <stdio.h>
1524 #include <stdlib.h>
1525 #include <string.h>
1526
1527 #include "Tbl.h"
1528
1529 static void bail(const char *msg, ...)
1530 {
1531   va_list ap;
1532
1533   va_start(ap, msg);
1534   vfprintf(stderr, msg, ap);
1535   va_end(ap);
1536   fputc('\n', stderr);
1537   exit(2);
1538 }
1539
1540 static unsigned long seq = 0;
1541 struct data {
1542   unsigned long seq;
1543   const char *k;
1544 };
1545
1546 int main(int argc, char *argv[])
1547 {
1548   Tbl *t = 0, *tt;
1549   const char *p;
1550   void *q;
1551   struct data *v;
1552   size_t len, size, depth, branches, leaves;
1553   int i;
1554
1555   for (i = 1; i < argc; i++) {
1556     p = argv[i];
1557     switch (*p++) {
1558       case '+':
1559         len = strlen(p) + 1; size = (len + sizeof(*v) + 15)&-16;
1560         v = malloc(size); if (!v) bail("out of memory");
1561         memcpy((char *)v + size - len, p, len);
1562         v->k = (char *)v + size - len; v->seq = seq++;
1563         tt = Tset(t, v->k, v);
1564         if (!tt) bail("Tset failed: %s", strerror(errno));
1565         t = tt; break;
1566       case '-':
1567         v = Tget(t, p); t = Tdel(t, p);
1568         if (v) free(v);
1569         break;
1570       case '?':
1571         v = Tget(t, p);
1572         if (!v) puts("(not found)");
1573         else printf("%lu\n", v->seq);
1574         assert(strcmp(p, v->k) == 0);
1575         break;
1576       case 'D':
1577         Tdump(t);
1578         break;
1579       case 'Z':
1580         Tsize(t, &p, &size, &depth, &branches, &leaves);
1581         printf("impl = %s; size = %zu, depth = %zu, "
1582                "branches = %zu, leaves = %zu\n",
1583                p, size, depth, branches, leaves);
1584         break;
1585       default:
1586         bail("unrecognized command `%c'", *--p);
1587     }
1588   }
1589
1590   while (t) {
1591     p = 0; len = 0; Tnextl(t, &p, &len, &q); v = q; assert(p == v->k);
1592     t = Tdell(t, p, len); free(v);
1593   }
1594
1595   return (0);
1596 }
1597
1598 @ \end{document} \iftrue % gobble following `\fi'