1 %%% -*- mode: latex; mmm-classes: cweb -*-
3 \documentclass[baseclass = strayman]{cweb}
6 \usepackage[T1]{fontenc}
7 \usepackage[utf8]{inputenc}
8 \usepackage[palatino, helvetica, courier, maths = cmr]{mdwfonts}
13 \usetikzlibrary{arrows.meta}
14 \usetikzlibrary{shapes.geometric}
15 \usetikzlibrary{shapes.misc}
18 \def\bitor#1!{\begingroup\let\MOD=\OR#1\endgroup}
19 \def\email#1{\href{mailto:#1}{\nolinkurl{#1}}}
27 \def\fixme#1{\marginpar{\textbf{FIXME!}}\fbox{\textbf{FIXME:} #1}}
30 \def\CwebNumber#1{% % octal, hex, or decimal constant
34 \def\$##1{\egroup\sb{\rm##1}\bgroup}% suffix to constant
35 \def\_{\cdot 10^{\aftergroup}}% power of ten (via dirty trick)
36 \let\~\cwbb@@oct \let\^\cwbb@@hex
40 \def\CwebSectLayout{\normalfont\sffamily\bfseries}
41 \def\CwebPointer{\mathord{\rightarrow}}
44 {> = {Computer Modern Rightarrow[length = 5pt, width = 7pt]},
45 algstep/.style = {draw, align = center, inner sep = 6pt},
46 algcond/.style = {algstep, shape = rounded rectangle, aspect = 2}}
49 \author{Tony Finch \and Mark Wooding}
56 %% hack formatting for various identifiers
61 %%%--------------------------------------------------------------------------
66 The code here is all Tony's. I haven't even reformatted it, though
67 \texttt{CWEAVE} applies its own formatting.
69 %%%--------------------------------------------------------------------------
74 More strictly, this is a notice about a lack of copyright. The qp-trie code
75 is, to the maximum extent permitted, not protected by copyright. Some
76 jurisdictions allow authors to abandon their copyrights explicitly; others
80 /* Written by Tony Finch \email{dot@@dotat.at} \\
81 You may do anything with this. It has no warranty. \\
82 \url{http://creativecommons.org/publicdomain/zero/1.0/}
88 A \emph{table} is an abstract data type which maintains a dynamic mapping
89 from null-terminated strings to non-null |void *| pointers.
91 The caller is responsible for managing the the key and value storage. Value
92 pointers must be word-aligned: our tables will make use of low-order pointer
93 bits to store tags. General-purpose allocators like |malloc| will return
94 suitably aligned memory on conventional target platforms; modifications will
95 need to be made to support word-addressed machines which lack space for tag
98 The interface is defined in a header file \texttt{Tbl.h}.
101 // Tbl.h: an abstract API for tables with string keys.
107 @<\texttt{Tbl.h} declarations@>
111 @ The interface includes a number of convenience functions which can be
112 implemented in terms of other functions, rather than by each individual table
113 data structure. These common implementations are given in \texttt{Tbl.c}.
116 // Tbl.c: simpler wrappers for core table functions
125 @<Common table utilities@>
127 @ A table is represented by a pointer to the incomplete |struct| type |Tbl|.
128 There's no function to initialize a table: an empty table is simply
129 represented as a null pointer. Table pointers are passed by value; functions
130 such as |Tset| which update the table return the new pointer.
132 @<\texttt{Tbl.h} declarations@>=
133 typedef struct Tbl Tbl;
135 @ The |Tget|\ldots\ functions retrieve the information assoicated with a key.
136 The simplest function is |Tget|, which takes pointers to a table and a
137 null-terminated key string, and returns the associated value pointer; if
138 there is no entry for the given key, then |Tget| returns null. It's not
139 possible to associate a key with a null pointer, so this is not ambiguous.
141 If you already have the string's length lying around for some reason, you can
142 call |Tgetl| instead, passing the length along. The length does not include
143 the null terminator, which must still be present.
145 @<\texttt{Tbl.h} declarations@>=
146 void *Tgetl(Tbl *tbl, const char *key, size_t klen);
147 void *Tget(Tbl *tbl, const char *key);
149 @ These functions are both implemented in terms of the primitive |Tgetkv|.
150 If an entry for the key string is found in the table, then |Tgetkv| stores
151 pointers to the original key string and the associated value in |*rkey| and
152 |*rval|, respectively, and returns |true|; otherwise, it returns |false|
153 leaving |*rkey| and |*rval| unchanged.
155 @<\texttt{Tbl.h} declarations@>=
156 bool Tgetkv(Tbl *tbl, const char *key, size_t klen, const char **rkey, void **rval);
158 @ The implementations of |Tget| and |Tgetl| are trivial.
159 @<Common table utilities@>=
161 Tgetl(Tbl *tbl, const char *key, size_t len) {
162 const char *rkey = NULL;
164 if(Tgetkv(tbl, key, len, &rkey, &rval))
171 Tget(Tbl *tbl, const char *key) {
172 return(Tgetl(tbl, key, strlen(key)));
175 @ The |Tset|\ldots\ functions add or modify entries in the table. They
176 return the (possibly modified) table pointer. The |Tset| function takes a
177 pointer to a null-terminated key string and the |void *| value pointer to
178 associate with it. If the table already contains an entry for the key, then
179 the entry is updated to associate the new value with the key; the old value
180 pointer is lost. Otherwise, a new entry is added to the table, associating
181 the key with the value. If the value pointer is null, then any existing
182 entry for the key is removed, as if |Tdel| had been called.
184 The |Tsetl| function is the same, except that it also expects to be given the
185 length of the key string. As for |Tgetl| above, the length doesn't include
186 the null terminator, which must still be present.
188 Updating the table can fail. Failures are reported by setting the global
189 variable |errno| to a nonzero value and returning a null pointer. The
190 possible |errno| values are as follows.
192 \item[|EINVAL|] The value pointer is not word-aligned.
193 \item[|ENOMEM|] There was insufficient memory to extend the table.
196 This means that the |Tset|\ldots\ functions can return null in two different
199 \item If the value pointer is not null, then they return null if updating the
200 table fails. In this case, they set |errno| to a nonzero value to indicate
202 \item If the value pointer is null, then they return null if the table is now
203 empty. This is a successful outcome; |errno| is not changed.
206 @<\texttt{Tbl.h} declarations@>=
207 Tbl *Tsetl(Tbl *tbl, const char *key, size_t klen, void *value);
208 Tbl *Tset(Tbl *tbl, const char *key, void *value);
210 @ The |Tset| function is implemented in terms of |Tsetl| in the obvious way.
211 The |Tsetl| function is primitive, implemented by the data structure.
213 @<Common table utilities@>=
215 Tset(Tbl *tbl, const char *key, void *value) {
216 return(Tsetl(tbl, key, strlen(key), value));
219 @ The |Tdel|\ldots\ functions remove entries from the table. They return the
220 new table pointer, which will be null if the table is now empty. Following
221 the now-familiar pattern, the |Tdel| function takes pointers to a table and a
222 null-terminated key string; the |Tdell| function is additionally passed the
223 length of the string.
225 @<\texttt{Tbl.h} declarations@>=
226 Tbl *Tdell(Tbl *tbl, const char *key, size_t klen);
227 Tbl *Tdel(Tbl *tbl, const char *key);
229 @ The |Tdel| and |Tdell| functions are implemented in terms of the primitive
230 |Tdelkv|, which additionally saves pointers to the entry's original key and
231 value in |*rkey| and |*rvalue| so that their storage can be reclaimed.
233 @<\texttt{Tbl.h} declarations@>=
234 Tbl *Tdelkv(Tbl *tbl, const char *key, size_t klen, const char **rkey, void **rval);
236 @ The implementations of |Tdel| and |Tdell| are straightforward.
238 @<Common table utilities@>=
240 Tdell(Tbl *tbl, const char *key, size_t len) {
241 const char *rkey = NULL;
243 return(Tdelkv(tbl, key, len, &rkey, &rval));
247 Tdel(Tbl *tbl, const char *key) {
248 return(Tdell(tbl, key, strlen(key)));
251 @ We next turn to enumerating the entries in a table. The |Tnext| function
252 is given a pointer to a table, and the addresses |pkey| and |pvalue| of
253 key-string and value pointers. On entry, |*pkey| must point to a key string
254 for which an entry is present in the table; on exit, |*pkey| and |*pvalue|
255 are updated to point to the next key, in lexicographic order, for which there
256 is an entry in the table, |*pval| is set to the associated value pointer, and
257 |true| is returned. If no such entry exists, then |*pkey| is set to null and
258 |false| is returned. As a special case, if |*pkey| is null on entry, then
259 the key and value for the table entry whose key is lexicographically first is
260 found; if the table is entry, then, again, |false| is returned. The initial
261 value of |*pvalue| is ignored.
263 The |Tnextl| function is similar, except that it also stores the length of
264 the key string in |*pklen|. The |Tnxt| function has a simpler interface,
265 intended for debugging: it receives a key string pointer, rather than its
266 address, as an argument, and returns a pointer to the lexicographically next
267 key string present in the table, or null if no such string exists; it doesn't
268 report the associated value pointer at all.
270 @<\texttt{Tbl.h} declarations@>=
271 bool Tnextl(Tbl *tbl, const char **pkey, size_t *pklen, void **pvalue);
272 bool Tnext(Tbl *tbl, const char **pkey, void **pvalue);
273 const char *Tnxt(Tbl *tbl, const char *key);
275 @ The |Tnext| and |Tnxt| functions are implemented in terms of |Tnextl| in
278 @<Common table utilities@>=
280 Tnext(Tbl *tbl, const char **pkey, void **pvalue) {
281 size_t len = *pkey == NULL ? 0 : strlen(*pkey);
282 return(Tnextl(tbl, pkey, &len, pvalue));
286 Tnxt(Tbl *tbl, const char *key) {
288 Tnext(tbl, &key, &value);
292 @ Finally, we define some diagnostic tools.
294 The |Tsize| function reports statistics about a table. Specifically, it
295 stores information in the addresses given as its arguments:
297 \item[|*rtype|] A statically allocated string describing the table
299 \item[|*size|] The total memory allocated by the table. This doesn't include
300 keys or values, because they're allocated by the caller.
301 \item[|*rdepth|] The total depth of the search structure.
302 \fixme{why is this an interesting thing to know?}
303 \item[|*rbranch|] The number of branch nodes in the search structure.
304 \item[|*rleaves|] The number of leaf nodes in the search structure.
307 The |Tdump| function prints a diagnostic dump of a table to standard output.
309 Both of these functions are primitive, implemented separately for each data
312 @<\texttt{Tbl.h} declarations@>=
313 void Tdump(Tbl *tbl);
314 void Tsize(Tbl *tbl, const char **rtype, @|
315 size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves);
319 %%%--------------------------------------------------------------------------
324 QP-tries are a data structure for keeping track of a dynamic set of
325 \emph{strings}. It'll be best if we don't think of these strings as being
326 made up of bytes or characters: those units are too large to be convenient.
327 Instead, we'll work with a smaller set~$\Sigma$ of \emph{symbols}, which will
328 usually be small fixed-size groups of bits, say four or five. The limiting
329 factor will be that the number of symbols $\card\Sigma$ must be at most equal
330 to the machine's word size $w$.
332 There's another constraint, which is that the set of strings that we work
333 with must be \emph{prefix-free}. This usually most easily arranged by ending
334 each string with a \emph{terminating symbol}. For C~strings, the obvious
335 choice is a null character, since C has a preference for null-terminated
338 @ The trie \emph{root} points at a node. The trie structure consists of two
339 kinds of nodes called \emph{branch} nodes and \emph{leaf} nodes. Both kinds
340 of nodes are the same size. Both kinds of notes have a \emph{tag bit} which
341 is clear in leaf nodes and set in branch nodes. (The opposite convention is
342 possible, but doesn't work as well in practice.)
344 In broad outline, searching QP-trie for a key proceeds as follows. We start
345 at the root. A branch node has children corresponding to the symbols in
346 $\Sigma$: if we encounter one, we move to the child selected by one of the
347 symbols of the search key. A leaf node holds a key and its associated value:
348 if we encounter one, then we compare the search key against the key in the
349 leaf, and return the associated value if keys match, or report failure if
352 The trie is compressed in two ways.
354 \item Branch nodes which would have fewer than two children are elided.
355 Branch nodes contain an \emph{offset} to indicate which of the search key's
357 \item Children which don't have leaves are omitted. A branch node contains a
358 \emph{bitmap}, indexed by symbols in some convenient ordering: the bit
359 corresponding to a symbol is set if the node has a corresponding child. A
360 branch node also holds a pointer to a \emph{twigs} vector, which simply
361 holds the node's remaining children, in order.
363 The size of a branch node's twigs vector is equal to the Hamming weight --
364 i.e., the number of set bits -- of its bitmap~$B$. Moreover, the index of
365 the child corresponding to a symbol $s \in \Sigma$, if one exists, is equal
366 to the number of present children corresponding to symbols preceding~$s$ in
367 the ordering, which can conveniently be computed by
368 \[ \hwt\bigparens{B \bitand \bigparens{(1 \lsl s) - 1}} \mpunct, \]
369 where $\hwt(\cdot)$ denotes Hamming weight.
371 This leads to the first algorithm, for lookup in a QP-trie.
374 {\textbf{Algorithm L} \emph{(Lookup in a QP-trie.)}}
375 {\leftmargin=0pt \rightmargin=0pt \labelwidth=-\labelsep}
377 Given a QP-trie root pointer $\texttt{ROOT}$ and a key string~$k$ of
378 length~$n$, find the value associated with~$k$ in the trie.
380 Every node~$\texttt{N}$ has a tag bit $\texttt{TAG(N)}$. If
381 $\texttt{TAG(N)} = 0$ then $\texttt{N}$ points to a leaf node, which has
382 $\texttt{KEY}$ and $\texttt{VALUE}$ fields. If $\texttt{TAG(N)} = 1$ then
383 $\texttt{N}$ points to a branch node, which has $\texttt{OFFSET}$,
384 $\texttt{BITMAP}$, and $\texttt{TWIGS}$ fields.
386 \begin{enumerate}[\bfseries L1.]
387 \item{} [Empty tree?]
388 If $\texttt{ROOT} = \NULL$, then return failure.
389 \item{} [Initialize.]
390 Set $\texttt{N} \gets \texttt{ROOT}$.
391 \item{} [Check tag.] \label{en:qp.lookup.chk-tag}
392 If $\texttt{TAG(N)} = 0$ then then $\texttt{N}$ points to a leaf node: go to
393 step~L\ref{en:qp.lookup.cmp-key}. Otherwise, it points to a branch node: go
394 to step~L\ref{en:qp.lookup.chk-len}.
395 \item{} [Check length.] \label{en:qp.lookup.chk-len}
396 If $\texttt{OFFSET(N)} \ge n$ then return failure. Otherwise, continue
398 \item{} [Check symbol.]
399 Let $s \gets k[\texttt{OFFSET(N)}]$. If $\texttt{BITMAP(N)} \bitand (1 \lsl
400 s) = 0$ then return failure. Otherwise, set $\texttt{N}$ to be the address
401 of $\texttt{TWIGS(N)}[\hwt(\texttt{BITMAP(N)} \bitand ((1 \lsl s) - 1))]$
402 and go to step~L\ref{en:qp.lookup.chk-tag}.
403 \item{} [Compare key.] \label{en:qp.lookup.cmp-key}
404 If the strings $k$ and $\texttt{KEY(N)}$ are equal then return
405 $\texttt{VALUE(N)}$ successfully; otherwise return failure.
414 \node[algcond] (emptyp) at (0, 8) {L1. Empty tree?};
415 \node[algstep] (init) at (0, 6) {L2. Initialize};
416 \node[algcond] (leafp) at (0, 4) {L3. Check tag};
417 \node[algcond] (len) at (1, 5.5) {L4. Check length};
418 \node[algcond] (sym) at (1, 3) {L5. Check symbol};
419 \node[algcond] (key) at (0, 2) {L6. Compare key};
420 \node (lose) at (2, 4) {\texttt{FAILURE}};
421 \node (win) at (0, 0) {\texttt{SUCCESS}};
424 edge node[left] {$\texttt{ROOT} \ne \NULL$} (init)
425 edge [out = 0, in = 140]
426 node[above, sloped] {$\texttt{ROOT} = \NULL$} (lose)
430 edge node[left] {$\texttt{TAG(N)} = 0$} (key)
431 edge node[sloped, above] {$\texttt{TAG(N)} = 1$} (len)
433 edge node[left] {$\texttt{OFFSET(N)} < n$} (sym)
434 edge node[above, sloped] {$\texttt{OFFSET(N)} \ge n$} (lose)
436 edge node[below, sloped, align = center]
437 {bit $s$ set in \\ $\texttt{BITMAP(N)}$} (leafp)
438 edge node[below, sloped, align = center]
439 {bit $s$ clear in \\ $\texttt{BITMAP(N)}$} (lose)
441 edge node[left] {$k$ matches $\texttt{KEY(N)}$} (win)
442 edge [out = 0, in = -125]
443 node[below] {$k$ doesn't match $\texttt{KEY(N)}$} (lose);
445 \caption{QP-trie lookup algorithm} \label{fig:qp.lookup}
474 // qp.h: tables implemented with quadbit popcount patricia tries.
476 // Written by Tony Finch <dot@@dotat.at>
477 // You may do anything with this. It has no warranty.
478 // <http://creativecommons.org/publicdomain/zero/1.0/>
480 // In a trie, keys are divided into digits depending on some radix
481 // e.g. base 2 for binary tries, base 256 for byte-indexed tries.
482 // When searching the trie, successive digits in the key, from most to
483 // least significant, are used to select branches from successive
484 // nodes in the trie, like:
485 // |for(i = 0; isbranch(node); i++) node = node->branch[key[i]];|
486 // All of the keys in a subtrie have identical prefixes. Tries do not
487 // need to store keys since they are implicit in the structure.
489 // A patricia trie or crit-bit trie is a binary trie which omits nodes that
490 // have only one child. Nodes are annotated with the index of the bit that
491 // is used to select the branch; indexes always increase as you go further
492 // into the trie. Each leaf has a copy of its key so that when you find a
493 // leaf you can verify that the untested bits match.
495 // The popcount() function counts the number of bits that are set in
496 // a word. It's also known as the Hamming weight; Knuth calls it
497 // "sideways add". https://en.wikipedia.org/wiki/popcount
499 // You can use popcount() to implement a sparse array of length N
500 // containing M < N members using bitmap of length N and a packed
501 // vector of M elements. A member i is present in the array if bit
502 // i is set, so M == popcount(bitmap). The index of member i in
503 // the packed vector is the popcount of the bits preceding i.
505 // |if(bitmap & mask)|
506 // |member = vector[popcount(bitmap & mask-1)]|
508 // See "Hacker's Delight" by Hank Warren, section 5-1 "Counting 1
509 // bits", subsection "applications". http://www.hackersdelight.org
511 // Phil Bagwell's hashed array-mapped tries (HAMT) use popcount for
512 // compact trie nodes. String keys are hashed, and the hash is used
513 // as the index to the trie, with radix $2^{32}$ or $2^{64}$.
514 // http://infoscience.epfl.ch/record/64394/files/triesearches.pdf
515 // http://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf
517 // A qp trie uses its keys a quadbit (or nibble or half-byte) at a
518 // time. It is a radix $2^4$ patricia trie, so each node can have between
519 // 2 and 16 children. It uses a 16 bit word to mark which children are
520 // present and popcount to index them. The aim is to improve on crit-bit
521 // tries by reducing memory usage and the number of indirections
522 // required to look up a key.
524 // The worst case for a qp trie is when each branch has 2 children;
525 // then it is the same shape as a crit-bit trie. In this case there
526 // are n-1 internal branch nodes of two words each, so it is equally
527 // efficient as a crit-bit trie. If the key space is denser then
528 // branches have more children but the same overhead, so the memory
529 // usage is less. For maximally dense tries the overhead is:
531 // key length (bytes) $n$
532 // number of leaves $256^n$
533 // crit-bit branches $256^n - 1$
534 // qp branches $1 + 16^{2n-1} = 1 + 256^n / 16$
535 // crit-bit depth $8 n$
538 // In practice, qp averages about 3.3 words per leaf vs. crit-bit's 4
539 // words per leaf, and qp has about half the depth.
541 typedef unsigned char byte;
542 typedef unsigned int uint;
544 typedef uint Tbitmap;
546 #if defined(HAVE_NARROW_CPU) || defined(HAVE_SLOW_POPCOUNT)
548 // NOTE: 16 bits only
551 popcount(Tbitmap w) {
552 w -= (w >> 1) & 0x5555;
553 w = (w & 0x3333) + ((w >> 2) & 0x3333);
554 w = (w + (w >> 4)) & 0x0F0F;
555 w = (w + (w >> 8)) & 0x00FF;
562 popcount(Tbitmap w) {
563 return((uint)__builtin_popcount(w));
568 // Parallel popcount of the top and bottom 16 bits in a 32 bit word. This
569 // is probably only a win if your CPU is short of registers and/or integer
570 // units. NOTE: The caller needs to extract the results by masking with
571 // 0x00FF0000 and 0x000000FF for the top and bottom halves.
574 popcount16x2(uint w) {
575 w -= (w >> 1) & 0x55555555;
576 w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
577 w = (w + (w >> 4)) & 0x0F0F0F0F;
582 // A trie node is two words on 64 bit machines, or three on 32 bit
583 // machines. A node can be a leaf or a branch. In a leaf, the value
584 // pointer must be word-aligned to allow for the tag bits.
586 typedef struct Tleaf {
591 // Branch nodes are distinguished from leaf nodes using a couple
592 // of flag bits which act as a dynamic type tag. They can be:
594 // 0 -> node is a leaf
595 // 1 -> node is a branch, testing upper nibble
596 // 2 -> node is a branch, testing lower nibble
598 // A branch node is laid out so that the flag bits correspond to the
599 // least significant bits bits of one of the leaf node pointers. In a
600 // leaf node, that pointer must be word-aligned so that its flag bits
601 // are zero. We have chosen to place this restriction on the value
604 // A branch contains the index of the byte that it tests. The combined
605 // value \bitor|index << 2 % flags|! increases along the key in big-endian
606 // lexicographic order, and increases as you go deeper into the trie.
607 // All the keys below a branch are identical up to the nibble
608 // identified by the branch.
610 // A branch has a bitmap of which subtries ("twigs") are present. The
611 // flags, index, and bitmap are packed into one word. The other word
612 // is a pointer to an array of trie nodes, one for each twig that is
615 // XXX We hope that the compiler will not punish us for abusing unions.
617 // XXX This currently assumes a 64 bit little endian machine.
618 // On a 32 bit machine we could perhaps fit a branch in to two words
619 // without restricting the key length by making the index relative
620 // instead of absolute. If the gap between nodes is larger than a 16
621 // bit offset allows, we can insert a stepping-stone branch with only
622 // one twig. This would make the code a bit more complicated...
624 typedef struct Tbranch {
634 struct Tbranch branch;
641 // Test flags to determine type of this node.
645 return(t->branch.flags != 0);
648 // Make a bitmask for testing a branch bitmap.
651 // 1 -> 0xffff -> 0xfff0 -> 0xf0
652 // 2 -> 0x0000 -> 0x000f -> 0x0f
658 static inline Tbitmap
659 nibbit(byte k, uint flags) {
660 uint mask = ((flags - 2) ^ 0x0f) & 0xff;
661 uint shift = (2 - flags) << 2;
662 return(1 << ((k & mask) >> shift));
665 // Extract a nibble from a key and turn it into a bitmask.
667 static inline Tbitmap
668 twigbit(Trie *t, const char *key, size_t len) {
669 uint64_t i = t->branch.index;
670 if(i >= len) return(1);
671 return(nibbit((byte)key[i], t->branch.flags));
675 hastwig(Trie *t, Tbitmap bit) {
676 return(t->branch.bitmap & bit);
680 twigoff(Trie *t, Tbitmap b) {
681 return(popcount(t->branch.bitmap & (b-1)));
685 twig(Trie *t, uint i) {
686 return(&t->branch.twigs[i]);
689 #ifdef HAVE_NARROW_CPU
691 #define TWIGOFFMAX(off, max, t, b) do { \
692 Tbitmap bitmap = t->branch.bitmap; \
693 uint word = (bitmap << 16) | (bitmap & (b-1)); \
694 uint counts = popcount16x2(word); \
695 off = counts & 0xFF; \
696 max = (counts >> 16) & 0xFF; \
701 #define TWIGOFFMAX(off, max, t, b) do { \
702 off = twigoff(t, b); \
703 max = popcount(t->branch.bitmap); \
711 // qp.c: tables implemented with quadbit popcount patricia tries.
713 // Written by Tony Finch <dot@@dotat.at>
714 // You may do anything with this. It has no warranty.
715 // <http://creativecommons.org/publicdomain/zero/1.0/>
728 Tgetkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) {
731 Trie *t = &tbl->root;
733 __builtin_prefetch(t->branch.twigs);
734 Tbitmap b = twigbit(t, key, len);
737 t = twig(t, twigoff(t, b));
739 if(strcmp(key, t->leaf.key) != 0)
747 next_rec(Trie *t, const char **pkey, size_t *plen, void **pval) {
749 // Recurse to find either this leaf (*pkey != NULL)
750 // or the next one (*pkey == NULL).
751 Tbitmap b = twigbit(t, *pkey, *plen);
752 uint s, m; TWIGOFFMAX(s, m, t, b);
754 if(next_rec(twig(t, s), pkey, plen, pval))
758 // We have found the next leaf.
761 *plen = strlen(*pkey);
765 // We have found this leaf, so start looking for the next one.
766 if(strcmp(*pkey, t->leaf.key) == 0) {
776 Tnextl(Tbl *tbl, const char **pkey, size_t *plen, void **pval) {
782 return(next_rec(&tbl->root, pkey, plen, pval));
786 Tdelkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) {
789 Trie *t = &tbl->root, *p = NULL;
792 __builtin_prefetch(t->branch.twigs);
793 b = twigbit(t, key, len);
796 p = t; t = twig(t, twigoff(t, b));
798 if(strcmp(key, t->leaf.key) != 0)
806 t = p; p = NULL; // Becuase t is the usual name
807 uint s, m; TWIGOFFMAX(s, m, t, b);
809 // Move the other twig to the parent branch.
810 Trie *twigs = t->branch.twigs;
815 memmove(t->branch.twigs+s, t->branch.twigs+s+1, sizeof(Trie) * (m - s - 1));
816 t->branch.bitmap &= ~b;
817 // We have now correctly removed the twig from the trie, so if
818 // realloc() fails we can ignore it and continue to use the
819 // slightly oversized twig array.
820 Trie *twigs = realloc(t->branch.twigs, sizeof(Trie) * (m - 1));
821 if(twigs != NULL) t->branch.twigs = twigs;
826 Tsetl(Tbl *tbl, const char *key, size_t len, void *val) {
827 // Ensure flag bits are zero.
828 if(((uint64_t)val & 3) != 0) {
833 return(Tdell(tbl, key, len));
834 // First leaf in an empty tbl?
836 tbl = malloc(sizeof(*tbl));
837 if(tbl == NULL) return(NULL);
838 tbl->root.leaf.key = key;
839 tbl->root.leaf.val = val;
842 Trie *t = &tbl->root;
843 // Find the most similar leaf node in the trie. We will compare
844 // its key with our new key to find the first differing nibble,
845 // which can be at a lower index than the point at which we
846 // detect a difference.
848 __builtin_prefetch(t->branch.twigs);
849 Tbitmap b = twigbit(t, key, len);
850 // Even if our key is missing from this branch we need to
851 // keep iterating down to a leaf. It doesn't matter which
852 // twig we choose since the keys are all the same up to this
853 // index. Note that blindly using twigoff(t, b) can cause
854 // an out-of-bounds index if it equals twigmax(t).
855 uint i = hastwig(t, b) ? twigoff(t, b) : 0;
858 // Do the keys differ, and if so, where?
860 for(i = 0; i <= len; i++) {
861 if(key[i] != t->leaf.key[i])
866 newkey:; // We have the branch's index; what are its flags?
867 byte k1 = (byte)key[i], k2 = (byte)t->leaf.key[i];
869 f = (f & 0xf0) ? 1 : 2;
870 // Prepare the new leaf.
871 Tbitmap b1 = nibbit(k1, f);
872 Trie t1 = { .leaf = { .key = key, .val = val } };
873 // Find where to insert a branch or grow an existing branch.
876 __builtin_prefetch(t->branch.twigs);
877 if(i == t->branch.index && f == t->branch.flags)
879 if(i == t->branch.index && f < t->branch.flags)
881 if(i < t->branch.index)
883 Tbitmap b = twigbit(t, key, len);
884 assert(hastwig(t, b));
885 t = twig(t, twigoff(t, b));
888 Trie *twigs = malloc(sizeof(Trie) * 2);
889 if(twigs == NULL) return(NULL);
890 Trie t2 = *t; // Save before overwriting.
891 Tbitmap b2 = nibbit(k2, f);
892 t->branch.twigs = twigs;
895 t->branch.bitmap = b1 | b2;
896 *twig(t, twigoff(t, b1)) = t1;
897 *twig(t, twigoff(t, b2)) = t2;
900 assert(!hastwig(t, b1));
901 uint s, m; TWIGOFFMAX(s, m, t, b1);
902 twigs = realloc(t->branch.twigs, sizeof(Trie) * (m + 1));
903 if(twigs == NULL) return(NULL);
904 memmove(twigs+s+1, twigs+s, sizeof(Trie) * (m - s));
905 memmove(twigs+s, &t1, sizeof(Trie));
906 t->branch.twigs = twigs;
907 t->branch.bitmap |= b1;
914 // fn.h: quintet bit popcount patricia tries, new version
916 // This version uses somewhat different terminology than older
917 // variants. The location of a quintet in the key is now called its
918 // "offset", and the whole word containing the offset, bitmap, and tag
919 // bit is called the "index word" (by analogy with a database index).
920 // The precise quintet location is represented as a byte offset and a
921 // shift. Previously a flags field contained the isbranch tag and shift,
922 // but these are now separate.
924 // Instead of trying to use bit fields, this code uses accessor
925 // functions to split up a pair of words into their constituent parts.
926 // This should improve portability to machines with varying endianness
929 // Written by Tony Finch <dot@@dotat.at>
930 // You may do anything with this. It has no warranty.
931 // <http://creativecommons.org/publicdomain/zero/1.0/>
933 typedef unsigned char byte;
934 typedef unsigned int uint;
936 typedef uint32_t Tbitmap;
937 typedef uint64_t Tindex;
939 const char *dump_bitmap(Tbitmap w);
947 word_up(const char *p) {
948 uint w = byte_me(p[0]) << 8;
949 if(w) w |= byte_me(p[1]);
953 #if defined(HAVE_SLOW_POPCOUNT)
956 popcount(Tbitmap w) {
957 w -= (w >> 1) & 0x55555555;
958 w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
959 w = (w + (w >> 4)) & 0x0F0F0F0F;
960 w = (w * 0x01010101) >> 24;
967 popcount(Tbitmap w) {
968 return((uint)__builtin_popcount(w));
978 // accessor functions, except for the index word
980 #define Tset_field(cast, elem, type, field) \
982 Tset_##field(Trie *t, type field) { \
983 t->elem = cast field; \
987 Tset_field((void *), ptr, Trie *, twigs);
988 Tset_field((Tindex), index, Tindex, index);
989 Tset_field((void *)(uint64_t), ptr, const char *, key);
990 Tset_field((Tindex), index, void *, val);
992 static inline bool Tindex_branch(Tindex i);
994 static inline bool isbranch(Trie *t) {
995 return(Tindex_branch(t->index));
998 #ifdef WITH_EXTRA_CHECKS
999 #define Tbranch(t) assert(isbranch(t))
1000 #define Tleaf(t) assert(!isbranch(t))
1006 #define Tcheck_get(type, tag, field, expr) \
1007 static inline type \
1008 tag##_##field(Trie *t) { \
1014 Tcheck_get(Trie *, Tbranch, twigs, t->ptr);
1015 Tcheck_get(const char *, Tleaf, key, t->ptr);
1016 Tcheck_get(void *, Tleaf, val, (void*)t->index);
1018 // index word layout
1020 #define Tix_width_branch 1
1021 #define Tix_width_shift 3
1022 #define Tix_width_offset 28
1023 #define Tix_width_bitmap 32
1025 #define Tix_base_branch 0
1026 #define Tix_base_shift (Tix_base_branch + Tix_width_branch)
1027 #define Tix_base_offset (Tix_base_shift + Tix_width_shift)
1028 #define Tix_base_bitmap (Tix_base_offset + Tix_width_offset)
1030 #define Tix_place(field) ((Tindex)(field) << Tix_base_##field)
1032 #define Tix_mask(field) ((1ULL << Tix_width_##field) - 1ULL)
1034 #define Tunmask(field,index) ((uint)(((index) >> Tix_base_##field) \
1037 #define Tmaxlen Tix_mask(offset)
1039 // index word accessor functions
1041 #define Tindex_get(type, field) \
1042 static inline type \
1043 Tindex_##field(Tindex i) { \
1044 return(Tunmask(field, i)); \
1048 Tindex_get(bool, branch);
1049 Tindex_get(uint, shift);
1050 Tindex_get(uint, offset);
1051 Tindex_get(Tbitmap, bitmap);
1053 static inline Tindex
1054 Tindex_new(uint shift, uint offset, Tbitmap bitmap) {
1056 return( Tix_place(branch) |
1059 Tix_place(bitmap) );
1062 static inline Tindex
1063 Tbitmap_add(Tindex i, Tbitmap bitmap) {
1064 return(i | Tix_place(bitmap));
1067 static inline Tindex
1068 Tbitmap_del(Tindex i, Tbitmap bitmap) {
1069 return(i & ~Tix_place(bitmap));
1074 #ifndef static_assert
1075 #define static_assert_cat(a,b) a##b
1076 #define static_assert_name(line) static_assert_cat(static_assert_,line)
1077 #define static_assert(must_be_true,message) \
1078 static const void *static_assert_name(__LINE__) \
1079 [must_be_true ? 2 : -1] = { \
1081 &static_assert_name(__LINE__) }
1084 static_assert(Tix_base_bitmap + Tix_width_bitmap == 64,
1085 "index fields must fill a 64 bit word");
1087 static_assert(Tunmask(bitmap,0x1234567800000000ULL) == 0x12345678,
1088 "extracting the bitmap works");
1090 static_assert(Tunmask(offset,0x0420ULL) == 0x42,
1091 "extracting the offset works");
1093 static_assert(Tunmask(shift,0xFEDCBAULL) == 5,
1094 "extracting the shift works");
1096 @=// ..key[o%5==0].. ..key[o%5==1].. ..key[o%5==2].. ..key[o%5==3].. ..key[o%5==4]..@>
1098 @=// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0@>
1099 @=// | | | | | | | | |@>
1100 @=// shift=0 shift=5 shift=2 shift=7 shift=4 shift=1 shift=6 shift=3@>
1103 knybble(const char *key, uint off, uint shift) {
1104 uint word = word_up(key+off);
1105 uint right = 16 - 5 - shift;
1106 return((word >> right) & 0x1FU);
1110 nibble(Tindex i, const char *key, size_t len) {
1111 uint off = Tindex_offset(i);
1112 if(off >= len) return(0);
1113 else return(knybble(key, off, Tindex_shift(i)));
1116 static inline Tbitmap
1117 twigbit(Tindex i, const char *key, size_t len) {
1118 return(1U << nibble(i, key, len));
1122 hastwig(Tindex i, Tbitmap bit) {
1123 return(Tindex_bitmap(i) & bit);
1127 twigoff(Tindex i, Tbitmap bit) {
1128 return(popcount(Tindex_bitmap(i) & (bit-1)));
1131 #define TWIGOFFMAX(off, max, i, b) do { \
1132 off = twigoff(i, b); \
1133 max = popcount(Tindex_bitmap(i)); \
1139 // fn.h: quintet bit popcount patricia tries, new version
1141 // Written by Tony Finch <dot@@dotat.at>
1142 // You may do anything with this. It has no warranty.
1143 // <http://creativecommons.org/publicdomain/zero/1.0/>
1147 #include <stdbool.h>
1156 Tgetkv(Tbl *t, const char *key, size_t len, const char **pkey, void **pval) {
1159 while(isbranch(t)) {
1160 __builtin_prefetch(t->ptr);
1161 Tindex i = t->index;
1162 Tbitmap b = twigbit(i, key, len);
1165 t = Tbranch_twigs(t) + twigoff(i, b);
1167 if(strcmp(key, Tleaf_key(t)) != 0)
1169 *pkey = Tleaf_key(t);
1170 *pval = Tleaf_val(t);
1175 next_rec(Trie *t, const char **pkey, size_t *plen, void **pval) {
1176 Tindex i = t->index;
1177 if(Tindex_branch(i)) {
1178 // Recurse to find either this leaf (*pkey != NULL)
1179 // or the next one (*pkey == NULL).
1180 Tbitmap b = twigbit(i, *pkey, *plen);
1181 uint s, m; TWIGOFFMAX(s, m, i, b);
1183 if(next_rec(Tbranch_twigs(t)+s, pkey, plen, pval))
1187 // We have found the next leaf.
1189 *pkey = Tleaf_key(t);
1190 *plen = strlen(*pkey);
1191 *pval = Tleaf_val(t);
1194 // We have found this leaf, so start looking for the next one.
1195 if(strcmp(*pkey, Tleaf_key(t)) == 0) {
1205 Tnextl(Tbl *tbl, const char **pkey, size_t *plen, void **pval) {
1211 return(next_rec(tbl, pkey, plen, pval));
1215 Tdelkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) {
1218 Trie *t = tbl, *p = NULL;
1221 while(isbranch(t)) {
1222 __builtin_prefetch(t->ptr);
1224 b = twigbit(i, key, len);
1227 p = t; t = Tbranch_twigs(t) + twigoff(i, b);
1229 if(strcmp(key, Tleaf_key(t)) != 0)
1231 *pkey = Tleaf_key(t);
1232 *pval = Tleaf_val(t);
1237 Trie *twigs = Tbranch_twigs(p);
1238 uint m = popcount(Tindex_bitmap(i));
1239 assert(twigs <= t && t < twigs+m);
1241 // Move the other twig to the parent branch.
1242 *p = twigs[twigs == t];
1246 memmove(t, t+1, ((twigs + m) - (t + 1)) * sizeof(Trie));
1247 p->index = Tbitmap_del(i, b);
1248 // We have now correctly removed the twig from the trie, so if
1249 // realloc() fails we can ignore it and continue to use the
1250 // slightly oversized twig array.
1251 twigs = realloc(twigs, sizeof(Trie) * (m - 1));
1252 if(twigs != NULL) Tset_twigs(p, twigs);
1257 Tsetl(Tbl *tbl, const char *key, size_t len, void *val) {
1258 if(Tindex_branch((Tindex)val) || len > Tmaxlen) {
1263 return(Tdell(tbl, key, len));
1264 // First leaf in an empty tbl?
1266 tbl = malloc(sizeof(*tbl));
1267 if(tbl == NULL) return(NULL);
1273 // Find the most similar leaf node in the trie. We will compare
1274 // its key with our new key to find the first differing nibble,
1275 // which can be at a lower index than the point at which we
1276 // detect a difference.
1277 while(isbranch(t)) {
1278 __builtin_prefetch(t->ptr);
1279 Tindex i = t->index;
1280 Tbitmap b = twigbit(i, key, len);
1281 // Even if our key is missing from this branch we need to
1282 // keep iterating down to a leaf. It doesn't matter which
1283 // twig we choose since the keys are all the same up to this
1284 // index. Note that blindly using twigoff(t, b) can cause
1285 // an out-of-bounds index if it equals twigmax(t).
1286 uint s = hastwig(i, b) ? twigoff(i, b) : 0;
1287 t = Tbranch_twigs(t) + s;
1289 // Do the keys differ, and if so, where?
1291 const char *tkey = Tleaf_key(t);
1292 for(off = 0; off <= len; off++) {
1293 xor = (byte)key[off] ^ (byte)tkey[off];
1294 if(xor != 0) goto newkey;
1298 newkey:; // We have the branch's byte index; what is its chunk index?
1299 uint bit = off * 8 + (uint)__builtin_clz(xor) + 8 - sizeof(uint) * 8;
1303 // re-index keys with adjusted offset
1304 Tbitmap nb = 1U << knybble(key,off,shf);
1305 Tbitmap tb = 1U << knybble(tkey,off,shf);
1306 // Prepare the new leaf.
1310 // Find where to insert a branch or grow an existing branch.
1313 while(isbranch(t)) {
1314 __builtin_prefetch(t->ptr);
1316 if(off == Tindex_offset(i) && shf == Tindex_shift(i))
1318 if(off == Tindex_offset(i) && shf < Tindex_shift(i))
1320 if(off < Tindex_offset(i))
1322 Tbitmap b = twigbit(i, key, len);
1323 assert(hastwig(i, b));
1324 t = Tbranch_twigs(t) + twigoff(i, b);
1327 Trie *twigs = malloc(sizeof(Trie) * 2);
1328 if(twigs == NULL) return(NULL);
1329 i = Tindex_new(shf, off, nb | tb);
1330 twigs[twigoff(i, nb)] = nt;
1331 twigs[twigoff(i, tb)] = *t;
1332 Tset_twigs(t, twigs);
1336 assert(!hastwig(i, nb));
1337 uint s, m; TWIGOFFMAX(s, m, i, nb);
1338 twigs = realloc(Tbranch_twigs(t), sizeof(Trie) * (m + 1));
1339 if(twigs == NULL) return(NULL);
1340 memmove(twigs+s+1, twigs+s, sizeof(Trie) * (m - s));
1341 memmove(twigs+s, &nt, sizeof(Trie));
1342 Tset_twigs(t, twigs);
1343 Tset_index(t, Tbitmap_add(i, nb));
1350 // qp-debug.c: qp trie debug support
1352 // Written by Tony Finch <dot@@dotat.at>
1353 // You may do anything with this. It has no warranty.
1354 // <http://creativecommons.org/publicdomain/zero/1.0/>
1357 #include <stdbool.h>
1366 dump_rec(Trie *t, int d) {
1368 printf("Tdump%*s branch %p %zu %d\n", d, "", t,
1369 (size_t)t->branch.index, t->branch.flags);
1370 int dd = 2 + t->branch.index * 4 + (t->branch.flags - 1) * 2;
1372 for(uint i = 0; i < 16; i++) {
1375 printf("Tdump%*s twig %d\n", d, "", i);
1376 dump_rec(twig(t, twigoff(t, b)), dd);
1380 printf("Tdump%*s leaf %p\n", d, "", t);
1381 printf("Tdump%*s leaf key %p %s\n", d, "",
1382 t->leaf.key, t->leaf.key);
1383 printf("Tdump%*s leaf val %p\n", d, "",
1390 printf("Tdump root %p\n", tbl);
1392 dump_rec(&tbl->root, 0);
1396 size_rec(Trie *t, uint d,
1397 size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) {
1398 *rsize += sizeof(*t);
1401 for(uint i = 0; i < 16; i++) {
1404 size_rec(twig(t, twigoff(t, b)),
1405 d+1, rsize, rdepth, rbranches, rleaves);
1414 Tsize(Tbl *tbl, const char **rtype,
1415 size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) {
1417 *rsize = *rdepth = *rbranches = *rleaves = 0;
1419 size_rec(&tbl->root, 0, rsize, rdepth, rbranches, rleaves);
1425 // fn-debug.c: fn trie debug support
1427 // Written by Tony Finch <dot@@dotat.at>
1428 // You may do anything with this. It has no warranty.
1429 // <http://creativecommons.org/publicdomain/zero/1.0/>
1432 #include <stdbool.h>
1441 dump_bitmap(Tbitmap w) {
1442 static char buf[32*3];
1443 int size = (int)sizeof(buf), n = 0;
1444 n += snprintf(buf+n, size-n, "(");
1445 for(uint s = 0; s < 32; s++) {
1448 n += snprintf(buf+n, size-n, "%u,", s);
1456 dump_rec(Trie *t, uint d) {
1457 Tindex i = t->index;
1458 if(Tindex_branch(i)) {
1459 printf("Tdump%*s branch %p %s %zu %d\n", d, "", (void*)t,
1460 dump_bitmap(Tindex_bitmap(i)),
1461 (size_t)Tindex_offset(i), Tindex_shift(i));
1462 uint dd = 1 + Tindex_offset(i) * 8 + Tindex_shift(i);
1464 for(uint s = 0; s < 32; s++) {
1467 printf("Tdump%*s twig %d\n", d, "", s);
1468 dump_rec(Tbranch_twigs(t) + twigoff(i, b), dd);
1472 printf("Tdump%*s leaf %p\n", d, "",
1474 printf("Tdump%*s leaf key %p %s\n", d, "",
1475 (const void *)Tleaf_key(t), Tleaf_key(t));
1476 printf("Tdump%*s leaf val %p\n", d, "",
1477 (void *)Tleaf_val(t));
1483 printf("Tdump root %p\n", (void*)tbl);
1489 size_rec(Trie *t, uint d,
1490 size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) {
1491 *rsize += sizeof(*t);
1492 Tindex i = t->index;
1493 if(Tindex_branch(i)) {
1495 for(uint s = 0; s < 32; s++) {
1496 Tbitmap b = 1U << s;
1498 size_rec(Tbranch_twigs(t) + twigoff(i, b),
1499 d+1, rsize, rdepth, rbranches, rleaves);
1508 Tsize(Tbl *tbl, const char **rtype,
1509 size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) {
1511 *rsize = *rdepth = *rbranches = *rleaves = 0;
1513 size_rec(tbl, 0, rsize, rdepth, rbranches, rleaves);
1522 #include <stdbool.h>
1529 static void bail(const char *msg, ...)
1534 vfprintf(stderr, msg, ap);
1536 fputc('\n', stderr);
1540 static unsigned long seq = 0;
1546 int main(int argc, char *argv[])
1552 size_t len, size, depth, branches, leaves;
1555 for (i = 1; i < argc; i++) {
1559 len = strlen(p) + 1; size = (len + sizeof(*v) + 15)&-16;
1560 v = malloc(size); if (!v) bail("out of memory");
1561 memcpy((char *)v + size - len, p, len);
1562 v->k = (char *)v + size - len; v->seq = seq++;
1563 tt = Tset(t, v->k, v);
1564 if (!tt) bail("Tset failed: %s", strerror(errno));
1567 v = Tget(t, p); t = Tdel(t, p);
1572 if (!v) puts("(not found)");
1573 else printf("%lu\n", v->seq);
1574 assert(strcmp(p, v->k) == 0);
1580 Tsize(t, &p, &size, &depth, &branches, &leaves);
1581 printf("impl = %s; size = %zu, depth = %zu, "
1582 "branches = %zu, leaves = %zu\n",
1583 p, size, depth, branches, leaves);
1586 bail("unrecognized command `%c'", *--p);
1591 p = 0; len = 0; Tnextl(t, &p, &len, &q); v = q; assert(p == v->k);
1592 t = Tdell(t, p, len); free(v);
1598 @ \end{document} \iftrue % gobble following `\fi'