%%% -*- mode: latex; mmm-classes: cweb -*- \documentclass[baseclass = strayman]{cweb} \usepackage{enumerate} \usepackage[T1]{fontenc} \usepackage[utf8]{inputenc} \usepackage[palatino, helvetica, courier, maths = cmr]{mdwfonts} \let\saveCwebN=\N \usepackage{mdwmath} \usepackage{crypto} \usepackage{tikz} \usetikzlibrary{arrows.meta} \usetikzlibrary{shapes.geometric} \usetikzlibrary{shapes.misc} \let\N=\saveCwebN \def\bitor#1!{\begingroup\let\MOD=\OR#1\endgroup} \def\email#1{\href{mailto:#1}{\nolinkurl{#1}}} \defop\hwt{hwt} \defbrk\parens{(}{)} \defbrk\index{[}{]} \def\card{\#} \let\NULL=\Lambda \def\fixme#1{\marginpar{\textbf{FIXME!}}\fbox{\textbf{FIXME:} #1}} \makeatletter \def\CwebNumber#1{% % octal, hex, or decimal constant \hbox{% $% \def\?{\kern.2em}% \def\$##1{\egroup\sb{\rm##1}\bgroup}% suffix to constant \def\_{\cdot 10^{\aftergroup}}% power of ten (via dirty trick) \let\~\cwbb@@oct \let\^\cwbb@@hex {#1}% $}% } \def\CwebSectLayout{\normalfont\sffamily\bfseries} \def\CwebPointer{\mathord{\rightarrow}} \tikzset {> = {Computer Modern Rightarrow[length = 5pt, width = 7pt]}, algstep/.style = {draw, align = center, inner sep = 6pt}, algcond/.style = {algstep, shape = rounded rectangle, aspect = 2}} \title{QP-tries} \author{Tony Finch \and Mark Wooding} \begin{document} \maketitle \tableofcontents %% hack formatting for various identifiers @s uint64_t int @s uint32_t int @s xor anything %%%-------------------------------------------------------------------------- @* Introduction. Some blurb here? The code here is all Tony's. I haven't even reformatted it, though \texttt{CWEAVE} applies its own formatting. %%%-------------------------------------------------------------------------- @* Preliminaries. @*1 Copyright notice. More strictly, this is a notice about a lack of copyright. The qp-trie code is, to the maximum extent permitted, not protected by copyright. Some jurisdictions allow authors to abandon their copyrights explicitly; others don't. @= /* Written by Tony Finch \email{dot@@dotat.at} \\ You may do anything with this. It has no warranty. \\ \url{http://creativecommons.org/publicdomain/zero/1.0/} */ @*1 Tables. A \emph{table} is an abstract data type which maintains a dynamic mapping from null-terminated strings to non-null |void *| pointers. The caller is responsible for managing the the key and value storage. Value pointers must be word-aligned: our tables will make use of low-order pointer bits to store tags. General-purpose allocators like |malloc| will return suitably aligned memory on conventional target platforms; modifications will need to be made to support word-addressed machines which lack space for tag bits. The interface is defined in a header file \texttt{Tbl.h}. @(Tbl.h@>= // Tbl.h: an abstract API for tables with string keys. @ @# #ifndef Tbl_h #define Tbl_h @# @<\texttt{Tbl.h} declarations@> @# #endif @ The interface includes a number of convenience functions which can be implemented in terms of other functions, rather than by each individual table data structure. These common implementations are given in \texttt{Tbl.c}. @(Tbl.c@>= // Tbl.c: simpler wrappers for core table functions @ @# #include #include #include @# #include "Tbl.h" @# @ @ A table is represented by a pointer to the incomplete |struct| type |Tbl|. There's no function to initialize a table: an empty table is simply represented as a null pointer. Table pointers are passed by value; functions such as |Tset| which update the table return the new pointer. @<\texttt{Tbl.h} declarations@>= typedef struct Tbl Tbl; @ The |Tget|\ldots\ functions retrieve the information assoicated with a key. The simplest function is |Tget|, which takes pointers to a table and a null-terminated key string, and returns the associated value pointer; if there is no entry for the given key, then |Tget| returns null. It's not possible to associate a key with a null pointer, so this is not ambiguous. If you already have the string's length lying around for some reason, you can call |Tgetl| instead, passing the length along. The length does not include the null terminator, which must still be present. @<\texttt{Tbl.h} declarations@>= void *Tgetl(Tbl *tbl, const char *key, size_t klen); void *Tget(Tbl *tbl, const char *key); @ These functions are both implemented in terms of the primitive |Tgetkv|. If an entry for the key string is found in the table, then |Tgetkv| stores pointers to the original key string and the associated value in |*rkey| and |*rval|, respectively, and returns |true|; otherwise, it returns |false| leaving |*rkey| and |*rval| unchanged. @<\texttt{Tbl.h} declarations@>= bool Tgetkv(Tbl *tbl, const char *key, size_t klen, const char **rkey, void **rval); @ The implementations of |Tget| and |Tgetl| are trivial. @= void * Tgetl(Tbl *tbl, const char *key, size_t len) { const char *rkey = NULL; void *rval = NULL; if(Tgetkv(tbl, key, len, &rkey, &rval)) return(rval); else return(NULL); } void * Tget(Tbl *tbl, const char *key) { return(Tgetl(tbl, key, strlen(key))); } @ The |Tset|\ldots\ functions add or modify entries in the table. They return the (possibly modified) table pointer. The |Tset| function takes a pointer to a null-terminated key string and the |void *| value pointer to associate with it. If the table already contains an entry for the key, then the entry is updated to associate the new value with the key; the old value pointer is lost. Otherwise, a new entry is added to the table, associating the key with the value. If the value pointer is null, then any existing entry for the key is removed, as if |Tdel| had been called. The |Tsetl| function is the same, except that it also expects to be given the length of the key string. As for |Tgetl| above, the length doesn't include the null terminator, which must still be present. Updating the table can fail. Failures are reported by setting the global variable |errno| to a nonzero value and returning a null pointer. The possible |errno| values are as follows. \begin{description} \item[|EINVAL|] The value pointer is not word-aligned. \item[|ENOMEM|] There was insufficient memory to extend the table. \end{description} This means that the |Tset|\ldots\ functions can return null in two different cases. \begin{itemize} \item If the value pointer is not null, then they return null if updating the table fails. In this case, they set |errno| to a nonzero value to indicate the problem. \item If the value pointer is null, then they return null if the table is now empty. This is a successful outcome; |errno| is not changed. \end{itemize} @<\texttt{Tbl.h} declarations@>= Tbl *Tsetl(Tbl *tbl, const char *key, size_t klen, void *value); Tbl *Tset(Tbl *tbl, const char *key, void *value); @ The |Tset| function is implemented in terms of |Tsetl| in the obvious way. The |Tsetl| function is primitive, implemented by the data structure. @= Tbl * Tset(Tbl *tbl, const char *key, void *value) { return(Tsetl(tbl, key, strlen(key), value)); } @ The |Tdel|\ldots\ functions remove entries from the table. They return the new table pointer, which will be null if the table is now empty. Following the now-familiar pattern, the |Tdel| function takes pointers to a table and a null-terminated key string; the |Tdell| function is additionally passed the length of the string. @<\texttt{Tbl.h} declarations@>= Tbl *Tdell(Tbl *tbl, const char *key, size_t klen); Tbl *Tdel(Tbl *tbl, const char *key); @ The |Tdel| and |Tdell| functions are implemented in terms of the primitive |Tdelkv|, which additionally saves pointers to the entry's original key and value in |*rkey| and |*rvalue| so that their storage can be reclaimed. @<\texttt{Tbl.h} declarations@>= Tbl *Tdelkv(Tbl *tbl, const char *key, size_t klen, const char **rkey, void **rval); @ The implementations of |Tdel| and |Tdell| are straightforward. @= Tbl * Tdell(Tbl *tbl, const char *key, size_t len) { const char *rkey = NULL; void *rval = NULL; return(Tdelkv(tbl, key, len, &rkey, &rval)); } Tbl * Tdel(Tbl *tbl, const char *key) { return(Tdell(tbl, key, strlen(key))); } @ We next turn to enumerating the entries in a table. The |Tnext| function is given a pointer to a table, and the addresses |pkey| and |pvalue| of key-string and value pointers. On entry, |*pkey| must point to a key string for which an entry is present in the table; on exit, |*pkey| and |*pvalue| are updated to point to the next key, in lexicographic order, for which there is an entry in the table, |*pval| is set to the associated value pointer, and |true| is returned. If no such entry exists, then |*pkey| is set to null and |false| is returned. As a special case, if |*pkey| is null on entry, then the key and value for the table entry whose key is lexicographically first is found; if the table is entry, then, again, |false| is returned. The initial value of |*pvalue| is ignored. The |Tnextl| function is similar, except that it also stores the length of the key string in |*pklen|. The |Tnxt| function has a simpler interface, intended for debugging: it receives a key string pointer, rather than its address, as an argument, and returns a pointer to the lexicographically next key string present in the table, or null if no such string exists; it doesn't report the associated value pointer at all. @<\texttt{Tbl.h} declarations@>= bool Tnextl(Tbl *tbl, const char **pkey, size_t *pklen, void **pvalue); bool Tnext(Tbl *tbl, const char **pkey, void **pvalue); const char *Tnxt(Tbl *tbl, const char *key); @ The |Tnext| and |Tnxt| functions are implemented in terms of |Tnextl| in the obvious way. @= bool Tnext(Tbl *tbl, const char **pkey, void **pvalue) { size_t len = *pkey == NULL ? 0 : strlen(*pkey); return(Tnextl(tbl, pkey, &len, pvalue)); } const char * Tnxt(Tbl *tbl, const char *key) { void *value = NULL; Tnext(tbl, &key, &value); return(key); } @ Finally, we define some diagnostic tools. The |Tsize| function reports statistics about a table. Specifically, it stores information in the addresses given as its arguments: \begin{description} \item[|*rtype|] A statically allocated string describing the table implementation. \item[|*size|] The total memory allocated by the table. This doesn't include keys or values, because they're allocated by the caller. \item[|*rdepth|] The total depth of the search structure. \fixme{why is this an interesting thing to know?} \item[|*rbranch|] The number of branch nodes in the search structure. \item[|*rleaves|] The number of leaf nodes in the search structure. \end{description} The |Tdump| function prints a diagnostic dump of a table to standard output. Both of these functions are primitive, implemented separately for each data structure. @<\texttt{Tbl.h} declarations@>= void Tdump(Tbl *tbl); void Tsize(Tbl *tbl, const char **rtype, @| size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves); @ \relax %%%-------------------------------------------------------------------------- @* QP-Tries. @*1 Theory lesson. QP-tries are a data structure for keeping track of a dynamic set of \emph{strings}. It'll be best if we don't think of these strings as being made up of bytes or characters: those units are too large to be convenient. Instead, we'll work with a smaller set~$\Sigma$ of \emph{symbols}, which will usually be small fixed-size groups of bits, say four or five. The limiting factor will be that the number of symbols $\card\Sigma$ must be at most equal to the machine's word size $w$. There's another constraint, which is that the set of strings that we work with must be \emph{prefix-free}. This usually most easily arranged by ending each string with a \emph{terminating symbol}. For C~strings, the obvious choice is a null character, since C has a preference for null-terminated strings anyway. @ The trie \emph{root} points at a node. The trie structure consists of two kinds of nodes called \emph{branch} nodes and \emph{leaf} nodes. Both kinds of nodes are the same size. Both kinds of notes have a \emph{tag bit} which is clear in leaf nodes and set in branch nodes. (The opposite convention is possible, but doesn't work as well in practice.) In broad outline, searching QP-trie for a key proceeds as follows. We start at the root. A branch node has children corresponding to the symbols in $\Sigma$: if we encounter one, we move to the child selected by one of the symbols of the search key. A leaf node holds a key and its associated value: if we encounter one, then we compare the search key against the key in the leaf, and return the associated value if keys match, or report failure if they don't. The trie is compressed in two ways. \begin{itemize} \item Branch nodes which would have fewer than two children are elided. Branch nodes contain an \emph{offset} to indicate which of the search key's symbols to check. \item Children which don't have leaves are omitted. A branch node contains a \emph{bitmap}, indexed by symbols in some convenient ordering: the bit corresponding to a symbol is set if the node has a corresponding child. A branch node also holds a pointer to a \emph{twigs} vector, which simply holds the node's remaining children, in order. \end{itemize} The size of a branch node's twigs vector is equal to the Hamming weight -- i.e., the number of set bits -- of its bitmap~$B$. Moreover, the index of the child corresponding to a symbol $s \in \Sigma$, if one exists, is equal to the number of present children corresponding to symbols preceding~$s$ in the ordering, which can conveniently be computed by \[ \hwt\bigparens{B \bitand \bigparens{(1 \lsl s) - 1}} \mpunct, \] where $\hwt(\cdot)$ denotes Hamming weight. This leads to the first algorithm, for lookup in a QP-trie. \begin{list} {\textbf{Algorithm L} \emph{(Lookup in a QP-trie.)}} {\leftmargin=0pt \rightmargin=0pt \labelwidth=-\labelsep} \item Given a QP-trie root pointer $\texttt{ROOT}$ and a key string~$k$ of length~$n$, find the value associated with~$k$ in the trie. Every node~$\texttt{N}$ has a tag bit $\texttt{TAG(N)}$. If $\texttt{TAG(N)} = 0$ then $\texttt{N}$ points to a leaf node, which has $\texttt{KEY}$ and $\texttt{VALUE}$ fields. If $\texttt{TAG(N)} = 1$ then $\texttt{N}$ points to a branch node, which has $\texttt{OFFSET}$, $\texttt{BITMAP}$, and $\texttt{TWIGS}$ fields. \begin{enumerate}[\bfseries L1.] \item{} [Empty tree?] If $\texttt{ROOT} = \NULL$, then return failure. \item{} [Initialize.] Set $\texttt{N} \gets \texttt{ROOT}$. \item{} [Check tag.] \label{en:qp.lookup.chk-tag} If $\texttt{TAG(N)} = 0$ then then $\texttt{N}$ points to a leaf node: go to step~L\ref{en:qp.lookup.cmp-key}. Otherwise, it points to a branch node: go to step~L\ref{en:qp.lookup.chk-len}. \item{} [Check length.] \label{en:qp.lookup.chk-len} If $\texttt{OFFSET(N)} \ge n$ then return failure. Otherwise, continue with the next step. \item{} [Check symbol.] Let $s \gets k[\texttt{OFFSET(N)}]$. If $\texttt{BITMAP(N)} \bitand (1 \lsl s) = 0$ then return failure. Otherwise, set $\texttt{N}$ to be the address of $\texttt{TWIGS(N)}[\hwt(\texttt{BITMAP(N)} \bitand ((1 \lsl s) - 1))]$ and go to step~L\ref{en:qp.lookup.chk-tag}. \item{} [Compare key.] \label{en:qp.lookup.cmp-key} If the strings $k$ and $\texttt{KEY(N)}$ are equal then return $\texttt{VALUE(N)}$ successfully; otherwise return failure. \qed \end{enumerate} \end{list} \begin{figure} \centering \begin{tikzpicture} [x = 48mm, y = 10mm] \node[algcond] (emptyp) at (0, 8) {L1. Empty tree?}; \node[algstep] (init) at (0, 6) {L2. Initialize}; \node[algcond] (leafp) at (0, 4) {L3. Check tag}; \node[algcond] (len) at (1, 5.5) {L4. Check length}; \node[algcond] (sym) at (1, 3) {L5. Check symbol}; \node[algcond] (key) at (0, 2) {L6. Compare key}; \node (lose) at (2, 4) {\texttt{FAILURE}}; \node (win) at (0, 0) {\texttt{SUCCESS}}; \path[->] (emptyp) edge node[left] {$\texttt{ROOT} \ne \NULL$} (init) edge [out = 0, in = 140] node[above, sloped] {$\texttt{ROOT} = \NULL$} (lose) (init) edge (leafp) (leafp) edge node[left] {$\texttt{TAG(N)} = 0$} (key) edge node[sloped, above] {$\texttt{TAG(N)} = 1$} (len) (len) edge node[left] {$\texttt{OFFSET(N)} < n$} (sym) edge node[above, sloped] {$\texttt{OFFSET(N)} \ge n$} (lose) (sym) edge node[below, sloped, align = center] {bit $s$ set in \\ $\texttt{BITMAP(N)}$} (leafp) edge node[below, sloped, align = center] {bit $s$ clear in \\ $\texttt{BITMAP(N)}$} (lose) (key) edge node[left] {$k$ matches $\texttt{KEY(N)}$} (win) edge [out = 0, in = -125] node[below] {$k$ doesn't match $\texttt{KEY(N)}$} (lose); \end{tikzpicture} \caption{QP-trie lookup algorithm} \label{fig:qp.lookup} \end{figure} @* {qp.h}. @(qp.h@>= // qp.h: tables implemented with quadbit popcount patricia tries. // // Written by Tony Finch // You may do anything with this. It has no warranty. // // In a trie, keys are divided into digits depending on some radix // e.g. base 2 for binary tries, base 256 for byte-indexed tries. // When searching the trie, successive digits in the key, from most to // least significant, are used to select branches from successive // nodes in the trie, like: // |for(i = 0; isbranch(node); i++) node = node->branch[key[i]];| // All of the keys in a subtrie have identical prefixes. Tries do not // need to store keys since they are implicit in the structure. // // A patricia trie or crit-bit trie is a binary trie which omits nodes that // have only one child. Nodes are annotated with the index of the bit that // is used to select the branch; indexes always increase as you go further // into the trie. Each leaf has a copy of its key so that when you find a // leaf you can verify that the untested bits match. // // The popcount() function counts the number of bits that are set in // a word. It's also known as the Hamming weight; Knuth calls it // "sideways add". https://en.wikipedia.org/wiki/popcount // // You can use popcount() to implement a sparse array of length N // containing M < N members using bitmap of length N and a packed // vector of M elements. A member i is present in the array if bit // i is set, so M == popcount(bitmap). The index of member i in // the packed vector is the popcount of the bits preceding i. // |mask = 1 << i;| // |if(bitmap & mask)| // |member = vector[popcount(bitmap & mask-1)]| // // See "Hacker's Delight" by Hank Warren, section 5-1 "Counting 1 // bits", subsection "applications". http://www.hackersdelight.org // // Phil Bagwell's hashed array-mapped tries (HAMT) use popcount for // compact trie nodes. String keys are hashed, and the hash is used // as the index to the trie, with radix $2^{32}$ or $2^{64}$. // http://infoscience.epfl.ch/record/64394/files/triesearches.pdf // http://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf // // A qp trie uses its keys a quadbit (or nibble or half-byte) at a // time. It is a radix $2^4$ patricia trie, so each node can have between // 2 and 16 children. It uses a 16 bit word to mark which children are // present and popcount to index them. The aim is to improve on crit-bit // tries by reducing memory usage and the number of indirections // required to look up a key. // // The worst case for a qp trie is when each branch has 2 children; // then it is the same shape as a crit-bit trie. In this case there // are n-1 internal branch nodes of two words each, so it is equally // efficient as a crit-bit trie. If the key space is denser then // branches have more children but the same overhead, so the memory // usage is less. For maximally dense tries the overhead is: // // key length (bytes) $n$ // number of leaves $256^n$ // crit-bit branches $256^n - 1$ // qp branches $1 + 16^{2n-1} = 1 + 256^n / 16$ // crit-bit depth $8 n$ // qp depth $2 n$ // // In practice, qp averages about 3.3 words per leaf vs. crit-bit's 4 // words per leaf, and qp has about half the depth. typedef unsigned char byte; typedef unsigned int uint; typedef uint Tbitmap; #if defined(HAVE_NARROW_CPU) || defined(HAVE_SLOW_POPCOUNT) // NOTE: 16 bits only static inline uint popcount(Tbitmap w) { w -= (w >> 1) & 0x5555; w = (w & 0x3333) + ((w >> 2) & 0x3333); w = (w + (w >> 4)) & 0x0F0F; w = (w + (w >> 8)) & 0x00FF; return(w); } #else static inline uint popcount(Tbitmap w) { return((uint)__builtin_popcount(w)); } #endif // Parallel popcount of the top and bottom 16 bits in a 32 bit word. This // is probably only a win if your CPU is short of registers and/or integer // units. NOTE: The caller needs to extract the results by masking with // 0x00FF0000 and 0x000000FF for the top and bottom halves. static inline uint popcount16x2(uint w) { w -= (w >> 1) & 0x55555555; w = (w & 0x33333333) + ((w >> 2) & 0x33333333); w = (w + (w >> 4)) & 0x0F0F0F0F; w = w + (w >> 8); return(w); } // A trie node is two words on 64 bit machines, or three on 32 bit // machines. A node can be a leaf or a branch. In a leaf, the value // pointer must be word-aligned to allow for the tag bits. typedef struct Tleaf { const char *key; void *val; } Tleaf; // Branch nodes are distinguished from leaf nodes using a couple // of flag bits which act as a dynamic type tag. They can be: // // 0 -> node is a leaf // 1 -> node is a branch, testing upper nibble // 2 -> node is a branch, testing lower nibble // // A branch node is laid out so that the flag bits correspond to the // least significant bits bits of one of the leaf node pointers. In a // leaf node, that pointer must be word-aligned so that its flag bits // are zero. We have chosen to place this restriction on the value // pointer. // // A branch contains the index of the byte that it tests. The combined // value \bitor|index << 2 % flags|! increases along the key in big-endian // lexicographic order, and increases as you go deeper into the trie. // All the keys below a branch are identical up to the nibble // identified by the branch. // // A branch has a bitmap of which subtries ("twigs") are present. The // flags, index, and bitmap are packed into one word. The other word // is a pointer to an array of trie nodes, one for each twig that is // present. // XXX We hope that the compiler will not punish us for abusing unions. // XXX This currently assumes a 64 bit little endian machine. // On a 32 bit machine we could perhaps fit a branch in to two words // without restricting the key length by making the index relative // instead of absolute. If the gap between nodes is larger than a 16 // bit offset allows, we can insert a stepping-stone branch with only // one twig. This would make the code a bit more complicated... typedef struct Tbranch { union Trie *twigs; uint64_t flags : 2, index : 46, bitmap : 16; } Tbranch; typedef union Trie { struct Tleaf leaf; struct Tbranch branch; } Trie; struct Tbl { union Trie root; }; // Test flags to determine type of this node. static inline bool isbranch(Trie *t) { return(t->branch.flags != 0); } // Make a bitmask for testing a branch bitmap. // // mask: // 1 -> 0xffff -> 0xfff0 -> 0xf0 // 2 -> 0x0000 -> 0x000f -> 0x0f // // shift: // 1 -> 1 -> 4 // 2 -> 0 -> 0 static inline Tbitmap nibbit(byte k, uint flags) { uint mask = ((flags - 2) ^ 0x0f) & 0xff; uint shift = (2 - flags) << 2; return(1 << ((k & mask) >> shift)); } // Extract a nibble from a key and turn it into a bitmask. static inline Tbitmap twigbit(Trie *t, const char *key, size_t len) { uint64_t i = t->branch.index; if(i >= len) return(1); return(nibbit((byte)key[i], t->branch.flags)); } static inline bool hastwig(Trie *t, Tbitmap bit) { return(t->branch.bitmap & bit); } static inline uint twigoff(Trie *t, Tbitmap b) { return(popcount(t->branch.bitmap & (b-1))); } static inline Trie * twig(Trie *t, uint i) { return(&t->branch.twigs[i]); } #ifdef HAVE_NARROW_CPU #define TWIGOFFMAX(off, max, t, b) do { \ Tbitmap bitmap = t->branch.bitmap; \ uint word = (bitmap << 16) | (bitmap & (b-1)); \ uint counts = popcount16x2(word); \ off = counts & 0xFF; \ max = (counts >> 16) & 0xFF; \ } while(0) #else #define TWIGOFFMAX(off, max, t, b) do { \ off = twigoff(t, b); \ max = popcount(t->branch.bitmap); \ } while(0) #endif @* {qp.c}. @(qp.c@>= // qp.c: tables implemented with quadbit popcount patricia tries. // // Written by Tony Finch // You may do anything with this. It has no warranty. // #include #include #include #include #include #include #include "Tbl.h" #include "qp.h" bool Tgetkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { if(tbl == NULL) return(false); Trie *t = &tbl->root; while(isbranch(t)) { __builtin_prefetch(t->branch.twigs); Tbitmap b = twigbit(t, key, len); if(!hastwig(t, b)) return(false); t = twig(t, twigoff(t, b)); } if(strcmp(key, t->leaf.key) != 0) return(false); *pkey = t->leaf.key; *pval = t->leaf.val; return(true); } static bool next_rec(Trie *t, const char **pkey, size_t *plen, void **pval) { if(isbranch(t)) { // Recurse to find either this leaf (*pkey != NULL) // or the next one (*pkey == NULL). Tbitmap b = twigbit(t, *pkey, *plen); uint s, m; TWIGOFFMAX(s, m, t, b); for(; s < m; s++) if(next_rec(twig(t, s), pkey, plen, pval)) return(true); return(false); } // We have found the next leaf. if(*pkey == NULL) { *pkey = t->leaf.key; *plen = strlen(*pkey); *pval = t->leaf.val; return(true); } // We have found this leaf, so start looking for the next one. if(strcmp(*pkey, t->leaf.key) == 0) { *pkey = NULL; *plen = 0; return(false); } // No match. return(false); } bool Tnextl(Tbl *tbl, const char **pkey, size_t *plen, void **pval) { if(tbl == NULL) { *pkey = NULL; *plen = 0; return(NULL); } return(next_rec(&tbl->root, pkey, plen, pval)); } Tbl * Tdelkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { if(tbl == NULL) return(NULL); Trie *t = &tbl->root, *p = NULL; Tbitmap b = 0; while(isbranch(t)) { __builtin_prefetch(t->branch.twigs); b = twigbit(t, key, len); if(!hastwig(t, b)) return(tbl); p = t; t = twig(t, twigoff(t, b)); } if(strcmp(key, t->leaf.key) != 0) return(tbl); *pkey = t->leaf.key; *pval = t->leaf.val; if(p == NULL) { free(tbl); return(NULL); } t = p; p = NULL; // Becuase t is the usual name uint s, m; TWIGOFFMAX(s, m, t, b); if(m == 2) { // Move the other twig to the parent branch. Trie *twigs = t->branch.twigs; *t = *twig(t, !s); free(twigs); return(tbl); } memmove(t->branch.twigs+s, t->branch.twigs+s+1, sizeof(Trie) * (m - s - 1)); t->branch.bitmap &= ~b; // We have now correctly removed the twig from the trie, so if // realloc() fails we can ignore it and continue to use the // slightly oversized twig array. Trie *twigs = realloc(t->branch.twigs, sizeof(Trie) * (m - 1)); if(twigs != NULL) t->branch.twigs = twigs; return(tbl); } Tbl * Tsetl(Tbl *tbl, const char *key, size_t len, void *val) { // Ensure flag bits are zero. if(((uint64_t)val & 3) != 0) { errno = EINVAL; return(NULL); } if(val == NULL) return(Tdell(tbl, key, len)); // First leaf in an empty tbl? if(tbl == NULL) { tbl = malloc(sizeof(*tbl)); if(tbl == NULL) return(NULL); tbl->root.leaf.key = key; tbl->root.leaf.val = val; return(tbl); } Trie *t = &tbl->root; // Find the most similar leaf node in the trie. We will compare // its key with our new key to find the first differing nibble, // which can be at a lower index than the point at which we // detect a difference. while(isbranch(t)) { __builtin_prefetch(t->branch.twigs); Tbitmap b = twigbit(t, key, len); // Even if our key is missing from this branch we need to // keep iterating down to a leaf. It doesn't matter which // twig we choose since the keys are all the same up to this // index. Note that blindly using twigoff(t, b) can cause // an out-of-bounds index if it equals twigmax(t). uint i = hastwig(t, b) ? twigoff(t, b) : 0; t = twig(t, i); } // Do the keys differ, and if so, where? size_t i; for(i = 0; i <= len; i++) { if(key[i] != t->leaf.key[i]) goto newkey; } t->leaf.val = val; return(tbl); newkey:; // We have the branch's index; what are its flags? byte k1 = (byte)key[i], k2 = (byte)t->leaf.key[i]; uint f = k1 ^ k2; f = (f & 0xf0) ? 1 : 2; // Prepare the new leaf. Tbitmap b1 = nibbit(k1, f); Trie t1 = { .leaf = { .key = key, .val = val } }; // Find where to insert a branch or grow an existing branch. t = &tbl->root; while(isbranch(t)) { __builtin_prefetch(t->branch.twigs); if(i == t->branch.index && f == t->branch.flags) goto growbranch; if(i == t->branch.index && f < t->branch.flags) goto newbranch; if(i < t->branch.index) goto newbranch; Tbitmap b = twigbit(t, key, len); assert(hastwig(t, b)); t = twig(t, twigoff(t, b)); } newbranch:; Trie *twigs = malloc(sizeof(Trie) * 2); if(twigs == NULL) return(NULL); Trie t2 = *t; // Save before overwriting. Tbitmap b2 = nibbit(k2, f); t->branch.twigs = twigs; t->branch.flags = f; t->branch.index = i; t->branch.bitmap = b1 | b2; *twig(t, twigoff(t, b1)) = t1; *twig(t, twigoff(t, b2)) = t2; return(tbl); growbranch:; assert(!hastwig(t, b1)); uint s, m; TWIGOFFMAX(s, m, t, b1); twigs = realloc(t->branch.twigs, sizeof(Trie) * (m + 1)); if(twigs == NULL) return(NULL); memmove(twigs+s+1, twigs+s, sizeof(Trie) * (m - s)); memmove(twigs+s, &t1, sizeof(Trie)); t->branch.twigs = twigs; t->branch.bitmap |= b1; return(tbl); } @* {fn.h}. @(fn.h@>= // fn.h: quintet bit popcount patricia tries, new version // // This version uses somewhat different terminology than older // variants. The location of a quintet in the key is now called its // "offset", and the whole word containing the offset, bitmap, and tag // bit is called the "index word" (by analogy with a database index). // The precise quintet location is represented as a byte offset and a // shift. Previously a flags field contained the isbranch tag and shift, // but these are now separate. // // Instead of trying to use bit fields, this code uses accessor // functions to split up a pair of words into their constituent parts. // This should improve portability to machines with varying endianness // and/or word size. // // Written by Tony Finch // You may do anything with this. It has no warranty. // typedef unsigned char byte; typedef unsigned int uint; typedef uint32_t Tbitmap; typedef uint64_t Tindex; const char *dump_bitmap(Tbitmap w); static inline uint byte_me(char c) { return(c & 0xFF); } static inline uint word_up(const char *p) { uint w = byte_me(p[0]) << 8; if(w) w |= byte_me(p[1]); return(w); } #if defined(HAVE_SLOW_POPCOUNT) static inline uint popcount(Tbitmap w) { w -= (w >> 1) & 0x55555555; w = (w & 0x33333333) + ((w >> 2) & 0x33333333); w = (w + (w >> 4)) & 0x0F0F0F0F; w = (w * 0x01010101) >> 24; return(w); } #else static inline uint popcount(Tbitmap w) { return((uint)__builtin_popcount(w)); } #endif typedef struct Tbl { Tindex index; void *ptr; } Trie; // accessor functions, except for the index word #define Tset_field(cast, elem, type, field) \ static inline void \ Tset_##field(Trie *t, type field) { \ t->elem = cast field; \ } \ struct dummy @; Tset_field((void *), ptr, Trie *, twigs); Tset_field((Tindex), index, Tindex, index); Tset_field((void *)(uint64_t), ptr, const char *, key); Tset_field((Tindex), index, void *, val); static inline bool Tindex_branch(Tindex i); static inline bool isbranch(Trie *t) { return(Tindex_branch(t->index)); } #ifdef WITH_EXTRA_CHECKS #define Tbranch(t) assert(isbranch(t)) #define Tleaf(t) assert(!isbranch(t)) #else #define Tbranch(t) #define Tleaf(t) #endif #define Tcheck_get(type, tag, field, expr) \ static inline type \ tag##_##field(Trie *t) { \ tag(t); \ return(expr); \ } \ struct dummy @; Tcheck_get(Trie *, Tbranch, twigs, t->ptr); Tcheck_get(const char *, Tleaf, key, t->ptr); Tcheck_get(void *, Tleaf, val, (void*)t->index); // index word layout #define Tix_width_branch 1 #define Tix_width_shift 3 #define Tix_width_offset 28 #define Tix_width_bitmap 32 #define Tix_base_branch 0 #define Tix_base_shift (Tix_base_branch + Tix_width_branch) #define Tix_base_offset (Tix_base_shift + Tix_width_shift) #define Tix_base_bitmap (Tix_base_offset + Tix_width_offset) #define Tix_place(field) ((Tindex)(field) << Tix_base_##field) #define Tix_mask(field) ((1ULL << Tix_width_##field) - 1ULL) #define Tunmask(field,index) ((uint)(((index) >> Tix_base_##field) \ & Tix_mask(field))) #define Tmaxlen Tix_mask(offset) // index word accessor functions #define Tindex_get(type, field) \ static inline type \ Tindex_##field(Tindex i) { \ return(Tunmask(field, i)); \ } \ struct dummy @; Tindex_get(bool, branch); Tindex_get(uint, shift); Tindex_get(uint, offset); Tindex_get(Tbitmap, bitmap); static inline Tindex Tindex_new(uint shift, uint offset, Tbitmap bitmap) { uint branch = 1; return( Tix_place(branch) | Tix_place(shift) | Tix_place(offset) | Tix_place(bitmap) ); } static inline Tindex Tbitmap_add(Tindex i, Tbitmap bitmap) { return(i | Tix_place(bitmap)); } static inline Tindex Tbitmap_del(Tindex i, Tbitmap bitmap) { return(i & ~Tix_place(bitmap)); } // sanity checks! #ifndef static_assert #define static_assert_cat(a,b) a##b #define static_assert_name(line) static_assert_cat(static_assert_,line) #define static_assert(must_be_true,message) \ static const void *static_assert_name(__LINE__) \ [must_be_true ? 2 : -1] = { \ message, \ &static_assert_name(__LINE__) } #endif static_assert(Tix_base_bitmap + Tix_width_bitmap == 64, "index fields must fill a 64 bit word"); static_assert(Tunmask(bitmap,0x1234567800000000ULL) == 0x12345678, "extracting the bitmap works"); static_assert(Tunmask(offset,0x0420ULL) == 0x42, "extracting the offset works"); static_assert(Tunmask(shift,0xFEDCBAULL) == 5, "extracting the shift works"); @=// ..key[o%5==0].. ..key[o%5==1].. ..key[o%5==2].. ..key[o%5==3].. ..key[o%5==4]..@> @=// | | | | | |@> @=// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0@> @=// | | | | | | | | |@> @=// shift=0 shift=5 shift=2 shift=7 shift=4 shift=1 shift=6 shift=3@> static inline byte knybble(const char *key, uint off, uint shift) { uint word = word_up(key+off); uint right = 16 - 5 - shift; return((word >> right) & 0x1FU); } static inline byte nibble(Tindex i, const char *key, size_t len) { uint off = Tindex_offset(i); if(off >= len) return(0); else return(knybble(key, off, Tindex_shift(i))); } static inline Tbitmap twigbit(Tindex i, const char *key, size_t len) { return(1U << nibble(i, key, len)); } static inline bool hastwig(Tindex i, Tbitmap bit) { return(Tindex_bitmap(i) & bit); } static inline uint twigoff(Tindex i, Tbitmap bit) { return(popcount(Tindex_bitmap(i) & (bit-1))); } #define TWIGOFFMAX(off, max, i, b) do { \ off = twigoff(i, b); \ max = popcount(Tindex_bitmap(i)); \ } while(0) @* {fn.c}. @(fn.c@>= // fn.h: quintet bit popcount patricia tries, new version // // Written by Tony Finch // You may do anything with this. It has no warranty. // #include #include #include #include #include #include #include "Tbl.h" #include "fn.h" bool Tgetkv(Tbl *t, const char *key, size_t len, const char **pkey, void **pval) { if(t == NULL) return(false); while(isbranch(t)) { __builtin_prefetch(t->ptr); Tindex i = t->index; Tbitmap b = twigbit(i, key, len); if(!hastwig(i, b)) return(false); t = Tbranch_twigs(t) + twigoff(i, b); } if(strcmp(key, Tleaf_key(t)) != 0) return(false); *pkey = Tleaf_key(t); *pval = Tleaf_val(t); return(true); } static bool next_rec(Trie *t, const char **pkey, size_t *plen, void **pval) { Tindex i = t->index; if(Tindex_branch(i)) { // Recurse to find either this leaf (*pkey != NULL) // or the next one (*pkey == NULL). Tbitmap b = twigbit(i, *pkey, *plen); uint s, m; TWIGOFFMAX(s, m, i, b); for(; s < m; s++) if(next_rec(Tbranch_twigs(t)+s, pkey, plen, pval)) return(true); return(false); } // We have found the next leaf. if(*pkey == NULL) { *pkey = Tleaf_key(t); *plen = strlen(*pkey); *pval = Tleaf_val(t); return(true); } // We have found this leaf, so start looking for the next one. if(strcmp(*pkey, Tleaf_key(t)) == 0) { *pkey = NULL; *plen = 0; return(false); } // No match. return(false); } bool Tnextl(Tbl *tbl, const char **pkey, size_t *plen, void **pval) { if(tbl == NULL) { *pkey = NULL; *plen = 0; return(NULL); } return(next_rec(tbl, pkey, plen, pval)); } Tbl * Tdelkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { if(tbl == NULL) return(NULL); Trie *t = tbl, *p = NULL; Tindex i = 0; Tbitmap b = 0; while(isbranch(t)) { __builtin_prefetch(t->ptr); i = t->index; b = twigbit(i, key, len); if(!hastwig(i, b)) return(tbl); p = t; t = Tbranch_twigs(t) + twigoff(i, b); } if(strcmp(key, Tleaf_key(t)) != 0) return(tbl); *pkey = Tleaf_key(t); *pval = Tleaf_val(t); if(p == NULL) { free(tbl); return(NULL); } Trie *twigs = Tbranch_twigs(p); uint m = popcount(Tindex_bitmap(i)); assert(twigs <= t && t < twigs+m); if(m == 2) { // Move the other twig to the parent branch. *p = twigs[twigs == t]; free(twigs); return(tbl); } memmove(t, t+1, ((twigs + m) - (t + 1)) * sizeof(Trie)); p->index = Tbitmap_del(i, b); // We have now correctly removed the twig from the trie, so if // realloc() fails we can ignore it and continue to use the // slightly oversized twig array. twigs = realloc(twigs, sizeof(Trie) * (m - 1)); if(twigs != NULL) Tset_twigs(p, twigs); return(tbl); } Tbl * Tsetl(Tbl *tbl, const char *key, size_t len, void *val) { if(Tindex_branch((Tindex)val) || len > Tmaxlen) { errno = EINVAL; return(NULL); } if(val == NULL) return(Tdell(tbl, key, len)); // First leaf in an empty tbl? if(tbl == NULL) { tbl = malloc(sizeof(*tbl)); if(tbl == NULL) return(NULL); Tset_key(tbl, key); Tset_val(tbl, val); return(tbl); } Trie *t = tbl; // Find the most similar leaf node in the trie. We will compare // its key with our new key to find the first differing nibble, // which can be at a lower index than the point at which we // detect a difference. while(isbranch(t)) { __builtin_prefetch(t->ptr); Tindex i = t->index; Tbitmap b = twigbit(i, key, len); // Even if our key is missing from this branch we need to // keep iterating down to a leaf. It doesn't matter which // twig we choose since the keys are all the same up to this // index. Note that blindly using twigoff(t, b) can cause // an out-of-bounds index if it equals twigmax(t). uint s = hastwig(i, b) ? twigoff(i, b) : 0; t = Tbranch_twigs(t) + s; } // Do the keys differ, and if so, where? uint off, xor, shf; const char *tkey = Tleaf_key(t); for(off = 0; off <= len; off++) { xor = (byte)key[off] ^ (byte)tkey[off]; if(xor != 0) goto newkey; } Tset_val(t, val); return(tbl); newkey:; // We have the branch's byte index; what is its chunk index? uint bit = off * 8 + (uint)__builtin_clz(xor) + 8 - sizeof(uint) * 8; uint qo = bit / 5; off = qo * 5 / 8; shf = qo * 5 % 8; // re-index keys with adjusted offset Tbitmap nb = 1U << knybble(key,off,shf); Tbitmap tb = 1U << knybble(tkey,off,shf); // Prepare the new leaf. Trie nt; Tset_key(&nt, key); Tset_val(&nt, val); // Find where to insert a branch or grow an existing branch. t = tbl; Tindex i = 0; while(isbranch(t)) { __builtin_prefetch(t->ptr); i = t->index; if(off == Tindex_offset(i) && shf == Tindex_shift(i)) goto growbranch; if(off == Tindex_offset(i) && shf < Tindex_shift(i)) goto newbranch; if(off < Tindex_offset(i)) goto newbranch; Tbitmap b = twigbit(i, key, len); assert(hastwig(i, b)); t = Tbranch_twigs(t) + twigoff(i, b); } newbranch:; Trie *twigs = malloc(sizeof(Trie) * 2); if(twigs == NULL) return(NULL); i = Tindex_new(shf, off, nb | tb); twigs[twigoff(i, nb)] = nt; twigs[twigoff(i, tb)] = *t; Tset_twigs(t, twigs); Tset_index(t, i); return(tbl); growbranch:; assert(!hastwig(i, nb)); uint s, m; TWIGOFFMAX(s, m, i, nb); twigs = realloc(Tbranch_twigs(t), sizeof(Trie) * (m + 1)); if(twigs == NULL) return(NULL); memmove(twigs+s+1, twigs+s, sizeof(Trie) * (m - s)); memmove(twigs+s, &nt, sizeof(Trie)); Tset_twigs(t, twigs); Tset_index(t, Tbitmap_add(i, nb)); return(tbl); } @* {qp-debug.c}. @(qp-debug.c@>= // qp-debug.c: qp trie debug support // // Written by Tony Finch // You may do anything with this. It has no warranty. // #include #include #include #include #include #include "Tbl.h" #include "qp.h" static void dump_rec(Trie *t, int d) { if(isbranch(t)) { printf("Tdump%*s branch %p %zu %d\n", d, "", t, (size_t)t->branch.index, t->branch.flags); int dd = 2 + t->branch.index * 4 + (t->branch.flags - 1) * 2; assert(dd > d); for(uint i = 0; i < 16; i++) { uint b = 1 << i; if(hastwig(t, b)) { printf("Tdump%*s twig %d\n", d, "", i); dump_rec(twig(t, twigoff(t, b)), dd); } } } else { printf("Tdump%*s leaf %p\n", d, "", t); printf("Tdump%*s leaf key %p %s\n", d, "", t->leaf.key, t->leaf.key); printf("Tdump%*s leaf val %p\n", d, "", t->leaf.val); } } void Tdump(Tbl *tbl) { printf("Tdump root %p\n", tbl); if(tbl != NULL) dump_rec(&tbl->root, 0); } static void size_rec(Trie *t, uint d, size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { *rsize += sizeof(*t); if(isbranch(t)) { *rbranches += 1; for(uint i = 0; i < 16; i++) { Tbitmap b = 1 << i; if(hastwig(t, b)) size_rec(twig(t, twigoff(t, b)), d+1, rsize, rdepth, rbranches, rleaves); } } else { *rleaves += 1; *rdepth += d; } } void Tsize(Tbl *tbl, const char **rtype, size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { *rtype = "qp"; *rsize = *rdepth = *rbranches = *rleaves = 0; if(tbl != NULL) size_rec(&tbl->root, 0, rsize, rdepth, rbranches, rleaves); } @* {fn-debug.c}. @(fn-debug.c@>= // fn-debug.c: fn trie debug support // // Written by Tony Finch // You may do anything with this. It has no warranty. // #include #include #include #include #include #include "Tbl.h" #include "fn.h" const char * dump_bitmap(Tbitmap w) { static char buf[32*3]; int size = (int)sizeof(buf), n = 0; n += snprintf(buf+n, size-n, "("); for(uint s = 0; s < 32; s++) { Tbitmap b = 1 << s; if(w & b) n += snprintf(buf+n, size-n, "%u,", s); } if(n > 1) buf[n-1] = ')'; return buf; } static void dump_rec(Trie *t, uint d) { Tindex i = t->index; if(Tindex_branch(i)) { printf("Tdump%*s branch %p %s %zu %d\n", d, "", (void*)t, dump_bitmap(Tindex_bitmap(i)), (size_t)Tindex_offset(i), Tindex_shift(i)); uint dd = 1 + Tindex_offset(i) * 8 + Tindex_shift(i); assert(dd > d); for(uint s = 0; s < 32; s++) { Tbitmap b = 1 << s; if(hastwig(i, b)) { printf("Tdump%*s twig %d\n", d, "", s); dump_rec(Tbranch_twigs(t) + twigoff(i, b), dd); } } } else { printf("Tdump%*s leaf %p\n", d, "", (void *)t); printf("Tdump%*s leaf key %p %s\n", d, "", (const void *)Tleaf_key(t), Tleaf_key(t)); printf("Tdump%*s leaf val %p\n", d, "", (void *)Tleaf_val(t)); } } void Tdump(Tbl *tbl) { printf("Tdump root %p\n", (void*)tbl); if(tbl != NULL) dump_rec(tbl, 0); } static void size_rec(Trie *t, uint d, size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { *rsize += sizeof(*t); Tindex i = t->index; if(Tindex_branch(i)) { *rbranches += 1; for(uint s = 0; s < 32; s++) { Tbitmap b = 1U << s; if(hastwig(i, b)) size_rec(Tbranch_twigs(t) + twigoff(i, b), d+1, rsize, rdepth, rbranches, rleaves); } } else { *rleaves += 1; *rdepth += d; } } void Tsize(Tbl *tbl, const char **rtype, size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { *rtype = "fn"; *rsize = *rdepth = *rbranches = *rleaves = 0; if(tbl != NULL) size_rec(tbl, 0, rsize, rdepth, rbranches, rleaves); } @* {toy.c}. @(toy.c@>= #include #include #include #include #include #include #include #include "Tbl.h" static void bail(const char *msg, ...) { va_list ap; va_start(ap, msg); vfprintf(stderr, msg, ap); va_end(ap); fputc('\n', stderr); exit(2); } static unsigned long seq = 0; struct data { unsigned long seq; const char *k; }; int main(int argc, char *argv[]) { Tbl *t = 0, *tt; const char *p; void *q; struct data *v; size_t len, size, depth, branches, leaves; int i; for (i = 1; i < argc; i++) { p = argv[i]; switch (*p++) { case '+': len = strlen(p) + 1; size = (len + sizeof(*v) + 15)&-16; v = malloc(size); if (!v) bail("out of memory"); memcpy((char *)v + size - len, p, len); v->k = (char *)v + size - len; v->seq = seq++; tt = Tset(t, v->k, v); if (!tt) bail("Tset failed: %s", strerror(errno)); t = tt; break; case '-': v = Tget(t, p); t = Tdel(t, p); if (v) free(v); break; case '?': v = Tget(t, p); if (!v) puts("(not found)"); else printf("%lu\n", v->seq); assert(strcmp(p, v->k) == 0); break; case 'D': Tdump(t); break; case 'Z': Tsize(t, &p, &size, &depth, &branches, &leaves); printf("impl = %s; size = %zu, depth = %zu, " "branches = %zu, leaves = %zu\n", p, size, depth, branches, leaves); break; default: bail("unrecognized command `%c'", *--p); } } while (t) { p = 0; len = 0; Tnextl(t, &p, &len, &q); v = q; assert(p == v->k); t = Tdell(t, p, len); free(v); } return (0); } @ \end{document} \iftrue % gobble following `\fi'