From: Mark Wooding Date: Fri, 23 Aug 2024 17:12:59 +0000 (+0100) Subject: Intial revision. X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~mdw/git/qpweb/commitdiff_plain/37a846fb8698a55ead8cbcc2358d2dbfb4fdd428 Intial revision. --- 37a846fb8698a55ead8cbcc2358d2dbfb4fdd428 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..084e6dd --- /dev/null +++ b/Makefile @@ -0,0 +1,99 @@ +all: +clean:: +.PHONY: all clean + +TARGETS = +CLEAN = + +V = 0 +V_AT = $(V_AT.$V) +V_AT.0 = @ + +v-tag = $(call v-tag.$V,$1,$2) +v-tag.0 = @printf " %-8s %s\n" '$1' '$2'; + +CTANGLE = ctangle +CWEAVE = cweave +LATEX = pdflatex +CC = gcc +OPTIMIZE = -Og -g3 +WARNINGS = -pedantic -Wall +CFLAGS = -std=gnu99 $(OPTIMIZE) $(WARNINGS) +LD = gcc +LDFLAGS = + +SRCEXT = +objify = $(addsuffix .o, \ + $(basename \ + $(filter $(addprefix %, $(SRCEXT)), \ + $1))) +CLEAN += *.o + +compile = $(call v-tag,$1,$@)$($1) -c $(CFLAGS) $4 \ + -MD -MF $(2:.o=.d) -o$2 $3 +DEPOBJS = +CLEAN += *.d + +SRCEXT += .c +%.o: %.c + $(call compile,CC,$@,$<) + +WEBS = qp.w +qp_WEBSRCS = + +qp_WEBSRCS += Tbl.h Tbl.c +qp_WEBSRCS += qp.h qp.c qp-debug.c +qp_WEBSRCS += fn.h fn.c fn-debug.c +qp_WEBSRCS += toy.c + +ALL_WEBSRCS = $(foreach w,$(WEBS:.w=), $($w_WEBSRCS)) +CLEAN += $(ALL_WEBSRCS) + +ALL_PDFS = $(WEBS:.w=.pdf) +TARGETS += $(ALL_PDFS) +CLEAN += $(foreach e,aux idx log out pdf scn tex toc, \ + $(WEBS:.w=.$e)) +%.tex: %.w + $(call v-tag,CWEAVE,$@)$(CWEAVE) $< +.SECONDARY: + +%.pdf: %.tex + $(call v-tag,LATEX,$@)$(LATEX) --interaction=batchmode $< + +otherwords = $(wordlist 2,$(words $1),$1) + +define tangle-web +$$(call otherwords,$$($1_WEBSRCS)): $$(firstword $$($1_WEBSRCS)) +$$(firstword $$($1_WEBSRCS)): $1.w + $$(call v-tag,CTANGLE,$$<)$$(CTANGLE) $$< +endef +$(foreach w,$(WEBS:.w=), $(eval $(call tangle-web,$w))) + +TARGETS += qptoy fntoy + +COMMON_SRCS = +COMMON_SRCS += Tbl.c +COMMON_SRCS += toy.c + +qptoy_SRCS = $(COMMON_SRCS) +qptoy_SRCS += qp.c qp-debug.c +qptoy_OBJS = $(call objify,$(qptoy_SRCS)) +DEPOBJS += $(qptoy_OBJS) +qptoy: $(qptoy_OBJS) + $(call v-tag,LD,$@)$(LD) $(LDFLAGS) -o$@ $+ +CLEAN += qptoy + +fntoy_SRCS = $(COMMON_SRCS) +fntoy_SRCS += fn.c fn-debug.c +fntoy_OBJS = $(call objify,$(fntoy_SRCS)) +DEPOBJS += $(fntoy_OBJS) +fntoy: $(fntoy_OBJS) + $(call v-tag,LD,$@)$(LD) $(LDFLAGS) -o$@ $+ +CLEAN += fntoy + +all: $(TARGETS) +clean::; rm -f $(CLEAN) + +p:; : $p + +-include $(DEPOBJS:.o=.d) diff --git a/qp.w b/qp.w new file mode 100644 index 0000000..3ba3414 --- /dev/null +++ b/qp.w @@ -0,0 +1,1406 @@ +%%% -*-cweb-*- + +\documentclass[baseclass = strayman]{cweb} + +\usepackage[T1]{fontenc} +\usepackage[utf8]{inputenc} +\usepackage[palatino, helvetica, courier, maths = cmr]{mdwfonts} +\usepackage{tikz} + +\def\bitor#1!{\begingroup\let\MOD=\OR#1\endgroup} +\def\email#1{\href{mailto:#1}{\nolinkurl{#1}}} + +\makeatletter +\def\CwebNumber#1{% % octal, hex, or decimal constant + \hbox{% + $% + \def\?{\kern.2em}% + \def\$##1{\egroup\sb{\rm##1}\bgroup}% suffix to constant + \def\_{\cdot 10^{\aftergroup}}% power of ten (via dirty trick) + \let\~\cwbb@@oct \let\^\cwbb@@hex + {#1}% + $}% + } +\def\CwebSectLayout{\normalfont\sffamily\bfseries} +\def\CwebPointer{\mathord{\rightarrow}} + +\title{QP-tries} +\author{Tony Finch \and Mark Wooding} + +\begin{document} + +\maketitle +\tableofcontents + +%% hack formatting for various identifiers +@f uint64_t int +@f uint32_t int +@f xor anything + +%%%-------------------------------------------------------------------------- +@* Introduction. + +Some blurb here? + +The code here is all Tony's. I haven't even reformatted it, though +\texttt{CWEAVE} applies its own formatting. + +%%%-------------------------------------------------------------------------- +@* Preliminaries. + +@*1 Copyright notice. + +More strictly, this is a notice about a lack of copyright. The qp-trie code +is, to the maximum extent permitted, not protected by copyright. Some +jurisdictions allow authors to abandon their copyrights explicitly; others +don't. + +@ = +/* Written by Tony Finch \email{dot@@dotat.at} + You may do anything with this. It has no warranty. + \url{http://creativecommons.org/publicdomain/zero/1.0/} + */ + + +@*1 Tables. + +A \emph{table} is an abstract data type which maintains a dynamic mapping +from null-terminated strings to non-null |void *| pointers. + +The caller is responsible for managing the the key and value storage. Value +pointers must be word-aligned: our tables will make use of low-order pointer +bits to store tags. General-purpose allocators like |malloc| will return +suitably aligned memory on conventional target platforms; modifications will +need to be made to support word-addressed machines which lack space for tag +bits. + +The interface is defined in a header file \texttt{Tbl.h}. + +@(Tbl.h@> = +// Tbl.h: an abstract API for tables with string keys. +@ +@# +#ifndef Tbl_h +#define Tbl_h +@# +@<\texttt{Tbl.h} declarations@> +@# +#endif + +@ The interface includes a number of convenience functions which can be +implemented in terms of other functions, rather than by each individual table +data structure. These common implementations are given in \texttt{Tbl.c}. + +@(Tbl.c@> = +// Tbl.c: simpler wrappers for core table functions +@ +@# +#include +#include +#include +@# +#include "Tbl.h" +@# +@ + +@ A table is represented by a pointer to the incomplete |struct| type |Tbl|. +There's no function to initialize a table: an empty table is simply +represented as a null pointer. Table pointers are passed by value; functions +such as |Tset| which update the table return the new pointer. + +@<\texttt{Tbl.h} declarations@> = +typedef struct Tbl Tbl; + +@ The |Tget|\ldots\ functions retrieve the information assoicated with a key. +The simplest function is |Tget|, which takes pointers to a table and a +null-terminated key string, and returns the associated value pointer; if +there is no entry for the given key, then |Tget| returns null. It's not +possible to associate a key with a null pointer, so this is not ambiguous. + +If you already have the string's length lying around for some reason, you can +call |Tgetl| instead, passing the length along. The length does not include +the null terminator, which must still be present. + +@<\texttt{Tbl.h} declarations@> = +void *Tgetl(Tbl *tbl, const char *key, size_t klen); +void *Tget(Tbl *tbl, const char *key); + +@ These functions are both implemented in terms of the primitive |Tgetkv|. +If an entry for the key string is found in the table, then |Tgetkv| stores +pointers to the original key string and the associated value in |*rkey| and +|*rval|, respectively, and returns |true|; otherwise, it returns |false| +leaving |*rkey| and |*rval| unchanged. + +@<\texttt{Tbl.h} declarations@> = +bool Tgetkv(Tbl *tbl, const char *key, size_t klen, const char **rkey, void **rval); + +@ The implementations of |Tget| and |Tgetl| are trivial. +@ = +void * +Tgetl(Tbl *tbl, const char *key, size_t len) { + const char *rkey = NULL; + void *rval = NULL; + if(Tgetkv(tbl, key, len, &rkey, &rval)) + return(rval); + else + return(NULL); +} + +void * +Tget(Tbl *tbl, const char *key) { + return(Tgetl(tbl, key, strlen(key))); +} + +@ The |Tset|\ldots\ functions add or modify entries in the table. They +return the (possibly modified) table pointer. The |Tset| function takes a +pointer to a null-terminated key string and the |void *| value pointer to +associate with it. If the table already contains an entry for the key, then +the entry is updated to associate the new value with the key; the old value +pointer is lost. Otherwise, a new entry is added to the table, associating +the key with the value. If the value pointer is null, then any existing +entry for the key is removed, as if |Tdel| had been called. + +The |Tsetl| function is the same, except that it also expects to be given the +length of the key string. As for |Tgetl| above, the length doesn't include +the null terminator, which must still be present. + +Updating the table can fail. Failures are reported by setting the global +variable |errno| to a nonzero value and returning a null pointer. The +possible |errno| values are as follows. +\begin{description} +\item[|EINVAL|] The value pointer is not word-aligned. +\item[|ENOMEM|] There was insufficient memory to extend the table. +\end{description} + +This means that the |Tset|\ldots\ functions can return null in two different +cases. +\begin{itemize} +\item If the value pointer is not null, then they return null if updating the + table fails. In this case, they set |errno| to a nonzero value to indicate + the problem. +\item If the value pointer is null, then they return null if the table is now + empty. This is a successful outcome; |errno| is not changed. +\end{itemize} + +@<\texttt{Tbl.h} declarations@> = +Tbl *Tsetl(Tbl *tbl, const char *key, size_t klen, void *value); +Tbl *Tset(Tbl *tbl, const char *key, void *value); + +@ The |Tset| function is implemented in terms of |Tsetl| in the obvious way. +The |Tsetl| function is primitive, implemented by the data structure. + +@ = +Tbl * +Tset(Tbl *tbl, const char *key, void *value) { + return(Tsetl(tbl, key, strlen(key), value)); +} + +@ The |Tdel|\ldots\ functions remove entries from the table. They return the +new table pointer, which will be null if the table is now empty. Following +the now-familiar pattern, the |Tdel| function takes pointers to a table and a +null-terminated key string; the |Tdell| function is additionally passed the +length of the string. + +@<\texttt{Tbl.h} declarations@> = +Tbl *Tdell(Tbl *tbl, const char *key, size_t klen); +Tbl *Tdel(Tbl *tbl, const char *key); + +@ The |Tdel| and |Tdell| functions are implemented in terms of the primitive +|Tdelkv|, which additionally stores pointers to the entry's original key and +value in |*rkey| and |*rvalue|. + +@<\texttt{Tbl.h} declarations@> = +Tbl *Tdelkv(Tbl *tbl, const char *key, size_t klen, const char **rkey, void **rval); + +@ The implementations of |Tdel| and |Tdell| are straightforward. + +@ = +Tbl * +Tdell(Tbl *tbl, const char *key, size_t len) { + const char *rkey = NULL; + void *rval = NULL; + return(Tdelkv(tbl, key, len, &rkey, &rval)); +} + +Tbl * +Tdel(Tbl *tbl, const char *key) { + return(Tdell(tbl, key, strlen(key))); +} + +@ We next turn to enumerating the entries in a table. The |Tnext| function +is given a pointer to a table, and the addresses |pkey| and |pvalue| of +key-string and value pointers. On entry, |*pkey| must point to a key string +for which an entry is present in the table; on exit, |*pkey| and |*pvalue| +are updated to point to the next key, in lexicographic order, for which there +is an entry in the table, |*pval| is set to the associated value pointer, and +|true| is returned. If no such entry exists, then |*pkey| is set to null and +|false| is returned. As a special case, if |*pkey| is null on entry, then +the key and value for the table entry whose key is lexicographically first is +found; if the table is entry, then, again, |false| is returned. The initial +value of |*pvalue| is ignored. + +The |Tnextl| function is similar, except that it also stores the length of +the key string in |*pklen|. The |Tnxt| function has a simpler interface, +intended for debugging: it receives a key string pointer, rather than its +address, as an argument, and returns a pointer to the lexicographically next +key string present in the table, or null if no such string exists; it doesn't +report the associated value pointer at all. + +@<\texttt{Tbl.h} declarations@> = +bool Tnextl(Tbl *tbl, const char **pkey, size_t *pklen, void **pvalue); +bool Tnext(Tbl *tbl, const char **pkey, void **pvalue); +const char *Tnxt(Tbl *tbl, const char *key); + +@ The |Tnext| and |Tnxt| functions are implemented in terms of |Tnextl| in +the obvious way. + +@ = +bool +Tnext(Tbl *tbl, const char **pkey, void **pvalue) { + size_t len = *pkey == NULL ? 0 : strlen(*pkey); + return(Tnextl(tbl, pkey, &len, pvalue)); +} + +const char * +Tnxt(Tbl *tbl, const char *key) { + void *value = NULL; + Tnext(tbl, &key, &value); + return(key); +} + +@ Finally, we define some diagnostic tools. + +The |Tsize| function reports statistics about a table + +@<\texttt{Tbl.h} declarations@> = +void Tdump(Tbl *tbl); +void Tsize(Tbl *tbl, const char **rtype, + size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves); + + +@* {qp.h}. + +@(qp.h@> = +// qp.h: tables implemented with quadbit popcount patricia tries. +// +// Written by Tony Finch +// You may do anything with this. It has no warranty. +// + +// In a trie, keys are divided into digits depending on some radix +// e.g. base 2 for binary tries, base 256 for byte-indexed tries. +// When searching the trie, successive digits in the key, from most to +// least significant, are used to select branches from successive +// nodes in the trie, like: +// |for(i = 0; isbranch(node); i++) node = node->branch[key[i]];| +// All of the keys in a subtrie have identical prefixes. Tries do not +// need to store keys since they are implicit in the structure. +// +// A patricia trie or crit-bit trie is a binary trie which omits nodes that +// have only one child. Nodes are annotated with the index of the bit that +// is used to select the branch; indexes always increase as you go further +// into the trie. Each leaf has a copy of its key so that when you find a +// leaf you can verify that the untested bits match. +// +// The popcount() function counts the number of bits that are set in +// a word. It's also known as the Hamming weight; Knuth calls it +// "sideways add". https://en.wikipedia.org/wiki/popcount +// +// You can use popcount() to implement a sparse array of length N +// containing M < N members using bitmap of length N and a packed +// vector of M elements. A member i is present in the array if bit +// i is set, so M == popcount(bitmap). The index of member i in +// the packed vector is the popcount of the bits preceding i. +// |mask = 1 << i;| +// |if(bitmap & mask)| +// |member = vector[popcount(bitmap & mask-1)]| +// +// See "Hacker's Delight" by Hank Warren, section 5-1 "Counting 1 +// bits", subsection "applications". http://www.hackersdelight.org +// +// Phil Bagwell's hashed array-mapped tries (HAMT) use popcount for +// compact trie nodes. String keys are hashed, and the hash is used +// as the index to the trie, with radix $2^{32}$ or $2^{64}$. +// http://infoscience.epfl.ch/record/64394/files/triesearches.pdf +// http://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf +// +// A qp trie uses its keys a quadbit (or nibble or half-byte) at a +// time. It is a radix $2^4$ patricia trie, so each node can have between +// 2 and 16 children. It uses a 16 bit word to mark which children are +// present and popcount to index them. The aim is to improve on crit-bit +// tries by reducing memory usage and the number of indirections +// required to look up a key. +// +// The worst case for a qp trie is when each branch has 2 children; +// then it is the same shape as a crit-bit trie. In this case there +// are n-1 internal branch nodes of two words each, so it is equally +// efficient as a crit-bit trie. If the key space is denser then +// branches have more children but the same overhead, so the memory +// usage is less. For maximally dense tries the overhead is: +// +// key length (bytes) $n$ +// number of leaves $256^n$ +// crit-bit branches $256^n - 1$ +// qp branches $1 + 16^{2n-1} = 1 + 256^n / 16$ +// crit-bit depth $8 n$ +// qp depth $2 n$ +// +// In practice, qp averages about 3.3 words per leaf vs. crit-bit's 4 +// words per leaf, and qp has about half the depth. + +typedef unsigned char byte; +typedef unsigned int uint; + +typedef uint Tbitmap; + +#if defined(HAVE_NARROW_CPU) || defined(HAVE_SLOW_POPCOUNT) + +// NOTE: 16 bits only + +static inline uint +popcount(Tbitmap w) { + w -= (w >> 1) & 0x5555; + w = (w & 0x3333) + ((w >> 2) & 0x3333); + w = (w + (w >> 4)) & 0x0F0F; + w = (w + (w >> 8)) & 0x00FF; + return(w); +} + +#else + +static inline uint +popcount(Tbitmap w) { + return((uint)__builtin_popcount(w)); +} + +#endif + +// Parallel popcount of the top and bottom 16 bits in a 32 bit word. This +// is probably only a win if your CPU is short of registers and/or integer +// units. NOTE: The caller needs to extract the results by masking with +// 0x00FF0000 and 0x000000FF for the top and bottom halves. + +static inline uint +popcount16x2(uint w) { + w -= (w >> 1) & 0x55555555; + w = (w & 0x33333333) + ((w >> 2) & 0x33333333); + w = (w + (w >> 4)) & 0x0F0F0F0F; + w = w + (w >> 8); + return(w); +} + +// A trie node is two words on 64 bit machines, or three on 32 bit +// machines. A node can be a leaf or a branch. In a leaf, the value +// pointer must be word-aligned to allow for the tag bits. + +typedef struct Tleaf { + const char *key; + void *val; +} Tleaf; + +// Branch nodes are distinguished from leaf nodes using a couple +// of flag bits which act as a dynamic type tag. They can be: +// +// 0 -> node is a leaf +// 1 -> node is a branch, testing upper nibble +// 2 -> node is a branch, testing lower nibble +// +// A branch node is laid out so that the flag bits correspond to the +// least significant bits bits of one of the leaf node pointers. In a +// leaf node, that pointer must be word-aligned so that its flag bits +// are zero. We have chosen to place this restriction on the value +// pointer. +// +// A branch contains the index of the byte that it tests. The combined +// value \bitor|index << 2 % flags|! increases along the key in big-endian +// lexicographic order, and increases as you go deeper into the trie. +// All the keys below a branch are identical up to the nibble +// identified by the branch. +// +// A branch has a bitmap of which subtries ("twigs") are present. The +// flags, index, and bitmap are packed into one word. The other word +// is a pointer to an array of trie nodes, one for each twig that is +// present. + +// XXX We hope that the compiler will not punish us for abusing unions. + +// XXX This currently assumes a 64 bit little endian machine. +// On a 32 bit machine we could perhaps fit a branch in to two words +// without restricting the key length by making the index relative +// instead of absolute. If the gap between nodes is larger than a 16 +// bit offset allows, we can insert a stepping-stone branch with only +// one twig. This would make the code a bit more complicated... + +typedef struct Tbranch { + union Trie *twigs; + uint64_t + flags : 2, + index : 46, + bitmap : 16; +} Tbranch; + +typedef union Trie { + struct Tleaf leaf; + struct Tbranch branch; +} Trie; + +struct Tbl { + union Trie root; +}; + +// Test flags to determine type of this node. + +static inline bool +isbranch(Trie *t) { + return(t->branch.flags != 0); +} + +// Make a bitmask for testing a branch bitmap. +// +// mask: +// 1 -> 0xffff -> 0xfff0 -> 0xf0 +// 2 -> 0x0000 -> 0x000f -> 0x0f +// +// shift: +// 1 -> 1 -> 4 +// 2 -> 0 -> 0 + +static inline Tbitmap +nibbit(byte k, uint flags) { + uint mask = ((flags - 2) ^ 0x0f) & 0xff; + uint shift = (2 - flags) << 2; + return(1 << ((k & mask) >> shift)); +} + +// Extract a nibble from a key and turn it into a bitmask. + +static inline Tbitmap +twigbit(Trie *t, const char *key, size_t len) { + uint64_t i = t->branch.index; + if(i >= len) return(1); + return(nibbit((byte)key[i], t->branch.flags)); +} + +static inline bool +hastwig(Trie *t, Tbitmap bit) { + return(t->branch.bitmap & bit); +} + +static inline uint +twigoff(Trie *t, Tbitmap b) { + return(popcount(t->branch.bitmap & (b-1))); +} + +static inline Trie * +twig(Trie *t, uint i) { + return(&t->branch.twigs[i]); +} + +#ifdef HAVE_NARROW_CPU + +#define TWIGOFFMAX(off, max, t, b) do { \ + Tbitmap bitmap = t->branch.bitmap; \ + uint word = (bitmap << 16) | (bitmap & (b-1)); \ + uint counts = popcount16x2(word); \ + off = counts & 0xFF; \ + max = (counts >> 16) & 0xFF; \ + } while(0) + +#else + +#define TWIGOFFMAX(off, max, t, b) do { \ + off = twigoff(t, b); \ + max = popcount(t->branch.bitmap); \ + } while(0) + +#endif + +@* {qp.c}. + +@(qp.c@> = +// qp.c: tables implemented with quadbit popcount patricia tries. +// +// Written by Tony Finch +// You may do anything with this. It has no warranty. +// + +#include +#include +#include +#include +#include +#include + +#include "Tbl.h" +#include "qp.h" + +bool +Tgetkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { + if(tbl == NULL) + return(false); + Trie *t = &tbl->root; + while(isbranch(t)) { + __builtin_prefetch(t->branch.twigs); + Tbitmap b = twigbit(t, key, len); + if(!hastwig(t, b)) + return(false); + t = twig(t, twigoff(t, b)); + } + if(strcmp(key, t->leaf.key) != 0) + return(false); + *pkey = t->leaf.key; + *pval = t->leaf.val; + return(true); +} + +static bool +next_rec(Trie *t, const char **pkey, size_t *plen, void **pval) { + if(isbranch(t)) { + // Recurse to find either this leaf (*pkey != NULL) + // or the next one (*pkey == NULL). + Tbitmap b = twigbit(t, *pkey, *plen); + uint s, m; TWIGOFFMAX(s, m, t, b); + for(; s < m; s++) + if(next_rec(twig(t, s), pkey, plen, pval)) + return(true); + return(false); + } + // We have found the next leaf. + if(*pkey == NULL) { + *pkey = t->leaf.key; + *plen = strlen(*pkey); + *pval = t->leaf.val; + return(true); + } + // We have found this leaf, so start looking for the next one. + if(strcmp(*pkey, t->leaf.key) == 0) { + *pkey = NULL; + *plen = 0; + return(false); + } + // No match. + return(false); +} + +bool +Tnextl(Tbl *tbl, const char **pkey, size_t *plen, void **pval) { + if(tbl == NULL) { + *pkey = NULL; + *plen = 0; + return(NULL); + } + return(next_rec(&tbl->root, pkey, plen, pval)); +} + +Tbl * +Tdelkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { + if(tbl == NULL) + return(NULL); + Trie *t = &tbl->root, *p = NULL; + Tbitmap b = 0; + while(isbranch(t)) { + __builtin_prefetch(t->branch.twigs); + b = twigbit(t, key, len); + if(!hastwig(t, b)) + return(tbl); + p = t; t = twig(t, twigoff(t, b)); + } + if(strcmp(key, t->leaf.key) != 0) + return(tbl); + *pkey = t->leaf.key; + *pval = t->leaf.val; + if(p == NULL) { + free(tbl); + return(NULL); + } + t = p; p = NULL; // Becuase t is the usual name + uint s, m; TWIGOFFMAX(s, m, t, b); + if(m == 2) { + // Move the other twig to the parent branch. + Trie *twigs = t->branch.twigs; + *t = *twig(t, !s); + free(twigs); + return(tbl); + } + memmove(t->branch.twigs+s, t->branch.twigs+s+1, sizeof(Trie) * (m - s - 1)); + t->branch.bitmap &= ~b; + // We have now correctly removed the twig from the trie, so if + // realloc() fails we can ignore it and continue to use the + // slightly oversized twig array. + Trie *twigs = realloc(t->branch.twigs, sizeof(Trie) * (m - 1)); + if(twigs != NULL) t->branch.twigs = twigs; + return(tbl); +} + +Tbl * +Tsetl(Tbl *tbl, const char *key, size_t len, void *val) { + // Ensure flag bits are zero. + if(((uint64_t)val & 3) != 0) { + errno = EINVAL; + return(NULL); + } + if(val == NULL) + return(Tdell(tbl, key, len)); + // First leaf in an empty tbl? + if(tbl == NULL) { + tbl = malloc(sizeof(*tbl)); + if(tbl == NULL) return(NULL); + tbl->root.leaf.key = key; + tbl->root.leaf.val = val; + return(tbl); + } + Trie *t = &tbl->root; + // Find the most similar leaf node in the trie. We will compare + // its key with our new key to find the first differing nibble, + // which can be at a lower index than the point at which we + // detect a difference. + while(isbranch(t)) { + __builtin_prefetch(t->branch.twigs); + Tbitmap b = twigbit(t, key, len); + // Even if our key is missing from this branch we need to + // keep iterating down to a leaf. It doesn't matter which + // twig we choose since the keys are all the same up to this + // index. Note that blindly using twigoff(t, b) can cause + // an out-of-bounds index if it equals twigmax(t). + uint i = hastwig(t, b) ? twigoff(t, b) : 0; + t = twig(t, i); + } + // Do the keys differ, and if so, where? + size_t i; + for(i = 0; i <= len; i++) { + if(key[i] != t->leaf.key[i]) + goto newkey; + } + t->leaf.val = val; + return(tbl); +newkey:; // We have the branch's index; what are its flags? + byte k1 = (byte)key[i], k2 = (byte)t->leaf.key[i]; + uint f = k1 ^ k2; + f = (f & 0xf0) ? 1 : 2; + // Prepare the new leaf. + Tbitmap b1 = nibbit(k1, f); + Trie t1 = { .leaf = { .key = key, .val = val } }; + // Find where to insert a branch or grow an existing branch. + t = &tbl->root; + while(isbranch(t)) { + __builtin_prefetch(t->branch.twigs); + if(i == t->branch.index && f == t->branch.flags) + goto growbranch; + if(i == t->branch.index && f < t->branch.flags) + goto newbranch; + if(i < t->branch.index) + goto newbranch; + Tbitmap b = twigbit(t, key, len); + assert(hastwig(t, b)); + t = twig(t, twigoff(t, b)); + } +newbranch:; + Trie *twigs = malloc(sizeof(Trie) * 2); + if(twigs == NULL) return(NULL); + Trie t2 = *t; // Save before overwriting. + Tbitmap b2 = nibbit(k2, f); + t->branch.twigs = twigs; + t->branch.flags = f; + t->branch.index = i; + t->branch.bitmap = b1 | b2; + *twig(t, twigoff(t, b1)) = t1; + *twig(t, twigoff(t, b2)) = t2; + return(tbl); +growbranch:; + assert(!hastwig(t, b1)); + uint s, m; TWIGOFFMAX(s, m, t, b1); + twigs = realloc(t->branch.twigs, sizeof(Trie) * (m + 1)); + if(twigs == NULL) return(NULL); + memmove(twigs+s+1, twigs+s, sizeof(Trie) * (m - s)); + memmove(twigs+s, &t1, sizeof(Trie)); + t->branch.twigs = twigs; + t->branch.bitmap |= b1; + return(tbl); +} + +@* {fn.h}. + +@(fn.h@> = +// fn.h: quintet bit popcount patricia tries, new version +// +// This version uses somewhat different terminology than older +// variants. The location of a quintet in the key is now called its +// "offset", and the whole word containing the offset, bitmap, and tag +// bit is called the "index word" (by analogy with a database index). +// The precise quintet location is represented as a byte offset and a +// shift. Previously a flags field contained the isbranch tag and shift, +// but these are now separate. +// +// Instead of trying to use bit fields, this code uses accessor +// functions to split up a pair of words into their constituent parts. +// This should improve portability to machines with varying endianness +// and/or word size. +// +// Written by Tony Finch +// You may do anything with this. It has no warranty. +// + +typedef unsigned char byte; +typedef unsigned int uint; + +typedef uint32_t Tbitmap; +typedef uint64_t Tindex; + +const char *dump_bitmap(Tbitmap w); + +static inline uint +byte_me(char c) { + return(c & 0xFF); +} + +static inline uint +word_up(const char *p) { + uint w = byte_me(p[0]) << 8; + if(w) w |= byte_me(p[1]); + return(w); +} + +#if defined(HAVE_SLOW_POPCOUNT) + +static inline uint +popcount(Tbitmap w) { + w -= (w >> 1) & 0x55555555; + w = (w & 0x33333333) + ((w >> 2) & 0x33333333); + w = (w + (w >> 4)) & 0x0F0F0F0F; + w = (w * 0x01010101) >> 24; + return(w); +} + +#else + +static inline uint +popcount(Tbitmap w) { + return((uint)__builtin_popcount(w)); +} + +#endif + +typedef struct Tbl { + Tindex index; + void *ptr; +} Trie; + +// accessor functions, except for the index word + +#define Tset_field(cast, elem, type, field) \ + static inline void \ + Tset_##field(Trie *t, type field) { \ + t->elem = cast field; \ + } \ + struct dummy @; + +Tset_field((void *), ptr, Trie *, twigs); +Tset_field((Tindex), index, Tindex, index); +Tset_field((void *)(uint64_t), ptr, const char *, key); +Tset_field((Tindex), index, void *, val); + +static inline bool Tindex_branch(Tindex i); + +static inline bool isbranch(Trie *t) { + return(Tindex_branch(t->index)); +} + +#ifdef WITH_EXTRA_CHECKS +#define Tbranch(t) assert(isbranch(t)) +#define Tleaf(t) assert(!isbranch(t)) +#else +#define Tbranch(t) +#define Tleaf(t) +#endif + +#define Tcheck_get(type, tag, field, expr) \ + static inline type \ + tag##_##field(Trie *t) { \ + tag(t); \ + return(expr); \ + } \ + struct dummy @; + +Tcheck_get(Trie *, Tbranch, twigs, t->ptr); +Tcheck_get(const char *, Tleaf, key, t->ptr); +Tcheck_get(void *, Tleaf, val, (void*)t->index); + +// index word layout + +#define Tix_width_branch 1 +#define Tix_width_shift 3 +#define Tix_width_offset 28 +#define Tix_width_bitmap 32 + +#define Tix_base_branch 0 +#define Tix_base_shift (Tix_base_branch + Tix_width_branch) +#define Tix_base_offset (Tix_base_shift + Tix_width_shift) +#define Tix_base_bitmap (Tix_base_offset + Tix_width_offset) + +#define Tix_place(field) ((Tindex)(field) << Tix_base_##field) + +#define Tix_mask(field) ((1ULL << Tix_width_##field) - 1ULL) + +#define Tunmask(field,index) ((uint)(((index) >> Tix_base_##field) \ + & Tix_mask(field))) + +#define Tmaxlen Tix_mask(offset) + +// index word accessor functions + +#define Tindex_get(type, field) \ + static inline type \ + Tindex_##field(Tindex i) { \ + return(Tunmask(field, i)); \ + } \ + struct dummy @; + +Tindex_get(bool, branch); +Tindex_get(uint, shift); +Tindex_get(uint, offset); +Tindex_get(Tbitmap, bitmap); + +static inline Tindex +Tindex_new(uint shift, uint offset, Tbitmap bitmap) { + uint branch = 1; + return( Tix_place(branch) | + Tix_place(shift) | + Tix_place(offset) | + Tix_place(bitmap) ); +} + +static inline Tindex +Tbitmap_add(Tindex i, Tbitmap bitmap) { + return(i | Tix_place(bitmap)); +} + +static inline Tindex +Tbitmap_del(Tindex i, Tbitmap bitmap) { + return(i & ~Tix_place(bitmap)); +} + +// sanity checks! + +#ifndef static_assert +#define static_assert_cat(a,b) a##b +#define static_assert_name(line) static_assert_cat(static_assert_,line) +#define static_assert(must_be_true,message) \ + static const void *static_assert_name(__LINE__) \ + [must_be_true ? 2 : -1] = { \ + message, \ + &static_assert_name(__LINE__) } +#endif + +static_assert(Tix_base_bitmap + Tix_width_bitmap == 64, + "index fields must fill a 64 bit word"); + +static_assert(Tunmask(bitmap,0x1234567800000000ULL) == 0x12345678, + "extracting the bitmap works"); + +static_assert(Tunmask(offset,0x0420ULL) == 0x42, + "extracting the offset works"); + +static_assert(Tunmask(shift,0xFEDCBAULL) == 5, + "extracting the shift works"); + +@=// ..key[o%5==0].. ..key[o%5==1].. ..key[o%5==2].. ..key[o%5==3].. ..key[o%5==4]..@> +@=// | | | | | |@> +@=// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0@> +@=// | | | | | | | | |@> +@=// shift=0 shift=5 shift=2 shift=7 shift=4 shift=1 shift=6 shift=3@> + +static inline byte +knybble(const char *key, uint off, uint shift) { + uint word = word_up(key+off); + uint right = 16 - 5 - shift; + return((word >> right) & 0x1FU); +} + +static inline byte +nibble(Tindex i, const char *key, size_t len) { + uint off = Tindex_offset(i); + if(off >= len) return(0); + else return(knybble(key, off, Tindex_shift(i))); +} + +static inline Tbitmap +twigbit(Tindex i, const char *key, size_t len) { + return(1U << nibble(i, key, len)); +} + +static inline bool +hastwig(Tindex i, Tbitmap bit) { + return(Tindex_bitmap(i) & bit); +} + +static inline uint +twigoff(Tindex i, Tbitmap bit) { + return(popcount(Tindex_bitmap(i) & (bit-1))); +} + +#define TWIGOFFMAX(off, max, i, b) do { \ + off = twigoff(i, b); \ + max = popcount(Tindex_bitmap(i)); \ + } while(0) + +@* {fn.c}. + +@(fn.c@> = +// fn.h: quintet bit popcount patricia tries, new version +// +// Written by Tony Finch +// You may do anything with this. It has no warranty. +// + +#include +#include +#include +#include +#include +#include + +#include "Tbl.h" +#include "fn.h" + +bool +Tgetkv(Tbl *t, const char *key, size_t len, const char **pkey, void **pval) { + if(t == NULL) + return(false); + while(isbranch(t)) { + __builtin_prefetch(t->ptr); + Tindex i = t->index; + Tbitmap b = twigbit(i, key, len); + if(!hastwig(i, b)) + return(false); + t = Tbranch_twigs(t) + twigoff(i, b); + } + if(strcmp(key, Tleaf_key(t)) != 0) + return(false); + *pkey = Tleaf_key(t); + *pval = Tleaf_val(t); + return(true); +} + +static bool +next_rec(Trie *t, const char **pkey, size_t *plen, void **pval) { + Tindex i = t->index; + if(Tindex_branch(i)) { + // Recurse to find either this leaf (*pkey != NULL) + // or the next one (*pkey == NULL). + Tbitmap b = twigbit(i, *pkey, *plen); + uint s, m; TWIGOFFMAX(s, m, i, b); + for(; s < m; s++) + if(next_rec(Tbranch_twigs(t)+s, pkey, plen, pval)) + return(true); + return(false); + } + // We have found the next leaf. + if(*pkey == NULL) { + *pkey = Tleaf_key(t); + *plen = strlen(*pkey); + *pval = Tleaf_val(t); + return(true); + } + // We have found this leaf, so start looking for the next one. + if(strcmp(*pkey, Tleaf_key(t)) == 0) { + *pkey = NULL; + *plen = 0; + return(false); + } + // No match. + return(false); +} + +bool +Tnextl(Tbl *tbl, const char **pkey, size_t *plen, void **pval) { + if(tbl == NULL) { + *pkey = NULL; + *plen = 0; + return(NULL); + } + return(next_rec(tbl, pkey, plen, pval)); +} + +Tbl * +Tdelkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { + if(tbl == NULL) + return(NULL); + Trie *t = tbl, *p = NULL; + Tindex i = 0; + Tbitmap b = 0; + while(isbranch(t)) { + __builtin_prefetch(t->ptr); + i = t->index; + b = twigbit(i, key, len); + if(!hastwig(i, b)) + return(tbl); + p = t; t = Tbranch_twigs(t) + twigoff(i, b); + } + if(strcmp(key, Tleaf_key(t)) != 0) + return(tbl); + *pkey = Tleaf_key(t); + *pval = Tleaf_val(t); + if(p == NULL) { + free(tbl); + return(NULL); + } + Trie *twigs = Tbranch_twigs(p); + uint m = popcount(Tindex_bitmap(i)); + assert(twigs <= t && t < twigs+m); + if(m == 2) { + // Move the other twig to the parent branch. + *p = twigs[twigs == t]; + free(twigs); + return(tbl); + } + memmove(t, t+1, ((twigs + m) - (t + 1)) * sizeof(Trie)); + p->index = Tbitmap_del(i, b); + // We have now correctly removed the twig from the trie, so if + // realloc() fails we can ignore it and continue to use the + // slightly oversized twig array. + twigs = realloc(twigs, sizeof(Trie) * (m - 1)); + if(twigs != NULL) Tset_twigs(p, twigs); + return(tbl); +} + +Tbl * +Tsetl(Tbl *tbl, const char *key, size_t len, void *val) { + if(Tindex_branch((Tindex)val) || len > Tmaxlen) { + errno = EINVAL; + return(NULL); + } + if(val == NULL) + return(Tdell(tbl, key, len)); + // First leaf in an empty tbl? + if(tbl == NULL) { + tbl = malloc(sizeof(*tbl)); + if(tbl == NULL) return(NULL); + Tset_key(tbl, key); + Tset_val(tbl, val); + return(tbl); + } + Trie *t = tbl; + // Find the most similar leaf node in the trie. We will compare + // its key with our new key to find the first differing nibble, + // which can be at a lower index than the point at which we + // detect a difference. + while(isbranch(t)) { + __builtin_prefetch(t->ptr); + Tindex i = t->index; + Tbitmap b = twigbit(i, key, len); + // Even if our key is missing from this branch we need to + // keep iterating down to a leaf. It doesn't matter which + // twig we choose since the keys are all the same up to this + // index. Note that blindly using twigoff(t, b) can cause + // an out-of-bounds index if it equals twigmax(t). + uint s = hastwig(i, b) ? twigoff(i, b) : 0; + t = Tbranch_twigs(t) + s; + } + // Do the keys differ, and if so, where? + uint off, xor, shf; + const char *tkey = Tleaf_key(t); + for(off = 0; off <= len; off++) { + xor = (byte)key[off] ^ (byte)tkey[off]; + if(xor != 0) goto newkey; + } + Tset_val(t, val); + return(tbl); +newkey:; // We have the branch's byte index; what is its chunk index? + uint bit = off * 8 + (uint)__builtin_clz(xor) + 8 - sizeof(uint) * 8; + uint qo = bit / 5; + off = qo * 5 / 8; + shf = qo * 5 % 8; + // re-index keys with adjusted offset + Tbitmap nb = 1U << knybble(key,off,shf); + Tbitmap tb = 1U << knybble(tkey,off,shf); + // Prepare the new leaf. + Trie nt; + Tset_key(&nt, key); + Tset_val(&nt, val); + // Find where to insert a branch or grow an existing branch. + t = tbl; + Tindex i = 0; + while(isbranch(t)) { + __builtin_prefetch(t->ptr); + i = t->index; + if(off == Tindex_offset(i) && shf == Tindex_shift(i)) + goto growbranch; + if(off == Tindex_offset(i) && shf < Tindex_shift(i)) + goto newbranch; + if(off < Tindex_offset(i)) + goto newbranch; + Tbitmap b = twigbit(i, key, len); + assert(hastwig(i, b)); + t = Tbranch_twigs(t) + twigoff(i, b); + } +newbranch:; + Trie *twigs = malloc(sizeof(Trie) * 2); + if(twigs == NULL) return(NULL); + i = Tindex_new(shf, off, nb | tb); + twigs[twigoff(i, nb)] = nt; + twigs[twigoff(i, tb)] = *t; + Tset_twigs(t, twigs); + Tset_index(t, i); + return(tbl); +growbranch:; + assert(!hastwig(i, nb)); + uint s, m; TWIGOFFMAX(s, m, i, nb); + twigs = realloc(Tbranch_twigs(t), sizeof(Trie) * (m + 1)); + if(twigs == NULL) return(NULL); + memmove(twigs+s+1, twigs+s, sizeof(Trie) * (m - s)); + memmove(twigs+s, &nt, sizeof(Trie)); + Tset_twigs(t, twigs); + Tset_index(t, Tbitmap_add(i, nb)); + return(tbl); +} + +@* {qp-debug.c}. + +@(qp-debug.c@> = +// qp-debug.c: qp trie debug support +// +// Written by Tony Finch +// You may do anything with this. It has no warranty. +// + +#include +#include +#include +#include +#include + +#include "Tbl.h" +#include "qp.h" + +static void +dump_rec(Trie *t, int d) { + if(isbranch(t)) { + printf("Tdump%*s branch %p %zu %d\n", d, "", t, + (size_t)t->branch.index, t->branch.flags); + int dd = 2 + t->branch.index * 4 + (t->branch.flags - 1) * 2; + assert(dd > d); + for(uint i = 0; i < 16; i++) { + uint b = 1 << i; + if(hastwig(t, b)) { + printf("Tdump%*s twig %d\n", d, "", i); + dump_rec(twig(t, twigoff(t, b)), dd); + } + } + } else { + printf("Tdump%*s leaf %p\n", d, "", t); + printf("Tdump%*s leaf key %p %s\n", d, "", + t->leaf.key, t->leaf.key); + printf("Tdump%*s leaf val %p\n", d, "", + t->leaf.val); + } +} + +void +Tdump(Tbl *tbl) { + printf("Tdump root %p\n", tbl); + if(tbl != NULL) + dump_rec(&tbl->root, 0); +} + +static void +size_rec(Trie *t, uint d, + size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { + *rsize += sizeof(*t); + if(isbranch(t)) { + *rbranches += 1; + for(uint i = 0; i < 16; i++) { + Tbitmap b = 1 << i; + if(hastwig(t, b)) + size_rec(twig(t, twigoff(t, b)), + d+1, rsize, rdepth, rbranches, rleaves); + } + } else { + *rleaves += 1; + *rdepth += d; + } +} + +void +Tsize(Tbl *tbl, const char **rtype, + size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { + *rtype = "qp"; + *rsize = *rdepth = *rbranches = *rleaves = 0; + if(tbl != NULL) + size_rec(&tbl->root, 0, rsize, rdepth, rbranches, rleaves); +} + +@* {fn-debug.c}. + +@(fn-debug.c@> = +// fn-debug.c: fn trie debug support +// +// Written by Tony Finch +// You may do anything with this. It has no warranty. +// + +#include +#include +#include +#include +#include + +#include "Tbl.h" +#include "fn.h" + +const char * +dump_bitmap(Tbitmap w) { + static char buf[32*3]; + int size = (int)sizeof(buf), n = 0; + n += snprintf(buf+n, size-n, "("); + for(uint s = 0; s < 32; s++) { + Tbitmap b = 1 << s; + if(w & b) + n += snprintf(buf+n, size-n, "%u,", s); + } + if(n > 1) + buf[n-1] = ')'; + return buf; +} + +static void +dump_rec(Trie *t, uint d) { + Tindex i = t->index; + if(Tindex_branch(i)) { + printf("Tdump%*s branch %p %s %zu %d\n", d, "", (void*)t, + dump_bitmap(Tindex_bitmap(i)), + (size_t)Tindex_offset(i), Tindex_shift(i)); + uint dd = 1 + Tindex_offset(i) * 8 + Tindex_shift(i); + assert(dd > d); + for(uint s = 0; s < 32; s++) { + Tbitmap b = 1 << s; + if(hastwig(i, b)) { + printf("Tdump%*s twig %d\n", d, "", s); + dump_rec(Tbranch_twigs(t) + twigoff(i, b), dd); + } + } + } else { + printf("Tdump%*s leaf %p\n", d, "", + (void *)t); + printf("Tdump%*s leaf key %p %s\n", d, "", + (const void *)Tleaf_key(t), Tleaf_key(t)); + printf("Tdump%*s leaf val %p\n", d, "", + (void *)Tleaf_val(t)); + } +} + +void +Tdump(Tbl *tbl) { + printf("Tdump root %p\n", (void*)tbl); + if(tbl != NULL) + dump_rec(tbl, 0); +} + +static void +size_rec(Trie *t, uint d, + size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { + *rsize += sizeof(*t); + Tindex i = t->index; + if(Tindex_branch(i)) { + *rbranches += 1; + for(uint s = 0; s < 32; s++) { + Tbitmap b = 1U << s; + if(hastwig(i, b)) + size_rec(Tbranch_twigs(t) + twigoff(i, b), + d+1, rsize, rdepth, rbranches, rleaves); + } + } else { + *rleaves += 1; + *rdepth += d; + } +} + +void +Tsize(Tbl *tbl, const char **rtype, + size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { + *rtype = "fn"; + *rsize = *rdepth = *rbranches = *rleaves = 0; + if(tbl != NULL) + size_rec(tbl, 0, rsize, rdepth, rbranches, rleaves); +} + +@* {toy.c}. + +@(toy.c@> = +#include +#include +#include +#include +#include +#include +#include + +#include "Tbl.h" + +static void bail(const char *msg, ...) +{ + va_list ap; + + va_start(ap, msg); + vfprintf(stderr, msg, ap); + va_end(ap); + fputc('\n', stderr); + exit(2); +} + +static unsigned long seq = 0; +struct data { + unsigned long seq; + const char *k; +}; + +int main(int argc, char *argv[]) +{ + Tbl *t = 0, *tt; + const char *p; + void *q; + struct data *v; + size_t len, size, depth, branches, leaves; + int i; + + for (i = 1; i < argc; i++) { + p = argv[i]; + switch (*p++) { + case '+': + v = malloc(sizeof(*v)); if (!v) bail("out of memory"); + v->seq = seq; v->k = p; + tt = Tset(t, p, v); + if (!tt) bail("Tset failed: %s", strerror(errno)); + t = tt; break; + case '-': + v = Tget(t, p); t = Tdel(t, p); + if (v) free(v); + break; + case '?': + v = Tget(t, p); + if (!v) puts("(not found)"); + else printf("%lu\n", v->seq); + break; + case 'D': + Tdump(t); + break; + case 'Z': + Tsize(t, &p, &size, &depth, &branches, &leaves); + printf("impl = %s; size = %zu, depth = %zu, " + "branches = %zu, leaves = %zu\n", + p, size, depth, branches, leaves); + break; + default: + bail("unrecognized command `%c'", *--p); + } + } + + while (t) { + p = 0; len = 0; Tnextl(t, &p, &len, &q); v = q; assert(p == v->k); + t = Tdell(t, p, len); free(v); + } + + return (0); +} + +@ +\end{document} \iftrue % gobble following `\fi'