From: Mark Wooding Date: Sun, 30 Mar 2025 21:44:20 +0000 (+0100) Subject: chain.c, etc.: Add POSIX hsearch(3) and tsearch(3) support. X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~mdw/git/wordchain/commitdiff_plain/fded5b8a27167974346bf12bec33994308bfa81e?ds=inline chain.c, etc.: Add POSIX hsearch(3) and tsearch(3) support. GNU libc's hsearch(3) is surprisingly fast on its own, but it's sadly fixed size, and there's no fast way to resize it. So there are two implementations, one of which will grow the table on demand, and the other just allocates it big enough to begin with. (Actually, they're the same, but the `fixed size' version just makes a huge initial allocation.) --- diff --git a/.gitignore b/.gitignore index 9c8acf6..9710ed8 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ /chain.golang /chain.libavl-* /chain.mLib-* +/chain.posix-* /chain.qptrie-* /chain.rust-* /chain.sgt-* diff --git a/Makefile b/Makefile index 00f3fc3..bbcd22b 100644 --- a/Makefile +++ b/Makefile @@ -166,6 +166,15 @@ mLib_VARIANTS = sym mLib_LIBS = -lmLib mLib-sym_CFLAGS = -DTREE=MLIB_SYM +TREELIBS += posix +posix_VARIANTS = +posix_VARIANTS += hsearch1 +posix-hsearch1_CFLAGS += -DTREE=POSIX_HSEARCH +posix_VARIANTS += hsearchg +posix-hsearchg_CFLAGS += -DTREE=POSIX_HSEARCH -DHSEARCH_INITSZ=1048576 +posix_VARIANTS += tsearch +posix-tsearch_CFLAGS += -DTREE=POSIX_TSEARCH + c++_VARIANTS += map c++-map_CXXFLAGS = -DTREE=CXX_MAP c++_VARIANTS += uomap diff --git a/chain.c b/chain.c index 50a92b8..69de698 100644 --- a/chain.c +++ b/chain.c @@ -1,3 +1,4 @@ +#define _GNU_SOURCE #include #include #include @@ -24,6 +25,8 @@ #define LIBAVL_TBST 18 #define LIBAVL_TRB 19 #define MLIB_SYM 20 +#define POSIX_HSEARCH 21 +#define POSIX_TSEARCH 22 #define QPITER_FANF 1 #define QPITER_MDW 2 @@ -125,6 +128,14 @@ # include # include # include +#elif TREE == POSIX_HSEARCH +# include +# include +# ifndef HSEARCH_INITSZ +# define HSEARCH_INITSZ 1024 +# endif +#elif TREE == POSIX_TSEARCH +# include #else # error "unknown `TREE' value" #endif @@ -194,7 +205,7 @@ struct node { short chlen; # define CHLEN(node) ((node)->chlen) -#elif USE_LIBAVL +#elif USE_LIBAVL || TREE == POSIX_TSEARCH union { struct word w; @@ -215,7 +226,9 @@ struct node { #endif -#if TREE == MLIB_SYM || (USE_QPTRIE && QPTRIE_ITER == QPITER_LIST) +#if TREE == MLIB_SYM || \ + (USE_QPTRIE && QPTRIE_ITER == QPITER_LIST) || \ + TREE == POSIX_HSEARCH || TREE == POSIX_TSEARCH struct node *next; #endif @@ -236,7 +249,7 @@ static void bail(const char *msg, ...) exit(2); } -#if USE_XYLA || TREE == SGT_TREE234 || USE_LIBAVL +#if USE_XYLA || TREE == SGT_TREE234 || USE_LIBAVL || TREE == POSIX_TSEARCH static int compare_words(const char *a, size_t alen, const char *b, size_t blen) @@ -605,6 +618,117 @@ static void free_node(void *n, void *arg) { free(n); } # define CHECK do ; while (0) +#elif TREE == POSIX_HSEARCH + +# define DECLS \ + ENTRY *e, ee; \ + size_t hmax = HSEARCH_INITSZ, hcap = 3*(hmax/4), hused = 0; \ + struct node *all = 0, **all_tail = &all \ + +# define INIT do { \ + if (!hcreate(hmax)) \ + bail("hcreate (initial) failed: %s", strerror(errno)); \ + } while (0) + +# define INSERT do { \ + if (hused >= hcap) { \ + hdestroy(); \ + hmax *= 2; hcap *= 2; \ + if (!hcreate(hmax)) \ + bail("hcreate (grow) failed: %s", strerror(errno)); \ + for (next = all; next; next = next->next) { \ + ee.key = (/*unconst*/ char *)WORDPTR(next); ee.data = next; \ + if (!hsearch(ee, ENTER)) \ + bail("hsearch (grow) failed: table full"); \ + } \ + } \ + ee.key = (char *)(node + 1); ee.data = 0; \ + e = hsearch(ee, ENTER); \ + if (!e) \ + bail("hsearch (insert) failed: table full"); \ + else if (!e->data) { \ + e->data = node; \ + node->next = 0; *all_tail = node; all_tail = &node->next; \ + hused++; \ + } else { \ + fprintf(stderr, ";; duplicate `%s'\n", buf); \ + free(node); node = 0; \ + } \ + } while (0) + +# define ITERATE for (next = all; node = next, node; next = next->next) + +# define FOCUS do ; while (0) + +# define GETPREFIX do { \ + word.n = WORDLEN(node) - 1; assert(word.n < WORDMAX); \ + memcpy(buf, WORDPTR(node), word.n); buf[word.n] = 0; \ + } while (0) + +# define LOOKUP \ + (ee.key = (/*unconst*/ char *)word.p, \ + e = hsearch(ee, FIND), e ? e->data : 0) + +# define FREE do { \ + node = all; \ + while (node) { next = node->next; free(node); node = next; } \ + hdestroy(); \ + } while (0) + +# define CHECK do ; while (0) + +#elif TREE == POSIX_TSEARCH + +typedef struct { struct node *node; } TNODE; + +static int word_cmp(const void *x, const void *y) +{ + const struct word *nx = x, *ny = y; + + return (compare_words(nx->p, nx->n, ny->p, ny->n)); +} + +# define DECLS \ + void *root = 0; \ + struct node *all = 0, **all_tail = &all; \ + TNODE *tn + +# define INIT do ; while (0) + +# define PREPNODE do { \ + node = malloc(sizeof(*node) + word.n + 1); \ + if (!node) bail("out of memory"); \ + memcpy(node + 1, buf, word.n + 1); \ + node->u.w.p = (const char *)(node + 1); node->u.w.n = word.n; \ + } while (0) + +# define INSERT do { \ + tn = tsearch(node, &root, word_cmp); \ + if (tn->node == node) { \ + node->next = 0; *all_tail = node; all_tail = &node->next; \ + } else { \ + fprintf(stderr, ";; duplicate `%s'\n", buf); \ + free(node); node = 0; \ + } \ + } while (0) + +# define ITERATE for (next = all; node = next, node; next = next->next) + +# define FOCUS do ; while (0) + +# define LOOKUP (tn = tfind(&word, &root, word_cmp), tn ? tn->node : 0) + +# ifdef __GLIBC__ +# define FREE do tdestroy(root, free); while (0) +# else +# define FREE do { \ + for (next = all; node = next, node; next = next->next) \ + { tdelete(node, &root, word_cmp); free(node); } \ + } while (0) +# endif + +# define CHECK do ; while (0) + #endif #ifndef PREPNODE diff --git a/format-data b/format-data index 327157b..c02ab4f 100755 --- a/format-data +++ b/format-data @@ -22,6 +22,9 @@ our %MAP = "lisp-sbcl" => 'Common Lisp (SBCL)', "mLib-sym" => 'mLib, \textsf{sym} hash table', "perl" => 'Perl hash', + "posix-hsearch1" => 'POSIX hsearch (glibc, fixed size)', + "posix-hsearchg" => 'POSIX hsearch (glibc, growing)', + "posix-tsearch" => 'POSIX tsearch (glibc)', "python" => 'Python \textsf{dict}', "qptrie-qp-fanf" => 'Four-bit QP-trie, recursive \textsf{Tnextl}', "qptrie-qp-mdw" => 'Four-bit QP-trie, iterative \textsf{Tnextl}', diff --git a/graphs.tex b/graphs.tex index e2cd375..0a96eb8 100644 --- a/graphs.tex +++ b/graphs.tex @@ -50,6 +50,7 @@ lisp-sbcl, mLib-sym, %perl, python, + posix-hsearch1, posix-hsearchg, posix-tsearch, qptrie-qp-fanf, qptrie-qp-mdw, qptrie-qp-list, qptrie-fn-fanf, qptrie-fn-mdw, qptrie-fn-list, rust-btree, rust-hash, diff --git a/summarize b/summarize index b51a430..82d0f77 100755 --- a/summarize +++ b/summarize @@ -11,6 +11,7 @@ our %IMPL = "lisp" => ["ccl", "clisp", "cmucl", "ecl", "sbcl"], "mLib" => ["sym"], "perl" => undef, + "posix" => ["hsearch1", "hsearchg", "tsearch"], "python" => undef, "qptrie" => ["qp-fanf", "qp-mdw", "qp-list", "fn-fanf", "fn-mdw", "fn-list"],