chiark / gitweb /
chain.c, etc.: Add POSIX hsearch(3) and tsearch(3) support.
authorMark Wooding <mdw@distorted.org.uk>
Sun, 30 Mar 2025 21:44:20 +0000 (22:44 +0100)
committerMark Wooding <mdw@distorted.org.uk>
Sun, 30 Mar 2025 21:48:44 +0000 (22:48 +0100)
GNU libc's hsearch(3) is surprisingly fast on its own, but it's sadly
fixed size, and there's no fast way to resize it.  So there are two
implementations, one of which will grow the table on demand, and the
other just allocates it big enough to begin with.

(Actually, they're the same, but the `fixed size' version just makes a
huge initial allocation.)

.gitignore
Makefile
chain.c
format-data
graphs.tex
summarize

index 9c8acf691deb962ca1860df8e9ca0202e01d737c..9710ed81a61a3d371f51468b86de69740736b113 100644 (file)
@@ -15,6 +15,7 @@
 /chain.golang
 /chain.libavl-*
 /chain.mLib-*
+/chain.posix-*
 /chain.qptrie-*
 /chain.rust-*
 /chain.sgt-*
index 00f3fc3325c461d796a661415d0675cd654bed86..bbcd22b2c359fa16ba0b63b30fa1690caf46827d 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -166,6 +166,15 @@ mLib_VARIANTS               = sym
 mLib_LIBS               = -lmLib
 mLib-sym_CFLAGS                 = -DTREE=MLIB_SYM
 
+TREELIBS               += posix
+posix_VARIANTS          =
+posix_VARIANTS         += hsearch1
+posix-hsearch1_CFLAGS  += -DTREE=POSIX_HSEARCH
+posix_VARIANTS         += hsearchg
+posix-hsearchg_CFLAGS  += -DTREE=POSIX_HSEARCH -DHSEARCH_INITSZ=1048576
+posix_VARIANTS         += tsearch
+posix-tsearch_CFLAGS   += -DTREE=POSIX_TSEARCH
+
 c++_VARIANTS           += map
 c++-map_CXXFLAGS        = -DTREE=CXX_MAP
 c++_VARIANTS           += uomap
diff --git a/chain.c b/chain.c
index 50a92b885c7ace00f151bb314e89ee71db2e73b0..69de698db1a8f78c993007d9d21905c670f726a3 100644 (file)
--- a/chain.c
+++ b/chain.c
@@ -1,3 +1,4 @@
+#define _GNU_SOURCE
 #include <assert.h>
 #include <stdarg.h>
 #include <stdio.h>
@@ -24,6 +25,8 @@
 #define LIBAVL_TBST 18
 #define LIBAVL_TRB 19
 #define MLIB_SYM 20
+#define POSIX_HSEARCH 21
+#define POSIX_TSEARCH 22
 
 #define QPITER_FANF 1
 #define QPITER_MDW 2
 #  include <unistd.h>
 #  include <mLib/sym.h>
 #  include <mLib/unihash.h>
+#elif TREE == POSIX_HSEARCH
+#  include <errno.h>
+#  include <search.h>
+#  ifndef HSEARCH_INITSZ
+#    define HSEARCH_INITSZ 1024
+#  endif
+#elif TREE == POSIX_TSEARCH
+#  include <search.h>
 #else
 #  error "unknown `TREE' value"
 #endif
@@ -194,7 +205,7 @@ struct node {
   short chlen;
 # define CHLEN(node) ((node)->chlen)
 
-#elif USE_LIBAVL
+#elif USE_LIBAVL || TREE == POSIX_TSEARCH
 
   union {
     struct word w;
@@ -215,7 +226,9 @@ struct node {
 
 #endif
 
-#if TREE == MLIB_SYM || (USE_QPTRIE && QPTRIE_ITER == QPITER_LIST)
+#if TREE == MLIB_SYM || \
+    (USE_QPTRIE && QPTRIE_ITER == QPITER_LIST) || \
+    TREE == POSIX_HSEARCH || TREE == POSIX_TSEARCH
   struct node *next;
 #endif
 
@@ -236,7 +249,7 @@ static void bail(const char *msg, ...)
   exit(2);
 }
 
-#if USE_XYLA || TREE == SGT_TREE234 || USE_LIBAVL
+#if USE_XYLA || TREE == SGT_TREE234 || USE_LIBAVL || TREE == POSIX_TSEARCH
 
 static int compare_words(const char *a, size_t alen,
                         const char *b, size_t blen)
@@ -605,6 +618,117 @@ static void free_node(void *n, void *arg) { free(n); }
 
 #  define CHECK do ; while (0)
 
+#elif TREE == POSIX_HSEARCH
+
+#  define DECLS                                                                \
+       ENTRY *e, ee;                                                   \
+       size_t hmax = HSEARCH_INITSZ, hcap = 3*(hmax/4), hused = 0;     \
+       struct node *all = 0, **all_tail = &all                         \
+
+#  define INIT do {                                                    \
+       if (!hcreate(hmax))                                             \
+         bail("hcreate (initial) failed: %s", strerror(errno));        \
+   } while (0)
+
+#  define INSERT do {                                                  \
+       if (hused >= hcap) {                                            \
+         hdestroy();                                                   \
+         hmax *= 2; hcap *= 2;                                         \
+         if (!hcreate(hmax))                                           \
+           bail("hcreate (grow) failed: %s", strerror(errno));         \
+         for (next = all; next; next = next->next) {                   \
+           ee.key = (/*unconst*/ char *)WORDPTR(next); ee.data = next; \
+           if (!hsearch(ee, ENTER))                                    \
+             bail("hsearch (grow) failed: table full");                \
+         }                                                             \
+       }                                                               \
+       ee.key = (char *)(node + 1); ee.data = 0;                       \
+       e = hsearch(ee, ENTER);                                         \
+       if (!e)                                                         \
+         bail("hsearch (insert) failed: table full");                  \
+       else if (!e->data) {                                            \
+         e->data = node;                                               \
+         node->next = 0; *all_tail = node; all_tail = &node->next;     \
+         hused++;                                                      \
+       } else {                                                        \
+         fprintf(stderr, ";; duplicate `%s'\n", buf);                  \
+         free(node); node = 0;                                         \
+       }                                                               \
+   } while (0)
+
+#  define ITERATE for (next = all; node = next, node; next = next->next)
+
+#  define FOCUS do ; while (0)
+
+#  define GETPREFIX do {                                               \
+       word.n = WORDLEN(node) - 1; assert(word.n < WORDMAX);           \
+       memcpy(buf, WORDPTR(node), word.n); buf[word.n] = 0;            \
+   } while (0)
+
+#  define LOOKUP                                                       \
+       (ee.key = (/*unconst*/ char *)word.p,                           \
+        e = hsearch(ee, FIND), e ? e->data : 0)
+
+#  define FREE do {                                                    \
+       node = all;                                                     \
+       while (node) { next = node->next; free(node); node = next; }    \
+       hdestroy();                                                     \
+   } while (0)
+
+#  define CHECK do ; while (0)
+
+#elif TREE == POSIX_TSEARCH
+
+typedef struct { struct node *node; } TNODE;
+
+static int word_cmp(const void *x, const void *y)
+{
+  const struct word *nx = x, *ny = y;
+
+  return (compare_words(nx->p, nx->n, ny->p, ny->n));
+}
+
+#  define DECLS                                                                \
+       void *root = 0;                                                 \
+       struct node *all = 0, **all_tail = &all;                        \
+       TNODE *tn
+
+#  define INIT do ; while (0)
+
+#  define PREPNODE do {                                                        \
+       node = malloc(sizeof(*node) + word.n + 1);                      \
+         if (!node) bail("out of memory");                             \
+       memcpy(node + 1, buf, word.n + 1);                              \
+       node->u.w.p = (const char *)(node + 1); node->u.w.n = word.n;   \
+   } while (0)
+
+#  define INSERT do {                                                  \
+       tn = tsearch(node, &root, word_cmp);                            \
+       if (tn->node == node) {                                         \
+         node->next = 0; *all_tail = node; all_tail = &node->next;     \
+       } else {                                                        \
+         fprintf(stderr, ";; duplicate `%s'\n", buf);                  \
+         free(node); node = 0;                                         \
+       }                                                               \
+   } while (0)
+
+#  define ITERATE for (next = all; node = next, node; next = next->next)
+
+#  define FOCUS do ; while (0)
+
+#  define LOOKUP (tn = tfind(&word, &root, word_cmp), tn ? tn->node : 0)
+
+#  ifdef __GLIBC__
+#    define FREE do tdestroy(root, free); while (0)
+#  else
+#    define FREE do {                                                  \
+       for (next = all; node = next, node; next = next->next)          \
+         { tdelete(node, &root, word_cmp); free(node); }               \
+     } while (0)
+#  endif
+
+#  define CHECK do ; while (0)
+
 #endif
 
 #ifndef PREPNODE
index 327157b6e5e9ae4b0ed2f6bfd02865d0b91e092c..c02ab4f310b9d0b43e23131a02ce463f9b4d4cdc 100755 (executable)
@@ -22,6 +22,9 @@ our %MAP =
    "lisp-sbcl"     => 'Common Lisp (SBCL)',
    "mLib-sym"      => 'mLib, \textsf{sym} hash table',
    "perl"          => 'Perl hash',
+   "posix-hsearch1" => 'POSIX hsearch (glibc, fixed size)',
+   "posix-hsearchg" => 'POSIX hsearch (glibc, growing)',
+   "posix-tsearch"  => 'POSIX tsearch (glibc)',
    "python"        => 'Python \textsf{dict}',
    "qptrie-qp-fanf" => 'Four-bit QP-trie, recursive \textsf{Tnextl}',
    "qptrie-qp-mdw"  => 'Four-bit QP-trie, iterative \textsf{Tnextl}',
index e2cd375f5a8a581af77a78c63b976e84c276a5bc..0a96eb8465babc26165cf22cf7910aa91b4fa74a 100644 (file)
@@ -50,6 +50,7 @@
             lisp-sbcl,
             mLib-sym,
             %perl, python,
+            posix-hsearch1, posix-hsearchg, posix-tsearch,
             qptrie-qp-fanf, qptrie-qp-mdw, qptrie-qp-list,
             qptrie-fn-fanf, qptrie-fn-mdw, qptrie-fn-list,
             rust-btree, rust-hash,
index b51a430237a4f4fda8b1144eecc42f8a2a0618d4..82d0f77babb76ce6f69ccbc738d8aa1f5e82ebc5 100755 (executable)
--- a/summarize
+++ b/summarize
@@ -11,6 +11,7 @@ our %IMPL =
    "lisp"      => ["ccl", "clisp", "cmucl", "ecl", "sbcl"],
    "mLib"      => ["sym"],
    "perl"      => undef,
+   "posix"     => ["hsearch1", "hsearchg", "tsearch"],
    "python"    => undef,
    "qptrie"    => ["qp-fanf", "qp-mdw", "qp-list",
                    "fn-fanf", "fn-mdw", "fn-list"],