Break low-level hashtable code out from sym.

author mdw <mdw>

Mon, 2 Aug 1999 14:45:48 +0000 (14:45 +0000)

committer mdw <mdw>

Mon, 2 Aug 1999 14:45:48 +0000 (14:45 +0000)
author mdw <mdw>
Mon, 2 Aug 1999 14:45:48 +0000 (14:45 +0000)
committer mdw <mdw>
Mon, 2 Aug 1999 14:45:48 +0000 (14:45 +0000)
diff --git a/hash.c b/hash.c

new file mode 100644 (file)

index 0000000..96c150b
--- /dev/null
+++ b/hash.c
@@ -0,0 +1,216 @@
+/* -*-c-*-
+ *
+ * $Id: hash.c,v 1.1 1999/08/02 14:45:48 mdw Exp $
+ *
+ * General hashtable infrastructure
+ *
+ * (c) 1999 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------* 
+ *
+ * This file is part of the mLib utilities library.
+ *
+ * mLib is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ * 
+ * mLib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Library General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Library General Public
+ * License along with mLib; if not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ */
+
+/*----- Revision history --------------------------------------------------* 
+ *
+ * $Log: hash.c,v $
+ * Revision 1.1  1999/08/02 14:45:48  mdw
+ * Break low-level hashtable code out from sym.
+ *
+ */
+
+/*----- Header files ------------------------------------------------------*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc.h"
+#include "bits.h"
+#include "exc.h"
+#include "hash.h"
+#include "track.h"
+
+/*----- Main code ---------------------------------------------------------*/
+
+/* --- @hash_create@ --- *
+ *
+ * Arguments:  @hash_table *t@ = pointer to hashtable to initialize
+ *             @size_t n@ = number of bins to allocate (zero initially)
+ *
+ * Returns:    ---
+ *
+ * Use:                Initializes a hashtable with a given number of bins.  The
+ *             bins are initially empty.  The number of bins must be a power
+ *             of two.  This is not checked.
+ */
+
+void hash_create(hash_table *t, size_t n)
+{
+  hash_base **v;
+  TRACK_CTX("hashtable creation");
+  TRACK_PUSH;
+  t->v = xmalloc(n * sizeof(hash_base *));
+  t->mask = n - 1;
+  for (v = t->v; n; v++, n--)
+    *v = 0;
+  TRACK_POP;
+}
+
+/* --- @hash_destroy@ --- *
+ *
+ * Arguments:  @hash_table *t@ = pointer to hashtable to destroy
+ *
+ * Returns:    ---
+ *
+ * Use:                Frees a hashtable.  The items are not freed: they are the
+ *             responsibility of the implementation.
+ */
+
+void hash_destroy(hash_table *t)
+{
+  TRACK_CTX("hashtable destruction");
+  TRACK_PUSH;
+  free(t->v);
+  TRACK_POP;
+}
+
+/* --- @hash_bin@ --- *
+ *
+ * Arguments:  @hash_table *t@ = pointer to hashtable
+ *             @uint32 hash@ = hash value to look up
+ *
+ * Returns:    Pointer to the bin's list head.
+ *
+ * Use:                Given a hash value return the address of the appropriate list
+ *             head.  It is safe to remove the current entry from the table.
+ */
+
+hash_base **hash_bin(hash_table *t, uint32 hash)
+{
+  return (HASH_BIN(t, hash));
+}
+
+/* --- @hash_extend@ --- *
+ *
+ * Arguments:  @hash_table *t@ = pointer to hashtable to extend
+ *
+ * Returns:    Nonzero if extension was successful.
+ *
+ * Use:                Extends a hashtable.  The number of bins is doubled and the
+ *             entries are redistributed.
+ */
+
+int hash_extend(hash_table *t)
+{
+  hash_base **v;
+  uint32 m = t->mask + 1;
+  size_t i;
+
+  /* --- Push in a tracking context --- */
+
+  TRACK_CTX("hashtable extension");
+  TRACK_PUSH;
+
+  /* --- Allocate a new hash bin vector --- */
+
+  if ((v = realloc(t->v, m * 2 * sizeof(hash_base *))) == 0) {
+    TRACK_POP;
+    return (0);
+  }
+  t->v = v;
+  t->mask = (m * 2) - 1;
+
+  /* --- Wander through the table rehashing things --- */
+
+  for (i = 0; i < m; i++) {
+    hash_base **p = v + i;
+    hash_base **q = p + m;
+
+    while (*p) {
+      if (!((*p)->hash & m))
+       p = &(*p)->next;
+      else {
+       *q = *p;
+       q = &(*q)->next;
+       *p = (*p)->next;
+      }
+    }
+    *p = 0;
+    *q = 0;
+  }
+
+  TRACK_POP;
+  return (1);
+}
+
+/* --- @hash_remove@ --- *
+ *
+ * Arguments:  @hash_table *t@ = pointer to hashtable
+ *             @hash_base *p@ = pointer to item to remove
+ *
+ * Returns:    ---
+ *
+ * Use:                Removes an item from a hashtable.  The item itself is not
+ *             freed, although it is removed from the data structure and is
+ *             safe to free.
+ */
+
+void hash_remove(hash_table *t, hash_base *p)
+{
+  hash_base **b = HASH_BIN(t, p->hash);
+  while (*b) {
+    if (*b == p) {
+      *b = p->next;
+      return;
+    }
+    b = &(*b)->next;
+  }
+  return;
+}
+
+/* --- @hash_mkiter@ --- *
+ *
+ * Arguments:  @hash_iter *i@ = pointer to iterator to create
+ *             @hash_table *t@ = pointer to hashtable to iterate
+ *
+ * Returns:    ---
+ *
+ * Use:                Initializes a hashtable iterator.
+ */
+
+void hash_mkiter(hash_iter *i, hash_table *t) { HASH_MKITER(i, t); }
+
+/* --- @hash_next@ --- *
+ *
+ * Arguments:  @hash_iter *i@ = pointer to iterator
+ *
+ * Returns:    Pointer to the next hashtable entry found, or null.
+ *
+ * Use:                Returns the next entry from the hashtable.
+ */
+
+hash_base *hash_next(hash_iter *i)
+{
+  hash_base *b;
+  HASH_NEXT(i, b);
+  return (b);
+}
+
+/*----- That's all, folks -------------------------------------------------*/
diff --git a/hash.h b/hash.h

new file mode 100644 (file)

index 0000000..6d739a7
--- /dev/null
+++ b/hash.h
@@ -0,0 +1,214 @@
+/* -*-c-*-
+ *
+ * $Id: hash.h,v 1.1 1999/08/02 14:45:48 mdw Exp $
+ *
+ * General hashtable infrastructure
+ *
+ * (c) 1999 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------* 
+ *
+ * This file is part of the mLib utilities library.
+ *
+ * mLib is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ * 
+ * mLib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Library General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Library General Public
+ * License along with mLib; if not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ */
+
+/*----- Revision history --------------------------------------------------* 
+ *
+ * $Log: hash.h,v $
+ * Revision 1.1  1999/08/02 14:45:48  mdw
+ * Break low-level hashtable code out from sym.
+ *
+ */
+
+#ifndef HASH_H
+#define HASH_H
+
+#ifdef __cplusplus
+  extern "C" {
+#endif
+
+/*----- Notes -------------------------------------------------------------*
+ *
+ * This isn't a complete implementation of anything.  It's a collection of
+ * useful types and functions which will help when building hashtables of
+ * things.  The general-purpose hashtable is provided by the @sym@ functions.
+ */
+
+/*----- Header files ------------------------------------------------------*/
+
+#include <stddef.h>
+
+#ifndef BITS_H
+#  include "bits.h"
+#endif
+
+/*----- Data structures ---------------------------------------------------*/
+
+/* --- Hashtable basics --- *
+ *
+ * This contains the bare minimum to actually get anything useful done.  In
+ * particular it doesn't maintain any weighting information: when to extend
+ * the table is left up to the particular implementation.
+ */
+
+typedef struct hash_table {
+  uint32 mask;                         /* Bit mask of hash bits */
+  struct hash_base **v;                        /* Vector of hash bins */
+} hash_table;
+
+/* --- A hashtable entry --- *
+ *
+ * This is the bare minimum of what needs to be remembered in each hashtable
+ * entry.  Comparison of elements is left to the implementation, so I don't
+ * need to know anything about how to represent hash keys here.
+ */
+
+typedef struct hash_base {
+  struct hash_base *next;              /* Next entry in hash bin */
+  uint32 hash;                         /* Hash value for this entry */
+} hash_base;
+
+/* --- A hashtable iterator --- */
+
+typedef struct hash_iter {
+  hash_table *t;                       /* Hashtable being iterated */
+  hash_base *p;                                /* Address of next item to return */
+  size_t i;                            /* Index of next hash bin to use */
+} hash_iter;
+
+/*----- Functions provided ------------------------------------------------*/
+
+/* --- @hash_create@ --- *
+ *
+ * Arguments:  @hash_table *t@ = pointer to hashtable to initialize
+ *             @size_t n@ = number of bins to allocate (zero initially)
+ *
+ * Returns:    ---
+ *
+ * Use:                Initializes a hashtable with a given number of bins.  The
+ *             bins are initially empty.  The number of bins must be a power
+ *             of two.  This is not checked.
+ */
+
+extern void hash_create(hash_table */*t*/, size_t /*n*/);
+
+/* --- @hash_destroy@ --- *
+ *
+ * Arguments:  @hash_table *t@ = pointer to hashtable to destroy
+ *
+ * Returns:    ---
+ *
+ * Use:                Frees a hashtable.  The items are not freed: they are the
+ *             responsibility of the implementation.
+ */
+
+extern void hash_destroy(hash_table */*t*/);
+
+/* --- @hash_bin@ --- *
+ *
+ * Arguments:  @hash_table *t@ = pointer to hashtable
+ *             @uint32 hash@ = hash value to look up
+ *
+ * Returns:    Pointer to the bin's list head.
+ *
+ * Use:                Given a hash value return the address of the appropriate list
+ *             head.  It is safe to remove the current entry from the table.
+ */
+
+#define HASH_BIN(t, hash) ((t)->v + ((hash) & (t)->mask))
+
+extern hash_base **hash_bin(hash_table */*t*/, uint32 /*hash*/);
+
+/* --- @hash_extend@ --- *
+ *
+ * Arguments:  @hash_table *t@ = pointer to hashtable to extend
+ *
+ * Returns:    Nonzero if extension was successful.
+ *
+ * Use:                Extends a hashtable.  The number of bins is doubled and the
+ *             entries are redistributed.
+ */
+
+extern int hash_extend(hash_table */*t*/);
+
+/* --- @hash_remove@ --- *
+ *
+ * Arguments:  @hash_table *t@ = pointer to hashtable
+ *             @hash_base *p@ = pointer to item to remove
+ *
+ * Returns:    ---
+ *
+ * Use:                Removes an item from a hashtable.  The item itself is not
+ *             freed, although it is removed from the data structure and is
+ *             safe to free.
+ */
+
+extern void hash_remove(hash_table */*t*/, hash_base */*p*/);
+
+/* --- @hash_mkiter@ --- *
+ *
+ * Arguments:  @hash_iter *i@ = pointer to iterator to create
+ *             @hash_table *t@ = pointer to hashtable to iterate
+ *
+ * Returns:    ---
+ *
+ * Use:                Initializes a hashtable iterator.
+ */
+
+#define HASH_MKITER(i_, t_) ((i_)->t = (t_), (i_)->p = 0, (i_)->i = 0)
+
+extern void hash_mkiter(hash_iter */*i*/, hash_table */*t*/);
+
+/* --- @hash_next@ --- *
+ *
+ * Arguments:  @hash_iter *i@ = pointer to iterator
+ *
+ * Returns:    Pointer to the next hashtable entry found, or null.
+ *
+ * Use:                Returns the next entry from the hashtable.
+ */
+
+#define HASH_NEXT(i_, b_) do {                                         \
+  hash_iter *_i = (i_);                                                        \
+  hash_base *_p;                                                       \
+  hash_table *_t = _i->t;                                              \
+  uint32 _m = _t->mask;                                                        \
+                                                                       \
+  for (;;) {                                                           \
+    if (_i->p) {                                                       \
+      _p = _i->p;                                                      \
+      _i->p = _p->next;                                                        \
+      break;                                                           \
+    } else if (_i->i > _m) {                                           \
+      _p = 0;                                                          \
+      break;                                                           \
+    } else                                                             \
+      _i->p = _t->v[_i->i++];                                          \
+  }                                                                    \
+  (b_) = _p;                                                           \
+} while (0)
+
+extern hash_base *hash_next(hash_iter */*i*/);
+
+/*----- That's all, folks -------------------------------------------------*/
+
+#ifdef __cplusplus
+  }
+#endif
+
+#endif
diff --git a/man/hash.3 b/man/hash.3

new file mode 100644 (file)

index 0000000..9df4670
--- /dev/null
+++ b/man/hash.3
@@ -0,0 +1,294 @@
+.\" -*-nroff-*-
+.de VS
+.sp 1
+.RS
+.nf
+.ft B
+..
+.de VE
+.ft R
+.fi
+.RE
+.sp 1
+..
+.de hP
+.IP
+.ft B
+\h'-\w'\\$1\ 'u'\\$1\ \c
+.ft P
+..
+.ie t .ds o \(bu
+.el .ds o o
+.TH hash 3 "2 August 1999" mLib
+.SH "NAME"
+hash \- low-level hashtable implementation
+.\" @hash_create
+.\" @hash_destroy
+.\" @hash_bin
+.\" @hash_extend
+.\" @hash_remove
+.\" @hash_mkiter
+.\" @hash_next
+.\"
+.\" @HASH_BIN
+.\" @HASH_MKITER
+.\" @HASH_NEXT
+.\"
+.SH "SYNOPSIS"
+.nf
+.B "#include <mLib/hash.h>"
+
+.BI "void hash_create(hash_table *" t ", size_t " n );
+.BI "void hash_destroy(hash_table *" t );
+.BI "hash_base **hash_bin(hash_table *" t ", uint32 " hash );
+.BI "int hash_extend(hash_table *" t );
+.BI "void hash_remove(hash_table *" t ", hash_base * " b );
+.BI "void hash_mkiter(hash_iter *" i ", hash_table *" t );
+.BI "hash_base *hash_next(hash_iter *" i );
+
+.BI "hash_base **HASH_BIN(hash_table *" t ", uint32 " hash );
+.BI "void HASH_MKITER(hash_iter *" i ", hash_table *" t );
+.BI "void HASH_NEXT(hash_iter *" i ", " b );
+.fi
+.SH "OVERVIEW"
+The
+.B hash
+functions provide the basis for an extensible hashtable implementation.
+The implementation is not complete.  Many decisions have been left to
+the user, including:
+.hP \*o
+How keys should be represented, hashed and compared.
+.hP \*o
+How objects contained within the table should be allocated.
+.hP \*o
+When the hashtable should be extended.
+.PP
+A complete hashtable implementation will need to take the above
+decisions.  If you just want a prepackaged solution, see
+.BR sym (3)
+which provides one.
+.SH "IMPLEMENTATION DETAILS"
+Each item in the hashtable is assigned a 32-bit integer
+.IR hash :
+a number computed somehow from the item's data such that two items which
+are considered equal will yield the same hash.  Ideally, different items
+will yield different hashes.  It is important for this implementation
+that all bits of the hash are similarly good.
+.PP
+In order to look up an item, the high bits of the hash are masked off
+and the remainder used as an index into a vector of
+.IR "bin lists" .
+Each bin contains a list of items with identical low-order bits of their
+hashes.
+.PP
+A table expansion involves doubling the size of the bin vector.  Each
+bin list is then split into two, items being placed into a new bin
+depending on the setting of the next lowest hash bit.  By expanding the
+hashtable as needed, lookups remain constant-time.
+.PP
+A low-level hashtable is represented by a
+.B hash_table
+structure.  It contains two members:
+.TP
+.B mask
+The current bitmask to be applied to hashes.  This is one less than the
+current number of bins in the hashtable, and is applied to hash values
+in order to decide which bin an item should be in.
+.TP
+.B v
+The bin vector.  It is an array of pointers to hashtable items.
+.PP
+A hashtable item consists of a
+.B hash_base
+structure.  A full hashtable implementation will need to extend this
+structure by adding keying information and other data; the
+.B hash_base
+only contains the bare minimum of information needed to maintain the
+hashtable at a low level.  It contains the following members:
+.TP
+.B next
+Pointer to the next item in the bin list.  The final item has a null
+.B next
+pointer.  The entry in the bin vector is null if the bin list is empty.
+It is up to the high-level implementation to insert items into the list.
+.TP
+.B hash
+The hash for this item.  This must be the full 32-bit hash for the
+current item.  It is used during hashtable expansion to determine which
+bin an item should be moved to.
+.SH "FUNCTIONALITY PROVIDED"
+This section describes the functions and macros provided for building
+hashtables.  Code examples are given throughout.  They assume the
+following definitions:
+.VS
+/* --- A table of items --- */
+
+typedef struct item_table {
+  hash_table t;
+  size_t load;
+};
+
+/* --- An item --- */
+
+typedef struct item {
+  hash_base b;
+  const char *k;
+  /* ... */
+} item;
+.VE
+The implementation presented here is simple but relatively bad.  The
+source file
+.B sym.c
+presents a more realistic example, but is rather more complex.
+.SS "Initialization and finalization"
+An empty hashtable is initialized by calling
+.B hash_create
+with the address of a
+.B hash_table
+structure to be filled in and the initial number of hash bins to create.
+.PP
+For example, an item table might be initialized like this:
+.VS
+void item_createtab(item_table *t)
+{
+  hash_create(&t->t, ITEM_INITSZ);
+  t->load = ITEM_INITLOAD;
+}
+.VE
+A hashtable can be destroyed by calling
+.B hash_destroy
+with the address of the
+.B hash_table
+structure.  This does not attempt to deallocate the individual items;
+that must be done beforehand.
+.PP
+The usual way to deallocate the individual hashtable items is using the
+iteration constructs described below.
+.VS
+void item_destroytab(item_table *t)
+{
+  hash_iter i;
+  hash_base *b;
+  for (hash_mkiter(&i, &t->t); (b = hash_next(&i)) != 0; ) {
+    item *ii = (item *)b;
+    free(ii->k);
+    /* ... */
+    DESTROY(ii);
+  }
+  hash_destroy(&t->t);
+}
+.VE
+.sp -1
+.SS "Searching, adding and removing"
+Items must be searched for and added by hand.
+.PP
+The macro
+.B HASH_BIN
+returns the address of the bin list haed for a particular hashtable and
+hash value.  The function
+.B hash_bin
+works the same way and provides the same result, but since the macro is
+very simple its use is preferred.  However, it will evaluate its
+arguments multiple times.
+.PP
+Once the bin list has been found, it's fairly easy to search for an
+exact match.  A simple search might look something like this:
+.VS
+item *lookup(item_table *t, const char *k)
+{
+  uint32 h = hash(k);          /* Hash @k@ somehow */
+  hash_base **bin = HASH_BIN(&t->t, h);
+  hash_base *b;
+  for (b = *bin; b; b = b->next) {
+    item *i = (item *)b;
+    if (h == i->b.hash && strcmp(k, i->k) == 0)
+      return (i);
+  }
+  return (0);
+}
+.VE
+Insertion is also relatively trivial given the bin list head.  Insertion
+may make the hashtable too large, so it might be necessary to extend
+it.  Extension is performed by
+.BR hash_extend ,
+which is passed only the address of the hashtable.  It returns nonzero
+if extension was successful.
+.VS
+item *add(item_table *t, const char *k, /* ... */)
+{
+  item *i;
+  uint32 h;
+  hash_base **bin;
+
+  /* --- See if the item is already there --- */
+
+  if ((i =  = lookup(t, k)) != 0)
+    return (i);
+
+  /* --- Make a new hashtable item --- */
+
+  i = CREATE(item);
+  i->k = xstrdup(k);
+  /* ... */
+
+  /* --- Link it into the bin list --- */
+
+  h = i->b.hash = hash(k);
+  bin = HASH_BIN(&t->t, h);
+  i->b.next = *bin;
+  *bin = &i->b.next;
+
+  /* --- Maybe extend the hashtable --- */
+
+  if (t->load)
+    t->load--;
+  else if (hash_extend(&t->t))
+    t->load = recalc_load(t);
+
+  /* --- Done --- */
+
+  return (i);
+}
+.VE
+The
+.B sym
+implementation is rather more sophisticated in its approach but the idea
+is the same.  In particular,
+.B sym
+provides a single interface for lookup and insertion which prevents the
+unnecessary rehashing performed by the above code.
+.PP
+Removal of items is more straightforward.  The function
+.B hash_remove
+will unlink a given item from its bin list, after which point it is safe
+to remove.
+.SS "Iteration"
+Iteration allows code to be performed on all the items in a hashtable.
+This is done using an
+.I iterator
+object, represented by a
+.B hash_iter
+structure.  An iterator is initialized by calling
+.BR hash_mkiter .
+Each call to
+.B hash_next
+yields a different item from the hashtable until there are none left, a
+condition signified by a null return value.
+.PP
+The macros
+.B HASH_MKITER
+and
+.B HASH_NEXT
+do the same jobs as the above functions.  However,
+.B HASH_NEXT
+has a slightly bizarre argument passing convention: its second argument
+is an
+.I lvalue
+which is updated to contain the address of the next item.
+.PP
+The finalization code above contained an example of iteration.
+.SH "SEE ALSO"
+.BR sym (3),
+.BR mLib (3).
+.SH "AUTHOR"
+Mark Wooding, <mdw@nsict.org>
diff --git a/sym.c b/sym.c

index 394b761e8fd6e4005aabc38e977cb2a75db2aa92..03d2e82b19a4aff9114e23693f43226dbd7e076d 100644 (file)
--- a/sym.c
+++ b/sym.c
@@ -1,6 +1,6 @@
  /* -*-c-*-
   *
- * $Id: sym.c,v 1.7 1999/06/01 09:49:08 mdw Exp $
+ * $Id: sym.c,v 1.8 1999/08/02 14:45:48 mdw Exp $
   *
   * Symbol table management
   *
@@ -30,6 +30,9 @@
  /*----- Revision history --------------------------------------------------*
   *
   * $Log: sym.c,v $
+ * Revision 1.8  1999/08/02 14:45:48  mdw
+ * Break low-level hashtable code out from sym.
+ *
   * Revision 1.7  1999/06/01 09:49:08  mdw
   * Allow things to be looked up by just their caller-supplied hashes.  This
   * actually needs to be thought through better.
@@ -68,6 +71,7 @@
  #include "bits.h"
  #include "crc32.h"
  #include "exc.h"
+#include "hash.h"
  #include "sub.h"
  #include "sym.h"
  #include "track.h"
@@ -80,7 +84,7 @@
   * so that it can be used to mask of the bottom bits of a hash value.
   */
  
-#define SYM_INITSZ 255                 /* Size of a new hash table */
+#define SYM_INITSZ 64                  /* Size of a new hash table */
  
  /* --- Maximum load factor --- *
   *
@@ -91,7 +95,7 @@
   * doubled in size.
   */
  
-#define SYM_LIMIT(n) ((n) * 4)         /* Load factor for growing table */
+#define SYM_LIMIT(n) ((n) * 2)         /* Load factor for growing table */
  
  /*----- Main code ---------------------------------------------------------*/
  
@@ -107,18 +111,10 @@
  
  void sym_create(sym_table *t)
  {
-  size_t i;
-
    TRACK_CTX("symbol table creation");
    TRACK_PUSH;
-
-  t->mask = SYM_INITSZ;
-  t->c = SYM_LIMIT(SYM_INITSZ);
-  t->a = xmalloc((t->mask + 1) * sizeof(sym_base *));
-
-  for (i = 0; i < SYM_INITSZ + 1; i++)
-    t->a[i] = 0;
-
+  hash_create(&t->t, SYM_INITSZ);
+  t->load = SYM_LIMIT(SYM_INITSZ);
    TRACK_POP;
  }
  
@@ -134,23 +130,22 @@ void sym_create(sym_table *t)
  
  void sym_destroy(sym_table *t)
  {
-  size_t i;
-  sym_base *p, *q;
+  sym_iter i;
  
-  TRACK_CTX("symbol table deletion");
+  TRACK_CTX("symbol table destruction");
    TRACK_PUSH;
  
-  for (i = 0; i <= t->mask; i++) {
-    p = t->a[i];
-    while (p) {
-      q = p->next;
-      if (p->len > SYM_BUFSZ)
-       sub_free(p->name.p, p->len);
-      free(p);
-      p = q;
-    }
+  SYM_MKITER(&i, t);
+  for (;;) {
+    sym_base *p;
+    SYM_NEXT(&i, p);
+    if (!p)
+      break;
+    if (p->len > SYM_BUFSZ)
+      sub_free(p->name.p, p->len);
+    free(p);
    }
-  free(t->a);
+  hash_destroy(&t->t);
  
    TRACK_POP;
  }
@@ -171,9 +166,7 @@ void sym_destroy(sym_table *t)
   *             may be given, in which case the name may contain arbitrary
   *             binary data, or it may be given as a negative number, in
   *             which case the length of the name is calculated as
- *             @strlen(n) + 1@.  The name pointer @n@ may also be zero; in
- *             this case, @l@ is taken to be a raw hash, and any element
- *             with a matching hash is taken to be the one wanted.
+ *             @strlen(n) + 1@.
   *
   *             The return value is the address of a pointer to a @sym_base@
   *             block (which may have other things on the end, as above).  If
@@ -192,43 +185,36 @@ void *sym_find(sym_table *t, const char *n, long l, size_t sz, unsigned *f)
  {
    uint32 hash;
    size_t len = 0;
-  sym_base *bin;
-  sym_base *p, *q;
+  hash_base **bin, **p;
+  sym_base *q;
  
    /* --- Find the correct bin --- */
  
-  if (n) {
-    len = l < 0 ? strlen(n) + 1 : l;
-    CRC32(hash, 0, n, len);
-  } else
-    hash = (uint32)l;
-  bin = p = (sym_base *)(t->a + (hash & t->mask));
+  len = l < 0 ? strlen(n) + 1 : l;
+  CRC32(hash, 0, n, len);
+  bin = HASH_BIN(&t->t, hash);
  
    /* --- Search the bin list --- */
  
-  while (p->next) {
-    if (hash == p->next->hash &&
-       (n == 0 || (len == p->next->len &&
-                   !memcmp(n, SYM_NAME(p->next), len))))
-    {
+  for (p = bin; *p; p = &(*p)->next) {
+    q = (sym_base *)*p;
+    if (hash == q->b.hash && len == q->len && !memcmp(n, SYM_NAME(q), len)) {
+
        /* --- Found a match --- *
         *
         * As a minor, and probably pointless, tweak, move the item to the
         * front of its bin list.
         */
  
-      q = p->next;
-      p->next = q->next;
-      q->next = bin->next;
-      bin->next = q;
+      (*p) = q->b.next;
+      q->b.next = *bin;
+      *bin = &q->b;
  
        /* --- Return the block --- */
  
        if (f) *f = 1;
        return (q);
      }
-
-    p = p->next;
    }
  
    /* --- Couldn't find the item there --- */
@@ -242,19 +228,19 @@ void *sym_find(sym_table *t, const char *n, long l, size_t sz, unsigned *f)
      TRACK_CTX("new symbol creation");
      TRACK_PUSH;
  
-    p = xmalloc(sz);
-    p->next = bin->next;
-    p->hash = hash;
-    p->len = len;
+    q = xmalloc(sz);
+    q->b.next = *bin;
+    q->b.hash = hash;
+    q->len = len;
      if (n) {
        if (len <= SYM_BUFSZ)
-       memcpy(p->name.b, n, len);
+       memcpy(q->name.b, n, len);
        else {
         TRY {
-         p->name.p = sub_alloc(len);
-         memcpy(p->name.p, n, len);
+         q->name.p = sub_alloc(len);
+         memcpy(q->name.p, n, len);
         } CATCH {
-         free(p);
+         free(q);
           TRACK_POP;
           RETHROW;
         } END_TRY;
@@ -264,76 +250,24 @@ void *sym_find(sym_table *t, const char *n, long l, size_t sz, unsigned *f)
      TRACK_POP;
    }
  
-  bin->next = p;
+  *bin = &q->b;
  
    /* --- Consider growing the array --- */
  
-  if (t->c)
-    t->c--;
-  if (!t->c) {
-    uint32 m = t->mask + 1;
-    sym_base *p, *q, *r;
-    size_t i, lim;
-
-    TRACK_CTX("symbol table extension");
-    TRACK_PUSH;
-
-    /* --- Update values in the anchor block --- */
-
-    TRY {
-      t->a = xrealloc(t->a, (t->mask + 1) * 2 * sizeof(sym_base *));
-    } CATCH switch (exc_type) {
-      case EXC_NOMEM:
-       TRACK_POP;
-       return (bin->next);
-      default:
-       TRACK_POP;
-       RETHROW;
-    } END_TRY;
-
-    t->c = SYM_LIMIT(t->mask + 1);
-    t->mask = (t->mask + 1) * 2 - 1;
-
-    /* --- Now wander through the table rehashing things --- *
-     *
-     * This loop is very careful to avoid problems with aliasing.  The items
-     * are dealt with from the end backwards to avoid overwriting bins before
-     * they've been processed.
-     */
-
-    lim = (t->mask + 1) >> 1;
-    for (i = 0; i < lim; i++) {
-
-      /* --- Some initialization --- */
-
-      r = t->a[i];
-      p = (sym_base *)(t->a + i);
-      q = (sym_base *)(t->a + i + lim);
-
-      /* --- Now go through the @r@ list --- */
-
-      while (r) {
-       if (r->hash & m)
-         q = q->next = r;
-       else
-         p = p->next = r;
-       r = r->next;
-      }
-      p->next = q->next = 0;
-    }
-
-    TRACK_POP;
-  }
+  if (t->load)
+    t->load--;
+  if (!t->load && hash_extend(&t->t))
+    t->load = SYM_LIMIT(t->t.mask / 2 + 1);
  
    /* --- Finished that, so return the new symbol block --- */
  
-  return (p);
+  return (q);
  }
  
  /* --- @sym_remove@ --- *
   *
   * Arguments:  @sym_table *i@ = pointer to a symbol table object
- *             @void *b@ = pointer to symbol table entry
+ *             @void *p@ = pointer to symbol table entry
   *
   * Returns:    ---
   *
@@ -342,38 +276,14 @@ void *sym_find(sym_table *t, const char *n, long l, size_t sz, unsigned *f)
   *             to the entry should already be gone by this point.
   */
  
-void sym_remove(sym_table *t, void *b)
+void sym_remove(sym_table *t, void *p)
  {
-  /* --- A quick comment --- *
-   *
-   * Since the @sym_base@ block contains the hash, finding the element in the
-   * bin list is really quick -- it's not worth bothering with things like
-   * doubly linked lists.
-   */
-
-  sym_base *p = b;
-  sym_base *bin = (sym_base *)(t->a + (p->hash & t->mask));
-
-  /* --- Find the item in the bin list --- */
-
-  while (bin->next) {
-    if (bin->next == p)
-      break;
-    bin = bin->next;
-  }
-  if (!bin->next)
-    return;
-
-  /* --- Now just remove the item from the list and free it --- *
-   *
-   * Oh, and bump the load counter.
-   */
-
-  bin->next = p->next;
-  if (p->len > SYM_BUFSZ)
-    sub_free(p->name.p, p->len);
-  free(p);
-  t->c++;
+  sym_base *q = p;
+  hash_remove(&t->t, &q->b);
+  if (q->len > SYM_BUFSZ)
+    sub_free(q->name.p, q->len);
+  free(q);
+  t->load++;
  }
  
  /* --- @sym_mkiter@ --- *
@@ -387,12 +297,7 @@ void sym_remove(sym_table *t, void *b)
   *             iterate through a symbol table.
   */
  
-void sym_mkiter(sym_iter *i, sym_table *t)
-{
-  i->t = t;
-  i->i = 0;
-  i->n = 0;
-}
+void sym_mkiter(sym_iter *i, sym_table *t) { SYM_MKITER(i, t); }
  
  /* --- @sym_next@ --- *
   *
@@ -406,23 +311,8 @@ void sym_mkiter(sym_iter *i, sym_table *t)
  
  void *sym_next(sym_iter *i)
  {
-  sym_base *p;
-
-  /* --- Find the next item --- */
-
-  while (!i->n) {
-    if (i->i > i->t->mask)
-      return (0);
-    i->n = i->t->a[i->i++];
-  }
-
-  /* --- Update the iterator block --- */
-
-  p = i->n;
-  i->n = p->next;
-
-  /* --- Done --- */
-
+  void *p;
+  SYM_NEXT(i, p);
    return (p);
  }
  
@@ -522,10 +412,7 @@ int main(void)
        case 0: {
         sym_word *w;
  
-       printf("find `%s'\n", line[i]);
-       if ((rand() & 1023) == 0) {
-         putchar('.'); fflush(stdout);
-       }
+       printf("? %s\n", line[i]);
  
         w = sym_find(&tbl, line[i], -1, 0, 0);
         if (w != flag[i])
@@ -540,10 +427,7 @@ int main(void)
         unsigned f;
         sym_word *w;
  
-       printf("create `%s'\n", line[i]);
-       if ((rand() & 1023) == 0) {
-         putchar('+'); fflush(stdout);
-       }
+       printf("+ %s\n", line[i]);
  
         w = sym_find(&tbl, line[i], -1, sizeof(sym_word), &f);
         if (f)
@@ -578,10 +462,8 @@ int main(void)
         v = (rand() % entries) == 0;
         if (!v)
           break;
-       printf("\niterated %i entries\n", entries);
-       break;
  
-       printf("iterate\n");
+       printf(".\n");
  
         ntbl = xmalloc(sz * sizeof(sym_word *));
         memcpy(ntbl, flag, sz * sizeof(sym_word *));
@@ -589,14 +471,16 @@ int main(void)
  
         while ((w = sym_next(&it)) != 0) {
           if (ntbl[w->i] == 0)
-           printf("*** error: iterate returned duff item %i\n", w->i);
-         else
+           printf("*** error: iterate returned duff item %s\n", SYM_NAME(w));
+         else {
+           printf(": %s\n", SYM_NAME(w));
             ntbl[w->i] = 0;
+         }
         }
  
         for (i = 0; i < sz; i++)
-         if (ntbl[i]) printf("*** error: iterate didn't return item %i\n",
-                             i);
+         if (ntbl[i]) printf("*** error: iterate didn't return item %s\n",
+                             SYM_NAME(ntbl[i]));
         free(ntbl);
        } break;
  
@@ -607,18 +491,18 @@ int main(void)
  
         printf("dump\n");
  
-       for (i = 0; i <= tbl.mask; i++) {
-         if (!tbl.a[i]) continue;
+       for (i = 0; i <= tbl.b.mask; i++) {
+         if (!tbl.b.v[i]) continue;
           if (v) printf("  %i: ", i);
-         b = tbl.a[i];
+         b = (sym_base *)tbl.b.v[i];
           while (b) {
-           if ((b->hash & tbl.mask) != i)
+           if ((b->b.hash & tbl.b.mask) != i)
               printf("*** error: bad hash value found");
             if (v) printf("`%s'(%08lx:%lu) ",
                           line[((sym_word *)b)->i],
-                         b->hash,
-                         b->hash & tbl.mask);
-           b = b->next;
+                         b->b.hash,
+                         b->b.hash & tbl.b.mask);
+           b = (sym_base *)b->b.next;
           }
           if (v) putchar('\n');
         }
@@ -626,7 +510,7 @@ int main(void)
  
        case 4: {
         if (flag[i]) {
-         printf("remove `%s'\n", SYM_NAME(&flag[i]->base));
+         printf("- %s\n", SYM_NAME(&flag[i]->base));
           if ((rand() & 1023) == 0) {
             putchar('-'); fflush(stdout);
           }
diff --git a/sym.h b/sym.h

index 94d43c51847047348f0d9e6e1765bae0cf122ee9..edd3284b174f2ffed15f022427a03eba11fad920 100644 (file)
--- a/sym.h
+++ b/sym.h
@@ -1,6 +1,6 @@
  /* -*-c-*-
   *
- * $Id: sym.h,v 1.7 1999/06/01 09:49:33 mdw Exp $
+ * $Id: sym.h,v 1.8 1999/08/02 14:45:48 mdw Exp $
   *
   * Symbol table management
   *
@@ -30,6 +30,9 @@
  /*----- Revision history --------------------------------------------------*
   *
   * $Log: sym.h,v $
+ * Revision 1.8  1999/08/02 14:45:48  mdw
+ * Break low-level hashtable code out from sym.
+ *
   * Revision 1.7  1999/06/01 09:49:33  mdw
   * Allow things to be looked up by just their caller-supplied hashes.  This
   * actually needs to be thought through better.
@@ -69,6 +72,10 @@
  #  include "bits.h"
  #endif
  
+#ifndef HASH_H
+#  include "hash.h"
+#endif
+
  /*----- Type definitions --------------------------------------------------*/
  
  /* --- Symbol table --- *
@@ -79,9 +86,8 @@
   */
  
  typedef struct sym_table {
-  unsigned long mask;                  /* Bit mask for hashing purposes */
-  size_t c;                            /* Down counter for growing table */
-  struct sym_base **a;                 /* Array of hash bins */
+  hash_table t;
+  size_t load;
  } sym_table;
  
  /* --- A symbol table entry --- *
@@ -96,8 +102,7 @@ typedef struct sym_table {
  #define SYM_BUFSZ 16                   /* Size of local string buffer */
  
  typedef struct sym_base {
-  struct sym_base *next;               /* Next symbol in hash bin */
-  uint32 hash;                         /* Hash value for symbol's name */
+  hash_base b;
    union {
      char *p;                           /* Pointer to name string */
      char b[SYM_BUFSZ];                 /* Buffer containing a short name */
@@ -114,11 +119,7 @@ typedef struct sym_base {
  
  /* --- An iterator block --- */
  
-typedef struct sym_iter {
-  sym_table *t;                                /* Symbol table being iterated */
-  sym_base *n;                         /* Address of next item to return */
-  size_t i;                            /* Index of next hash bin to use */
-} sym_iter;
+typedef hash_iter sym_iter;
  
  /*----- External functions ------------------------------------------------*/
  
@@ -162,9 +163,7 @@ extern void sym_destroy(sym_table */*t*/);
   *             may be given, in which case the name may contain arbitrary
   *             binary data, or it may be given as a negative number, in
   *             which case the length of the name is calculated as
- *             @strlen(n) + 1@.  The name pointer @n@ may also be zero; in
- *             this case, @l@ is taken to be a raw hash, and any element
- *             with a matching hash is taken to be the one wanted.
+ *             @strlen(n) + 1@.
   *
   *             The return value is the address of a pointer to a @sym_base@
   *             block (which may have other things on the end, as above).  If
@@ -207,6 +206,8 @@ extern void sym_remove(sym_table */*t*/, void */*b*/);
   *             iterate through a symbol table.
   */
  
+#define SYM_MKITER(i, t) HASH_MKITER((i), &(t)->t)
+
  extern void sym_mkiter(sym_iter */*i*/, sym_table */*t*/);
  
  /* --- @sym_next@ --- *
@@ -219,6 +220,12 @@ extern void sym_mkiter(sym_iter */*i*/, sym_table */*t*/);
   *             returned in any particular order.
   */
  
+#define SYM_NEXT(i, p) do {                                            \
+  hash_base *_q;                                                       \
+  HASH_NEXT((i), _q);                                                  \
+  (p) = (void *)_q;                                                    \
+} while (0)
+
  extern void *sym_next(sym_iter */*i*/);
  
  /*----- That's all, folks -------------------------------------------------*/
author	mdw <mdw>
	Mon, 2 Aug 1999 14:45:48 +0000 (14:45 +0000)
committer	mdw <mdw>
	Mon, 2 Aug 1999 14:45:48 +0000 (14:45 +0000)
hash.c	[new file with mode: 0644]	patch \| blob
hash.h	[new file with mode: 0644]	patch \| blob
man/hash.3	[new file with mode: 0644]	patch \| blob
sym.c		patch \| blob \| blame \| history
sym.h		patch \| blob \| blame \| history