/* -*-c-*-
*
- * $Id: sym.c,v 1.5 1999/05/13 22:48:37 mdw Exp $
+ * $Id: sym.c,v 1.13 2001/01/25 21:14:49 mdw Exp $
*
* Symbol table management
*
/*----- Revision history --------------------------------------------------*
*
* $Log: sym.c,v $
+ * Revision 1.13 2001/01/25 21:14:49 mdw
+ * Always add a terminating null, and don't count it in the length.
+ *
+ * Revision 1.12 2001/01/20 11:49:37 mdw
+ * Export tuning parameters from header file, for the benefit of other
+ * hashtable implementations. Change the storage of symbol names: store
+ * the name after the allocated symbol block in all cases. This replaces
+ * the previous complicated and slightly wasteful arrangement.
+ *
+ * Revision 1.11 2000/06/17 10:37:39 mdw
+ * Add support for arena management.
+ *
+ * Revision 1.10 1999/12/10 23:42:04 mdw
+ * Change header file guard names.
+ *
+ * Revision 1.9 1999/10/22 22:36:37 mdw
+ * New test structure for symbol tables.
+ *
+ * Revision 1.8 1999/08/02 14:45:48 mdw
+ * Break low-level hashtable code out from sym.
+ *
+ * Revision 1.7 1999/06/01 09:49:08 mdw
+ * Allow things to be looked up by just their caller-supplied hashes. This
+ * actually needs to be thought through better.
+ *
+ * Revision 1.6 1999/05/26 21:08:31 mdw
+ * Rename symbols in line with newer conventions.
+ *
* Revision 1.5 1999/05/13 22:48:37 mdw
* Twiddle the extension threshold. Change `-ise' to `-ize' throughout.
*
/* --- Local headers --- */
#include "alloc.h"
+#include "arena.h"
+#include "bits.h"
#include "crc32.h"
#include "exc.h"
+#include "hash.h"
#include "sub.h"
#include "sym.h"
-#include "track.h"
-
-/*----- Tuning parameters -------------------------------------------------*/
-
-/* --- Initial hash table size --- *
- *
- * This is the initial @mask@ value. It must be of the form %$2^n - 1$%,
- * so that it can be used to mask of the bottom bits of a hash value.
- */
-
-#define SYM_INITSZ 255 /* Size of a new hash table */
-
-/* --- Maximum load factor --- *
- *
- * This parameter controls how much the table has to be loaded before the
- * table is extended. The number of elements %$n$%, the number of bins %$b$%
- * and the limit %$l$% satisfy the relation %$n < bl$%; if a new item is
- * added to the table and this relation is found to be false, the table is
- * doubled in size.
- */
-
-#define SYM_LIMIT(n) ((n) * 4) /* Load factor for growing table */
/*----- Main code ---------------------------------------------------------*/
-/* --- @sym_createTable@ --- *
+/* --- @sym_create@ --- *
*
* Arguments: @sym_table *t@ = symbol table to initialize
*
* there isn't enough memory.
*/
-void sym_createTable(sym_table *t)
+void sym_create(sym_table *t)
{
- size_t i;
-
- TRACK_CTX("symbol table creation");
- TRACK_PUSH;
-
- t->mask = SYM_INITSZ;
- t->c = SYM_LIMIT(SYM_INITSZ);
- t->a = xmalloc((t->mask + 1) * sizeof(sym_base *));
-
- for (i = 0; i < SYM_INITSZ + 1; i++)
- t->a[i] = 0;
-
- TRACK_POP;
+ hash_create(&t->t, SYM_INITSZ);
+ t->s = &sub_global;
+ t->load = SYM_LIMIT(SYM_INITSZ);
}
-/* --- @sym_destroyTable@ --- *
+/* --- @sym_destroy@ --- *
*
* Arguments: @sym_table *t@ = pointer to symbol table in question
*
* occupy.
*/
-void sym_destroyTable(sym_table *t)
+void sym_destroy(sym_table *t)
{
- size_t i;
- sym_base *p, *q;
-
- TRACK_CTX("symbol table deletion");
- TRACK_PUSH;
-
- for (i = 0; i <= t->mask; i++) {
- p = t->a[i];
- while (p) {
- q = p->next;
- if (p->len > SYM_BUFSZ)
- sub_free(p->name.p, p->len);
- free(p);
- p = q;
- }
- }
- free(t->a);
+ sym_iter i;
- TRACK_POP;
+ SYM_MKITER(&i, t);
+ for (;;) {
+ sym_base *p;
+ SYM_NEXT(&i, p);
+ if (!p)
+ break;
+ x_free(t->t.a, p);
+ }
+ hash_destroy(&t->t);
}
/* --- @sym_find@ --- *
void *sym_find(sym_table *t, const char *n, long l, size_t sz, unsigned *f)
{
- unsigned long hash; /* Hash value for user's name */
- size_t len = l < 0 ? strlen(n) + 1 : l; /* Find length of user's name */
- sym_base *bin; /* Bin containing our item */
- sym_base *p, *q; /* Pointer wandering through list */
+ uint32 hash;
+ size_t len = 0;
+ hash_base **bin, **p;
+ sym_base *q;
/* --- Find the correct bin --- */
- CRC32(hash, 0, n, len); /* Find hash value for this name */
- bin = p = (sym_base *)(t->a + (hash & t->mask));
+ len = l < 0 ? strlen(n) : l;
+ CRC32(hash, 0, n, len);
+ bin = HASH_BIN(&t->t, hash);
/* --- Search the bin list --- */
- while (p->next) {
- if (hash == p->next->hash && /* Check full hash values first */
- len == p->next->len && /* Then compare string lengths */
- !memcmp(n, SYM_NAME(p->next), len)) /* And finally compare names */
- {
+ for (p = bin; *p; p = &(*p)->next) {
+ q = (sym_base *)*p;
+ if (hash == q->b.hash && len == q->len && !memcmp(n, SYM_NAME(q), len)) {
+
/* --- Found a match --- *
*
* As a minor, and probably pointless, tweak, move the item to the
* front of its bin list.
*/
- q = p->next; /* Find the actual symbol block */
- p->next = q->next; /* Extract block from bin list */
- q->next = bin->next; /* Set up symbol's next pointer */
- bin->next = q; /* And reinsert the block */
+ (*p) = q->b.next;
+ q->b.next = *bin;
+ *bin = &q->b;
/* --- Return the block --- */
- if (f) *f = 1; /* Didn't fail to find the item */
- return (q); /* And return the block */
+ if (f) *f = 1;
+ return (q);
}
-
- p = p->next; /* Move onto the next item */
}
/* --- Couldn't find the item there --- */
- if (f) *f = 0; /* Failed to find the block */
- if (!sz) return (0); /* Return zero if not creating */
-
- /* --- Create a new symbol block and initialize it --- */
-
- {
- TRACK_CTX("new symbol creation");
- TRACK_PUSH;
-
- p = xmalloc(sz); /* Create a new symbol block */
- p->next = bin->next; /* Set up the next pointer */
- p->hash = hash; /* Set up the hash value too */
- p->len = len; /* And set up the string length */
- if (len <= SYM_BUFSZ)
- memcpy(p->name.b, n, len); /* And copy the string over */
- else {
- TRY {
- p->name.p = sub_alloc(len); /* Allocate a block for the name */
- memcpy(p->name.p, n, len); /* And copy the string over */
- } CATCH {
- free(p);
- TRACK_POP;
- RETHROW;
- } END_TRY;
- }
+ if (f) *f = 0;
+ if (!sz) return (0);
- TRACK_POP;
- }
+ /* --- Create a new symbol block and initialize it --- *
+ *
+ * The name is attached to the end of the symbol block.
+ */
- bin->next = p; /* Put the pointer into the bin */
+ q = x_alloc(t->t.a, sz + len + 1);
+ q->b.next = *bin;
+ q->b.hash = hash;
+ q->name = (char *)q + sz;
+ memcpy(q->name, n, len);
+ q->name[len] = 0;
+ q->len = len;
+ *bin = &q->b;
/* --- Consider growing the array --- */
- if (t->c)
- t->c--;
- if (!t->c) {
- unsigned long m = t->mask + 1; /* Next set bit in has word */
- sym_base *p, *q, *r; /* More useful pointers */
- size_t i, lim; /* Loop counter and limit */
-
- TRACK_CTX("symbol table extension");
- TRACK_PUSH;
-
- /* --- Update values in the anchor block --- */
-
- TRY {
- t->a = xrealloc(t->a, (t->mask + 1) * 2 * sizeof(sym_base *));
- } CATCH switch (exc_type) {
- case EXC_NOMEM:
- TRACK_POP;
- return (bin->next);
- default:
- TRACK_POP;
- RETHROW;
- } END_TRY;
-
- t->c = SYM_LIMIT(t->mask + 1); /* Set load value */
- t->mask = (t->mask + 1) * 2 - 1; /* Set the new mask value */
-
- /* --- Now wander through the table rehashing things --- *
- *
- * This loop is very careful to avoid problems with aliasing. The items
- * are dealt with from the end backwards to avoid overwriting bins before
- * they've been processed.
- */
-
- lim = (t->mask + 1) >> 1;
- for (i = 0; i < lim; i++) {
-
- /* --- Some initialisation --- */
-
- r = t->a[i]; /* Find the list we're dissecting */
- p = (sym_base *)(t->a + i); /* Find bit-clear list */
- q = (sym_base *)(t->a + i + lim); /* And the bit-set lsit */
-
- /* --- Now go through the @r@ list --- */
-
- while (r) {
- if (r->hash & m) /* Is the next bit set? */
- q = q->next = r; /* Yes, so fit up previous link */
- else
- p = p->next = r; /* No, so fit up previous link */
- r = r->next; /* Move onto the next item */
- }
- p->next = q->next = 0; /* Null terminate the new lists */
- }
-
- TRACK_POP;
- }
+ if (t->load)
+ t->load--;
+ if (!t->load && hash_extend(&t->t))
+ t->load = SYM_LIMIT(t->t.mask + 1);
/* --- Finished that, so return the new symbol block --- */
- return (p);
+ return (q);
}
/* --- @sym_remove@ --- *
*
- * Arguments: @sym_table *i@ = pointer to a symbol table object
- * @void *b@ = pointer to symbol table entry
+ * Arguments: @sym_table *t@ = pointer to a symbol table object
+ * @void *p@ = pointer to symbol table entry
*
* Returns: ---
*
* to the entry should already be gone by this point.
*/
-void sym_remove(sym_table *t, void *b)
+void sym_remove(sym_table *t, void *p)
{
- /* --- A quick comment --- *
- *
- * Since the @sym_base@ block contains the hash, finding the element in the
- * bin list is really quick -- it's not worth bothering with things like
- * doubly linked lists.
- */
-
- sym_base *p = b;
- sym_base *bin = (sym_base *)(t->a + (p->hash & t->mask));
-
- /* --- Find the item in the bin list --- */
-
- while (bin->next) {
- if (bin->next == p)
- break;
- bin = bin->next;
- }
- if (!bin->next)
- return;
-
- /* --- Now just remove the item from the list and free it --- *
- *
- * Oh, and bump the load counter.
- */
-
- bin->next = p->next;
- if (p->len > SYM_BUFSZ)
- sub_free(p->name.p, p->len);
- free(p);
- t->c++;
+ sym_base *q = p;
+ hash_remove(&t->t, &q->b);
+ xfree(q);
+ t->load++;
}
-/* --- @sym_createIter@ --- *
+/* --- @sym_mkiter@ --- *
*
* Arguments: @sym_iter *i@ = pointer to an iterator object
* @sym_table *t@ = pointer to a symbol table object
* iterate through a symbol table.
*/
-void sym_createIter(sym_iter *i, sym_table *t)
-{
- i->t = t;
- i->i = 0;
- i->n = 0;
-}
+void sym_mkiter(sym_iter *i, sym_table *t) { SYM_MKITER(i, t); }
/* --- @sym_next@ --- *
*
void *sym_next(sym_iter *i)
{
- sym_base *p;
-
- /* --- Find the next item --- */
-
- while (!i->n) {
- if (i->i > i->t->mask)
- return (0);
- i->n = i->t->a[i->i++];
- }
-
- /* --- Update the iterator block --- */
-
- p = i->n;
- i->n = p->next;
-
- /* --- Done --- */
-
+ void *p;
+ SYM_NEXT(i, p);
return (p);
}
-/*----- Symbol table test code --------------------------------------------*/
-
-#ifdef TEST_RIG
-
-#include <errno.h>
-#include <time.h>
-
-typedef struct sym_word {
- sym_base base;
- size_t i;
-} sym_word;
-
-
-/* --- What it does --- *
- *
- * Reads the file /usr/dict/words (change to some other file full of
- * interesting and appropriate bits of text to taste) into a big buffer and
- * picks apart into lines. Then picks lines at random and enters them into
- * the symbol table.
- */
-
-int main(void)
-{
- char *buff, *p, *lim;
- size_t sz, done;
- FILE *fp;
- int i;
- char **line;
- sym_word **flag;
- sym_table tbl;
- int entries;
-
- /* --- Initialize for reading the file --- */
-
- sz = BUFSIZ;
- buff = xmalloc(sz + 1);
- done = 0;
- sub_init();
-
- if ((fp = fopen("/usr/dict/words", "r")) == 0)
- fprintf(stderr, "buggered ;-( (%s)\n", strerror(errno));
-
- /* --- Read buffers of text --- *
- *
- * Read a buffer. If more to come, double the buffer size and try again.
- * This is the method I recommended to comp.lang.c, so I may as well try
- * it.
- */
-
- for (;;) {
- i = fread(buff + done, 1, sz - done, fp);
- done += i;
- if (done != sz)
- break;
- sz <<= 1;
- buff = xrealloc(buff, sz + 1);
- }
-
- /* --- Count the lines --- */
-
- lim = buff + done;
-
- sz = 1;
- for (p = buff; p < lim; p++)
- if (*p == '\n') sz++;
-
- /* --- Build a table of line starts --- */
-
- line = xmalloc(sz * sizeof(char *));
- i = 0;
- line[i++] = buff;
- for (p = buff; p < lim; p++)
- if (*p == '\n') *p = 0, line[i++] = p + 1;
- *lim = 0;
-
- /* --- Build a table of lines --- *
- *
- * This reverses the mapping which the symbol table performs, so that its
- * accuracy can be tested.
- */
-
- flag = xmalloc(sz * sizeof(sym_word *));
- for (i = 0; i < sz; i++)
- flag[i] = 0;
- entries = 0;
-
- sym_createTable(&tbl);
-
- for (;;) {
- i = (unsigned)rand() % sz;
-
- switch (rand() % 5)
- {
- case 0: {
- sym_word *w;
-
- printf("find `%s'\n", line[i]);
- if ((rand() & 1023) == 0) {
- putchar('.'); fflush(stdout);
- }
-
- w = sym_find(&tbl, line[i], -1, 0, 0);
- if (w != flag[i])
- printf("*** error: find `%s' gave %p not %p\n",
- line[i], (void *)w, (void *)flag[i]);
- else if (w && w->i != i)
- printf("*** error: find sym for `%s' gives index %i not %i\n",
- line[i], w->i, i);
- } break;
-
- case 1: {
- unsigned f;
- sym_word *w;
-
- printf("create `%s'\n", line[i]);
- if ((rand() & 1023) == 0) {
- putchar('+'); fflush(stdout);
- }
-
- w = sym_find(&tbl, line[i], -1, sizeof(sym_word), &f);
- if (f)
- {
- if (w != flag[i])
- printf("*** error: create `%s' gave %p not %p\n",
- line[i], (void *)w, (void *)flag[i]);
- else if (w && w->i != i)
- printf("*** error: create sym for `%s' gives index %i not %i\n",
- line[i], w->i, i);
- }
- else
- {
- if (flag[i])
- printf("*** error: create `%s' gave new block, should be %p\n",
- line[i], (void *)flag[i]);
- else {
- flag[i] = w;
- w->i = i;
- entries++;
- }
- }
- } break;
-
- case 2: {
- sym_iter it;
- sym_word *w, **ntbl;
- int v;
-
- if (!entries)
- break;
- v = (rand() % entries) == 0;
- if (!v)
- break;
- printf("\niterated %i entries\n", entries);
- break;
-
- printf("iterate\n");
-
- ntbl = xmalloc(sz * sizeof(sym_word *));
- memcpy(ntbl, flag, sz * sizeof(sym_word *));
- sym_createIter(&it, &tbl);
-
- while ((w = sym_next(&it)) != 0) {
- if (ntbl[w->i] == 0)
- printf("*** error: iterate returned duff item %i\n", w->i);
- else
- ntbl[w->i] = 0;
- }
-
- for (i = 0; i < sz; i++)
- if (ntbl[i]) printf("*** error: iterate didn't return item %i\n",
- i);
- free(ntbl);
- } break;
-
- case 3: {
- sym_base *b;
- int v = rand() & 255 ? 0 : 1;
- break;
-
- printf("dump\n");
-
- for (i = 0; i <= tbl.mask; i++) {
- if (!tbl.a[i]) continue;
- if (v) printf(" %i: ", i);
- b = tbl.a[i];
- while (b) {
- if ((b->hash & tbl.mask) != i)
- printf("*** error: bad hash value found");
- if (v) printf("`%s'(%08lx:%lu) ",
- line[((sym_word *)b)->i],
- b->hash,
- b->hash & tbl.mask);
- b = b->next;
- }
- if (v) putchar('\n');
- }
- } break;
-
- case 4: {
- if (flag[i]) {
- printf("remove `%s'\n", SYM_NAME(&flag[i]->base));
- if ((rand() & 1023) == 0) {
- putchar('-'); fflush(stdout);
- }
- sym_remove(&tbl, flag[i]);
- flag[i] = 0;
- entries--;
- }
- } break;
- }
-
- }
-
- return (0);
-}
-
-#endif
-
/*----- That's all, folks -------------------------------------------------*/