From 03d53b73fc6663cef4493cfda7193c958557c240 Mon Sep 17 00:00:00 2001 Message-Id: <03d53b73fc6663cef4493cfda7193c958557c240.1714141986.git.mdw@distorted.org.uk> From: Mark Wooding Date: Mon, 2 Aug 1999 14:45:48 +0000 Subject: [PATCH] Break low-level hashtable code out from sym. Organization: Straylight/Edgeware From: mdw --- hash.c | 216 +++++++++++++++++++++++++++++++++++++++ hash.h | 214 ++++++++++++++++++++++++++++++++++++++ man/hash.3 | 294 +++++++++++++++++++++++++++++++++++++++++++++++++++++ sym.c | 268 ++++++++++++++---------------------------------- sym.h | 35 ++++--- 5 files changed, 821 insertions(+), 206 deletions(-) create mode 100644 hash.c create mode 100644 hash.h create mode 100644 man/hash.3 diff --git a/hash.c b/hash.c new file mode 100644 index 0000000..96c150b --- /dev/null +++ b/hash.c @@ -0,0 +1,216 @@ +/* -*-c-*- + * + * $Id: hash.c,v 1.1 1999/08/02 14:45:48 mdw Exp $ + * + * General hashtable infrastructure + * + * (c) 1999 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of the mLib utilities library. + * + * mLib is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * mLib is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with mLib; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +/*----- Revision history --------------------------------------------------* + * + * $Log: hash.c,v $ + * Revision 1.1 1999/08/02 14:45:48 mdw + * Break low-level hashtable code out from sym. + * + */ + +/*----- Header files ------------------------------------------------------*/ + +#include +#include +#include + +#include "alloc.h" +#include "bits.h" +#include "exc.h" +#include "hash.h" +#include "track.h" + +/*----- Main code ---------------------------------------------------------*/ + +/* --- @hash_create@ --- * + * + * Arguments: @hash_table *t@ = pointer to hashtable to initialize + * @size_t n@ = number of bins to allocate (zero initially) + * + * Returns: --- + * + * Use: Initializes a hashtable with a given number of bins. The + * bins are initially empty. The number of bins must be a power + * of two. This is not checked. + */ + +void hash_create(hash_table *t, size_t n) +{ + hash_base **v; + TRACK_CTX("hashtable creation"); + TRACK_PUSH; + t->v = xmalloc(n * sizeof(hash_base *)); + t->mask = n - 1; + for (v = t->v; n; v++, n--) + *v = 0; + TRACK_POP; +} + +/* --- @hash_destroy@ --- * + * + * Arguments: @hash_table *t@ = pointer to hashtable to destroy + * + * Returns: --- + * + * Use: Frees a hashtable. The items are not freed: they are the + * responsibility of the implementation. + */ + +void hash_destroy(hash_table *t) +{ + TRACK_CTX("hashtable destruction"); + TRACK_PUSH; + free(t->v); + TRACK_POP; +} + +/* --- @hash_bin@ --- * + * + * Arguments: @hash_table *t@ = pointer to hashtable + * @uint32 hash@ = hash value to look up + * + * Returns: Pointer to the bin's list head. + * + * Use: Given a hash value return the address of the appropriate list + * head. It is safe to remove the current entry from the table. + */ + +hash_base **hash_bin(hash_table *t, uint32 hash) +{ + return (HASH_BIN(t, hash)); +} + +/* --- @hash_extend@ --- * + * + * Arguments: @hash_table *t@ = pointer to hashtable to extend + * + * Returns: Nonzero if extension was successful. + * + * Use: Extends a hashtable. The number of bins is doubled and the + * entries are redistributed. + */ + +int hash_extend(hash_table *t) +{ + hash_base **v; + uint32 m = t->mask + 1; + size_t i; + + /* --- Push in a tracking context --- */ + + TRACK_CTX("hashtable extension"); + TRACK_PUSH; + + /* --- Allocate a new hash bin vector --- */ + + if ((v = realloc(t->v, m * 2 * sizeof(hash_base *))) == 0) { + TRACK_POP; + return (0); + } + t->v = v; + t->mask = (m * 2) - 1; + + /* --- Wander through the table rehashing things --- */ + + for (i = 0; i < m; i++) { + hash_base **p = v + i; + hash_base **q = p + m; + + while (*p) { + if (!((*p)->hash & m)) + p = &(*p)->next; + else { + *q = *p; + q = &(*q)->next; + *p = (*p)->next; + } + } + *p = 0; + *q = 0; + } + + TRACK_POP; + return (1); +} + +/* --- @hash_remove@ --- * + * + * Arguments: @hash_table *t@ = pointer to hashtable + * @hash_base *p@ = pointer to item to remove + * + * Returns: --- + * + * Use: Removes an item from a hashtable. The item itself is not + * freed, although it is removed from the data structure and is + * safe to free. + */ + +void hash_remove(hash_table *t, hash_base *p) +{ + hash_base **b = HASH_BIN(t, p->hash); + while (*b) { + if (*b == p) { + *b = p->next; + return; + } + b = &(*b)->next; + } + return; +} + +/* --- @hash_mkiter@ --- * + * + * Arguments: @hash_iter *i@ = pointer to iterator to create + * @hash_table *t@ = pointer to hashtable to iterate + * + * Returns: --- + * + * Use: Initializes a hashtable iterator. + */ + +void hash_mkiter(hash_iter *i, hash_table *t) { HASH_MKITER(i, t); } + +/* --- @hash_next@ --- * + * + * Arguments: @hash_iter *i@ = pointer to iterator + * + * Returns: Pointer to the next hashtable entry found, or null. + * + * Use: Returns the next entry from the hashtable. + */ + +hash_base *hash_next(hash_iter *i) +{ + hash_base *b; + HASH_NEXT(i, b); + return (b); +} + +/*----- That's all, folks -------------------------------------------------*/ diff --git a/hash.h b/hash.h new file mode 100644 index 0000000..6d739a7 --- /dev/null +++ b/hash.h @@ -0,0 +1,214 @@ +/* -*-c-*- + * + * $Id: hash.h,v 1.1 1999/08/02 14:45:48 mdw Exp $ + * + * General hashtable infrastructure + * + * (c) 1999 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of the mLib utilities library. + * + * mLib is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * mLib is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with mLib; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +/*----- Revision history --------------------------------------------------* + * + * $Log: hash.h,v $ + * Revision 1.1 1999/08/02 14:45:48 mdw + * Break low-level hashtable code out from sym. + * + */ + +#ifndef HASH_H +#define HASH_H + +#ifdef __cplusplus + extern "C" { +#endif + +/*----- Notes -------------------------------------------------------------* + * + * This isn't a complete implementation of anything. It's a collection of + * useful types and functions which will help when building hashtables of + * things. The general-purpose hashtable is provided by the @sym@ functions. + */ + +/*----- Header files ------------------------------------------------------*/ + +#include + +#ifndef BITS_H +# include "bits.h" +#endif + +/*----- Data structures ---------------------------------------------------*/ + +/* --- Hashtable basics --- * + * + * This contains the bare minimum to actually get anything useful done. In + * particular it doesn't maintain any weighting information: when to extend + * the table is left up to the particular implementation. + */ + +typedef struct hash_table { + uint32 mask; /* Bit mask of hash bits */ + struct hash_base **v; /* Vector of hash bins */ +} hash_table; + +/* --- A hashtable entry --- * + * + * This is the bare minimum of what needs to be remembered in each hashtable + * entry. Comparison of elements is left to the implementation, so I don't + * need to know anything about how to represent hash keys here. + */ + +typedef struct hash_base { + struct hash_base *next; /* Next entry in hash bin */ + uint32 hash; /* Hash value for this entry */ +} hash_base; + +/* --- A hashtable iterator --- */ + +typedef struct hash_iter { + hash_table *t; /* Hashtable being iterated */ + hash_base *p; /* Address of next item to return */ + size_t i; /* Index of next hash bin to use */ +} hash_iter; + +/*----- Functions provided ------------------------------------------------*/ + +/* --- @hash_create@ --- * + * + * Arguments: @hash_table *t@ = pointer to hashtable to initialize + * @size_t n@ = number of bins to allocate (zero initially) + * + * Returns: --- + * + * Use: Initializes a hashtable with a given number of bins. The + * bins are initially empty. The number of bins must be a power + * of two. This is not checked. + */ + +extern void hash_create(hash_table */*t*/, size_t /*n*/); + +/* --- @hash_destroy@ --- * + * + * Arguments: @hash_table *t@ = pointer to hashtable to destroy + * + * Returns: --- + * + * Use: Frees a hashtable. The items are not freed: they are the + * responsibility of the implementation. + */ + +extern void hash_destroy(hash_table */*t*/); + +/* --- @hash_bin@ --- * + * + * Arguments: @hash_table *t@ = pointer to hashtable + * @uint32 hash@ = hash value to look up + * + * Returns: Pointer to the bin's list head. + * + * Use: Given a hash value return the address of the appropriate list + * head. It is safe to remove the current entry from the table. + */ + +#define HASH_BIN(t, hash) ((t)->v + ((hash) & (t)->mask)) + +extern hash_base **hash_bin(hash_table */*t*/, uint32 /*hash*/); + +/* --- @hash_extend@ --- * + * + * Arguments: @hash_table *t@ = pointer to hashtable to extend + * + * Returns: Nonzero if extension was successful. + * + * Use: Extends a hashtable. The number of bins is doubled and the + * entries are redistributed. + */ + +extern int hash_extend(hash_table */*t*/); + +/* --- @hash_remove@ --- * + * + * Arguments: @hash_table *t@ = pointer to hashtable + * @hash_base *p@ = pointer to item to remove + * + * Returns: --- + * + * Use: Removes an item from a hashtable. The item itself is not + * freed, although it is removed from the data structure and is + * safe to free. + */ + +extern void hash_remove(hash_table */*t*/, hash_base */*p*/); + +/* --- @hash_mkiter@ --- * + * + * Arguments: @hash_iter *i@ = pointer to iterator to create + * @hash_table *t@ = pointer to hashtable to iterate + * + * Returns: --- + * + * Use: Initializes a hashtable iterator. + */ + +#define HASH_MKITER(i_, t_) ((i_)->t = (t_), (i_)->p = 0, (i_)->i = 0) + +extern void hash_mkiter(hash_iter */*i*/, hash_table */*t*/); + +/* --- @hash_next@ --- * + * + * Arguments: @hash_iter *i@ = pointer to iterator + * + * Returns: Pointer to the next hashtable entry found, or null. + * + * Use: Returns the next entry from the hashtable. + */ + +#define HASH_NEXT(i_, b_) do { \ + hash_iter *_i = (i_); \ + hash_base *_p; \ + hash_table *_t = _i->t; \ + uint32 _m = _t->mask; \ + \ + for (;;) { \ + if (_i->p) { \ + _p = _i->p; \ + _i->p = _p->next; \ + break; \ + } else if (_i->i > _m) { \ + _p = 0; \ + break; \ + } else \ + _i->p = _t->v[_i->i++]; \ + } \ + (b_) = _p; \ +} while (0) + +extern hash_base *hash_next(hash_iter */*i*/); + +/*----- That's all, folks -------------------------------------------------*/ + +#ifdef __cplusplus + } +#endif + +#endif diff --git a/man/hash.3 b/man/hash.3 new file mode 100644 index 0000000..9df4670 --- /dev/null +++ b/man/hash.3 @@ -0,0 +1,294 @@ +.\" -*-nroff-*- +.de VS +.sp 1 +.RS +.nf +.ft B +.. +.de VE +.ft R +.fi +.RE +.sp 1 +.. +.de hP +.IP +.ft B +\h'-\w'\\$1\ 'u'\\$1\ \c +.ft P +.. +.ie t .ds o \(bu +.el .ds o o +.TH hash 3 "2 August 1999" mLib +.SH "NAME" +hash \- low-level hashtable implementation +.\" @hash_create +.\" @hash_destroy +.\" @hash_bin +.\" @hash_extend +.\" @hash_remove +.\" @hash_mkiter +.\" @hash_next +.\" +.\" @HASH_BIN +.\" @HASH_MKITER +.\" @HASH_NEXT +.\" +.SH "SYNOPSIS" +.nf +.B "#include " + +.BI "void hash_create(hash_table *" t ", size_t " n ); +.BI "void hash_destroy(hash_table *" t ); +.BI "hash_base **hash_bin(hash_table *" t ", uint32 " hash ); +.BI "int hash_extend(hash_table *" t ); +.BI "void hash_remove(hash_table *" t ", hash_base * " b ); +.BI "void hash_mkiter(hash_iter *" i ", hash_table *" t ); +.BI "hash_base *hash_next(hash_iter *" i ); + +.BI "hash_base **HASH_BIN(hash_table *" t ", uint32 " hash ); +.BI "void HASH_MKITER(hash_iter *" i ", hash_table *" t ); +.BI "void HASH_NEXT(hash_iter *" i ", " b ); +.fi +.SH "OVERVIEW" +The +.B hash +functions provide the basis for an extensible hashtable implementation. +The implementation is not complete. Many decisions have been left to +the user, including: +.hP \*o +How keys should be represented, hashed and compared. +.hP \*o +How objects contained within the table should be allocated. +.hP \*o +When the hashtable should be extended. +.PP +A complete hashtable implementation will need to take the above +decisions. If you just want a prepackaged solution, see +.BR sym (3) +which provides one. +.SH "IMPLEMENTATION DETAILS" +Each item in the hashtable is assigned a 32-bit integer +.IR hash : +a number computed somehow from the item's data such that two items which +are considered equal will yield the same hash. Ideally, different items +will yield different hashes. It is important for this implementation +that all bits of the hash are similarly good. +.PP +In order to look up an item, the high bits of the hash are masked off +and the remainder used as an index into a vector of +.IR "bin lists" . +Each bin contains a list of items with identical low-order bits of their +hashes. +.PP +A table expansion involves doubling the size of the bin vector. Each +bin list is then split into two, items being placed into a new bin +depending on the setting of the next lowest hash bit. By expanding the +hashtable as needed, lookups remain constant-time. +.PP +A low-level hashtable is represented by a +.B hash_table +structure. It contains two members: +.TP +.B mask +The current bitmask to be applied to hashes. This is one less than the +current number of bins in the hashtable, and is applied to hash values +in order to decide which bin an item should be in. +.TP +.B v +The bin vector. It is an array of pointers to hashtable items. +.PP +A hashtable item consists of a +.B hash_base +structure. A full hashtable implementation will need to extend this +structure by adding keying information and other data; the +.B hash_base +only contains the bare minimum of information needed to maintain the +hashtable at a low level. It contains the following members: +.TP +.B next +Pointer to the next item in the bin list. The final item has a null +.B next +pointer. The entry in the bin vector is null if the bin list is empty. +It is up to the high-level implementation to insert items into the list. +.TP +.B hash +The hash for this item. This must be the full 32-bit hash for the +current item. It is used during hashtable expansion to determine which +bin an item should be moved to. +.SH "FUNCTIONALITY PROVIDED" +This section describes the functions and macros provided for building +hashtables. Code examples are given throughout. They assume the +following definitions: +.VS +/* --- A table of items --- */ + +typedef struct item_table { + hash_table t; + size_t load; +}; + +/* --- An item --- */ + +typedef struct item { + hash_base b; + const char *k; + /* ... */ +} item; +.VE +The implementation presented here is simple but relatively bad. The +source file +.B sym.c +presents a more realistic example, but is rather more complex. +.SS "Initialization and finalization" +An empty hashtable is initialized by calling +.B hash_create +with the address of a +.B hash_table +structure to be filled in and the initial number of hash bins to create. +.PP +For example, an item table might be initialized like this: +.VS +void item_createtab(item_table *t) +{ + hash_create(&t->t, ITEM_INITSZ); + t->load = ITEM_INITLOAD; +} +.VE +A hashtable can be destroyed by calling +.B hash_destroy +with the address of the +.B hash_table +structure. This does not attempt to deallocate the individual items; +that must be done beforehand. +.PP +The usual way to deallocate the individual hashtable items is using the +iteration constructs described below. +.VS +void item_destroytab(item_table *t) +{ + hash_iter i; + hash_base *b; + for (hash_mkiter(&i, &t->t); (b = hash_next(&i)) != 0; ) { + item *ii = (item *)b; + free(ii->k); + /* ... */ + DESTROY(ii); + } + hash_destroy(&t->t); +} +.VE +.sp -1 +.SS "Searching, adding and removing" +Items must be searched for and added by hand. +.PP +The macro +.B HASH_BIN +returns the address of the bin list haed for a particular hashtable and +hash value. The function +.B hash_bin +works the same way and provides the same result, but since the macro is +very simple its use is preferred. However, it will evaluate its +arguments multiple times. +.PP +Once the bin list has been found, it's fairly easy to search for an +exact match. A simple search might look something like this: +.VS +item *lookup(item_table *t, const char *k) +{ + uint32 h = hash(k); /* Hash @k@ somehow */ + hash_base **bin = HASH_BIN(&t->t, h); + hash_base *b; + for (b = *bin; b; b = b->next) { + item *i = (item *)b; + if (h == i->b.hash && strcmp(k, i->k) == 0) + return (i); + } + return (0); +} +.VE +Insertion is also relatively trivial given the bin list head. Insertion +may make the hashtable too large, so it might be necessary to extend +it. Extension is performed by +.BR hash_extend , +which is passed only the address of the hashtable. It returns nonzero +if extension was successful. +.VS +item *add(item_table *t, const char *k, /* ... */) +{ + item *i; + uint32 h; + hash_base **bin; + + /* --- See if the item is already there --- */ + + if ((i = = lookup(t, k)) != 0) + return (i); + + /* --- Make a new hashtable item --- */ + + i = CREATE(item); + i->k = xstrdup(k); + /* ... */ + + /* --- Link it into the bin list --- */ + + h = i->b.hash = hash(k); + bin = HASH_BIN(&t->t, h); + i->b.next = *bin; + *bin = &i->b.next; + + /* --- Maybe extend the hashtable --- */ + + if (t->load) + t->load--; + else if (hash_extend(&t->t)) + t->load = recalc_load(t); + + /* --- Done --- */ + + return (i); +} +.VE +The +.B sym +implementation is rather more sophisticated in its approach but the idea +is the same. In particular, +.B sym +provides a single interface for lookup and insertion which prevents the +unnecessary rehashing performed by the above code. +.PP +Removal of items is more straightforward. The function +.B hash_remove +will unlink a given item from its bin list, after which point it is safe +to remove. +.SS "Iteration" +Iteration allows code to be performed on all the items in a hashtable. +This is done using an +.I iterator +object, represented by a +.B hash_iter +structure. An iterator is initialized by calling +.BR hash_mkiter . +Each call to +.B hash_next +yields a different item from the hashtable until there are none left, a +condition signified by a null return value. +.PP +The macros +.B HASH_MKITER +and +.B HASH_NEXT +do the same jobs as the above functions. However, +.B HASH_NEXT +has a slightly bizarre argument passing convention: its second argument +is an +.I lvalue +which is updated to contain the address of the next item. +.PP +The finalization code above contained an example of iteration. +.SH "SEE ALSO" +.BR sym (3), +.BR mLib (3). +.SH "AUTHOR" +Mark Wooding, diff --git a/sym.c b/sym.c index 394b761..03d2e82 100644 --- a/sym.c +++ b/sym.c @@ -1,6 +1,6 @@ /* -*-c-*- * - * $Id: sym.c,v 1.7 1999/06/01 09:49:08 mdw Exp $ + * $Id: sym.c,v 1.8 1999/08/02 14:45:48 mdw Exp $ * * Symbol table management * @@ -30,6 +30,9 @@ /*----- Revision history --------------------------------------------------* * * $Log: sym.c,v $ + * Revision 1.8 1999/08/02 14:45:48 mdw + * Break low-level hashtable code out from sym. + * * Revision 1.7 1999/06/01 09:49:08 mdw * Allow things to be looked up by just their caller-supplied hashes. This * actually needs to be thought through better. @@ -68,6 +71,7 @@ #include "bits.h" #include "crc32.h" #include "exc.h" +#include "hash.h" #include "sub.h" #include "sym.h" #include "track.h" @@ -80,7 +84,7 @@ * so that it can be used to mask of the bottom bits of a hash value. */ -#define SYM_INITSZ 255 /* Size of a new hash table */ +#define SYM_INITSZ 64 /* Size of a new hash table */ /* --- Maximum load factor --- * * @@ -91,7 +95,7 @@ * doubled in size. */ -#define SYM_LIMIT(n) ((n) * 4) /* Load factor for growing table */ +#define SYM_LIMIT(n) ((n) * 2) /* Load factor for growing table */ /*----- Main code ---------------------------------------------------------*/ @@ -107,18 +111,10 @@ void sym_create(sym_table *t) { - size_t i; - TRACK_CTX("symbol table creation"); TRACK_PUSH; - - t->mask = SYM_INITSZ; - t->c = SYM_LIMIT(SYM_INITSZ); - t->a = xmalloc((t->mask + 1) * sizeof(sym_base *)); - - for (i = 0; i < SYM_INITSZ + 1; i++) - t->a[i] = 0; - + hash_create(&t->t, SYM_INITSZ); + t->load = SYM_LIMIT(SYM_INITSZ); TRACK_POP; } @@ -134,23 +130,22 @@ void sym_create(sym_table *t) void sym_destroy(sym_table *t) { - size_t i; - sym_base *p, *q; + sym_iter i; - TRACK_CTX("symbol table deletion"); + TRACK_CTX("symbol table destruction"); TRACK_PUSH; - for (i = 0; i <= t->mask; i++) { - p = t->a[i]; - while (p) { - q = p->next; - if (p->len > SYM_BUFSZ) - sub_free(p->name.p, p->len); - free(p); - p = q; - } + SYM_MKITER(&i, t); + for (;;) { + sym_base *p; + SYM_NEXT(&i, p); + if (!p) + break; + if (p->len > SYM_BUFSZ) + sub_free(p->name.p, p->len); + free(p); } - free(t->a); + hash_destroy(&t->t); TRACK_POP; } @@ -171,9 +166,7 @@ void sym_destroy(sym_table *t) * may be given, in which case the name may contain arbitrary * binary data, or it may be given as a negative number, in * which case the length of the name is calculated as - * @strlen(n) + 1@. The name pointer @n@ may also be zero; in - * this case, @l@ is taken to be a raw hash, and any element - * with a matching hash is taken to be the one wanted. + * @strlen(n) + 1@. * * The return value is the address of a pointer to a @sym_base@ * block (which may have other things on the end, as above). If @@ -192,43 +185,36 @@ void *sym_find(sym_table *t, const char *n, long l, size_t sz, unsigned *f) { uint32 hash; size_t len = 0; - sym_base *bin; - sym_base *p, *q; + hash_base **bin, **p; + sym_base *q; /* --- Find the correct bin --- */ - if (n) { - len = l < 0 ? strlen(n) + 1 : l; - CRC32(hash, 0, n, len); - } else - hash = (uint32)l; - bin = p = (sym_base *)(t->a + (hash & t->mask)); + len = l < 0 ? strlen(n) + 1 : l; + CRC32(hash, 0, n, len); + bin = HASH_BIN(&t->t, hash); /* --- Search the bin list --- */ - while (p->next) { - if (hash == p->next->hash && - (n == 0 || (len == p->next->len && - !memcmp(n, SYM_NAME(p->next), len)))) - { + for (p = bin; *p; p = &(*p)->next) { + q = (sym_base *)*p; + if (hash == q->b.hash && len == q->len && !memcmp(n, SYM_NAME(q), len)) { + /* --- Found a match --- * * * As a minor, and probably pointless, tweak, move the item to the * front of its bin list. */ - q = p->next; - p->next = q->next; - q->next = bin->next; - bin->next = q; + (*p) = q->b.next; + q->b.next = *bin; + *bin = &q->b; /* --- Return the block --- */ if (f) *f = 1; return (q); } - - p = p->next; } /* --- Couldn't find the item there --- */ @@ -242,19 +228,19 @@ void *sym_find(sym_table *t, const char *n, long l, size_t sz, unsigned *f) TRACK_CTX("new symbol creation"); TRACK_PUSH; - p = xmalloc(sz); - p->next = bin->next; - p->hash = hash; - p->len = len; + q = xmalloc(sz); + q->b.next = *bin; + q->b.hash = hash; + q->len = len; if (n) { if (len <= SYM_BUFSZ) - memcpy(p->name.b, n, len); + memcpy(q->name.b, n, len); else { TRY { - p->name.p = sub_alloc(len); - memcpy(p->name.p, n, len); + q->name.p = sub_alloc(len); + memcpy(q->name.p, n, len); } CATCH { - free(p); + free(q); TRACK_POP; RETHROW; } END_TRY; @@ -264,76 +250,24 @@ void *sym_find(sym_table *t, const char *n, long l, size_t sz, unsigned *f) TRACK_POP; } - bin->next = p; + *bin = &q->b; /* --- Consider growing the array --- */ - if (t->c) - t->c--; - if (!t->c) { - uint32 m = t->mask + 1; - sym_base *p, *q, *r; - size_t i, lim; - - TRACK_CTX("symbol table extension"); - TRACK_PUSH; - - /* --- Update values in the anchor block --- */ - - TRY { - t->a = xrealloc(t->a, (t->mask + 1) * 2 * sizeof(sym_base *)); - } CATCH switch (exc_type) { - case EXC_NOMEM: - TRACK_POP; - return (bin->next); - default: - TRACK_POP; - RETHROW; - } END_TRY; - - t->c = SYM_LIMIT(t->mask + 1); - t->mask = (t->mask + 1) * 2 - 1; - - /* --- Now wander through the table rehashing things --- * - * - * This loop is very careful to avoid problems with aliasing. The items - * are dealt with from the end backwards to avoid overwriting bins before - * they've been processed. - */ - - lim = (t->mask + 1) >> 1; - for (i = 0; i < lim; i++) { - - /* --- Some initialization --- */ - - r = t->a[i]; - p = (sym_base *)(t->a + i); - q = (sym_base *)(t->a + i + lim); - - /* --- Now go through the @r@ list --- */ - - while (r) { - if (r->hash & m) - q = q->next = r; - else - p = p->next = r; - r = r->next; - } - p->next = q->next = 0; - } - - TRACK_POP; - } + if (t->load) + t->load--; + if (!t->load && hash_extend(&t->t)) + t->load = SYM_LIMIT(t->t.mask / 2 + 1); /* --- Finished that, so return the new symbol block --- */ - return (p); + return (q); } /* --- @sym_remove@ --- * * * Arguments: @sym_table *i@ = pointer to a symbol table object - * @void *b@ = pointer to symbol table entry + * @void *p@ = pointer to symbol table entry * * Returns: --- * @@ -342,38 +276,14 @@ void *sym_find(sym_table *t, const char *n, long l, size_t sz, unsigned *f) * to the entry should already be gone by this point. */ -void sym_remove(sym_table *t, void *b) +void sym_remove(sym_table *t, void *p) { - /* --- A quick comment --- * - * - * Since the @sym_base@ block contains the hash, finding the element in the - * bin list is really quick -- it's not worth bothering with things like - * doubly linked lists. - */ - - sym_base *p = b; - sym_base *bin = (sym_base *)(t->a + (p->hash & t->mask)); - - /* --- Find the item in the bin list --- */ - - while (bin->next) { - if (bin->next == p) - break; - bin = bin->next; - } - if (!bin->next) - return; - - /* --- Now just remove the item from the list and free it --- * - * - * Oh, and bump the load counter. - */ - - bin->next = p->next; - if (p->len > SYM_BUFSZ) - sub_free(p->name.p, p->len); - free(p); - t->c++; + sym_base *q = p; + hash_remove(&t->t, &q->b); + if (q->len > SYM_BUFSZ) + sub_free(q->name.p, q->len); + free(q); + t->load++; } /* --- @sym_mkiter@ --- * @@ -387,12 +297,7 @@ void sym_remove(sym_table *t, void *b) * iterate through a symbol table. */ -void sym_mkiter(sym_iter *i, sym_table *t) -{ - i->t = t; - i->i = 0; - i->n = 0; -} +void sym_mkiter(sym_iter *i, sym_table *t) { SYM_MKITER(i, t); } /* --- @sym_next@ --- * * @@ -406,23 +311,8 @@ void sym_mkiter(sym_iter *i, sym_table *t) void *sym_next(sym_iter *i) { - sym_base *p; - - /* --- Find the next item --- */ - - while (!i->n) { - if (i->i > i->t->mask) - return (0); - i->n = i->t->a[i->i++]; - } - - /* --- Update the iterator block --- */ - - p = i->n; - i->n = p->next; - - /* --- Done --- */ - + void *p; + SYM_NEXT(i, p); return (p); } @@ -522,10 +412,7 @@ int main(void) case 0: { sym_word *w; - printf("find `%s'\n", line[i]); - if ((rand() & 1023) == 0) { - putchar('.'); fflush(stdout); - } + printf("? %s\n", line[i]); w = sym_find(&tbl, line[i], -1, 0, 0); if (w != flag[i]) @@ -540,10 +427,7 @@ int main(void) unsigned f; sym_word *w; - printf("create `%s'\n", line[i]); - if ((rand() & 1023) == 0) { - putchar('+'); fflush(stdout); - } + printf("+ %s\n", line[i]); w = sym_find(&tbl, line[i], -1, sizeof(sym_word), &f); if (f) @@ -578,10 +462,8 @@ int main(void) v = (rand() % entries) == 0; if (!v) break; - printf("\niterated %i entries\n", entries); - break; - printf("iterate\n"); + printf(".\n"); ntbl = xmalloc(sz * sizeof(sym_word *)); memcpy(ntbl, flag, sz * sizeof(sym_word *)); @@ -589,14 +471,16 @@ int main(void) while ((w = sym_next(&it)) != 0) { if (ntbl[w->i] == 0) - printf("*** error: iterate returned duff item %i\n", w->i); - else + printf("*** error: iterate returned duff item %s\n", SYM_NAME(w)); + else { + printf(": %s\n", SYM_NAME(w)); ntbl[w->i] = 0; + } } for (i = 0; i < sz; i++) - if (ntbl[i]) printf("*** error: iterate didn't return item %i\n", - i); + if (ntbl[i]) printf("*** error: iterate didn't return item %s\n", + SYM_NAME(ntbl[i])); free(ntbl); } break; @@ -607,18 +491,18 @@ int main(void) printf("dump\n"); - for (i = 0; i <= tbl.mask; i++) { - if (!tbl.a[i]) continue; + for (i = 0; i <= tbl.b.mask; i++) { + if (!tbl.b.v[i]) continue; if (v) printf(" %i: ", i); - b = tbl.a[i]; + b = (sym_base *)tbl.b.v[i]; while (b) { - if ((b->hash & tbl.mask) != i) + if ((b->b.hash & tbl.b.mask) != i) printf("*** error: bad hash value found"); if (v) printf("`%s'(%08lx:%lu) ", line[((sym_word *)b)->i], - b->hash, - b->hash & tbl.mask); - b = b->next; + b->b.hash, + b->b.hash & tbl.b.mask); + b = (sym_base *)b->b.next; } if (v) putchar('\n'); } @@ -626,7 +510,7 @@ int main(void) case 4: { if (flag[i]) { - printf("remove `%s'\n", SYM_NAME(&flag[i]->base)); + printf("- %s\n", SYM_NAME(&flag[i]->base)); if ((rand() & 1023) == 0) { putchar('-'); fflush(stdout); } diff --git a/sym.h b/sym.h index 94d43c5..edd3284 100644 --- a/sym.h +++ b/sym.h @@ -1,6 +1,6 @@ /* -*-c-*- * - * $Id: sym.h,v 1.7 1999/06/01 09:49:33 mdw Exp $ + * $Id: sym.h,v 1.8 1999/08/02 14:45:48 mdw Exp $ * * Symbol table management * @@ -30,6 +30,9 @@ /*----- Revision history --------------------------------------------------* * * $Log: sym.h,v $ + * Revision 1.8 1999/08/02 14:45:48 mdw + * Break low-level hashtable code out from sym. + * * Revision 1.7 1999/06/01 09:49:33 mdw * Allow things to be looked up by just their caller-supplied hashes. This * actually needs to be thought through better. @@ -69,6 +72,10 @@ # include "bits.h" #endif +#ifndef HASH_H +# include "hash.h" +#endif + /*----- Type definitions --------------------------------------------------*/ /* --- Symbol table --- * @@ -79,9 +86,8 @@ */ typedef struct sym_table { - unsigned long mask; /* Bit mask for hashing purposes */ - size_t c; /* Down counter for growing table */ - struct sym_base **a; /* Array of hash bins */ + hash_table t; + size_t load; } sym_table; /* --- A symbol table entry --- * @@ -96,8 +102,7 @@ typedef struct sym_table { #define SYM_BUFSZ 16 /* Size of local string buffer */ typedef struct sym_base { - struct sym_base *next; /* Next symbol in hash bin */ - uint32 hash; /* Hash value for symbol's name */ + hash_base b; union { char *p; /* Pointer to name string */ char b[SYM_BUFSZ]; /* Buffer containing a short name */ @@ -114,11 +119,7 @@ typedef struct sym_base { /* --- An iterator block --- */ -typedef struct sym_iter { - sym_table *t; /* Symbol table being iterated */ - sym_base *n; /* Address of next item to return */ - size_t i; /* Index of next hash bin to use */ -} sym_iter; +typedef hash_iter sym_iter; /*----- External functions ------------------------------------------------*/ @@ -162,9 +163,7 @@ extern void sym_destroy(sym_table */*t*/); * may be given, in which case the name may contain arbitrary * binary data, or it may be given as a negative number, in * which case the length of the name is calculated as - * @strlen(n) + 1@. The name pointer @n@ may also be zero; in - * this case, @l@ is taken to be a raw hash, and any element - * with a matching hash is taken to be the one wanted. + * @strlen(n) + 1@. * * The return value is the address of a pointer to a @sym_base@ * block (which may have other things on the end, as above). If @@ -207,6 +206,8 @@ extern void sym_remove(sym_table */*t*/, void */*b*/); * iterate through a symbol table. */ +#define SYM_MKITER(i, t) HASH_MKITER((i), &(t)->t) + extern void sym_mkiter(sym_iter */*i*/, sym_table */*t*/); /* --- @sym_next@ --- * @@ -219,6 +220,12 @@ extern void sym_mkiter(sym_iter */*i*/, sym_table */*t*/); * returned in any particular order. */ +#define SYM_NEXT(i, p) do { \ + hash_base *_q; \ + HASH_NEXT((i), _q); \ + (p) = (void *)_q; \ +} while (0) + extern void *sym_next(sym_iter */*i*/); /*----- That's all, folks -------------------------------------------------*/ -- [mdw]