3 * $Id: sym.c,v 1.2 1997/08/04 10:24:25 mdw Exp $
5 * Symbol table management
10 /*----- Licensing notice --------------------------------------------------*
12 * This file is part of `become'
14 * `Become' is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
19 * `Become' is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with `become'; if not, write to the Free Software Foundation,
26 * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
29 /*----- Revision history --------------------------------------------------*
32 * Revision 1.2 1997/08/04 10:24:25 mdw
33 * Sources placed under CVS control.
35 * Revision 1.1 1997/07/21 13:47:44 mdw
40 /*----- Header files ------------------------------------------------------*/
42 /* --- ANSI headers --- */
48 /* --- Local headers --- */
53 /*----- Tuning parameters -------------------------------------------------*/
55 /* --- Initial hash table size --- *
57 * This is the initial @mask@ value. It must be of the form %$2^n - 1$%,
58 * so that it can be used to mask of the bottom bits of a hash value.
61 #define sym__iSize 255 /* Size of a new hash table */
63 /* --- Maximum load factor --- *
65 * This parameter controls how much the table has to be loaded before the
66 * table is extended. The number of elements %$n$%, the number of bins %$b$%
67 * and the limit %$l$% satisfy the relation %$n < bl$%; if a new item is
68 * added to the table and this relation is found to be false, the table is
71 * The parameter @sym__limFactor@ is the value of %$l$% multiplied by 256;
72 * fixed point arithmetic is used to calculate the allowable load. Note
73 * that the exact calculation of this criterion isn't important; what's more
74 * significant is that the parameter allows tuning of the rehashing process.
76 * The current value is %$3 \over 4$%, which appears to be reasonable on the
80 #define sym__limFactor 0x0C0 /* Load factor for growing table */
82 /*----- CRC table ---------------------------------------------------------*/
84 /*****************************************************************/
86 /* CRC LOOKUP TABLE */
87 /* ================ */
89 /* The following CRC lookup table was generated automagically */
90 /* by `crcgen', which is based on the Rocksoft^tm Model CRC */
91 /* Algorithm Table Generation Program. The model parameters */
92 /* supplied to `crcgen' were: */
95 /* Poly : 0x04C11DB7 */
96 /* Init : 0xFFFFFFFF */
97 /* XorOut : 0xFFFFFFFF */
99 /* Check : 0xCBF43926 */
101 /* For more information on the Rocksoft^tm Model CRC Algorithm, */
102 /* see the document titled `A Painless Guide to CRC Error */
103 /* Detection Algorithms' by Ross Williams */
104 /* (ross@@guest.adelaide.edu.au.). This document is likely to be */
105 /* in the FTP archive `ftp.adelaide.edu.au/pub/rocksoft'. */
107 /*****************************************************************/
109 static unsigned long sym__crcTable[256] = {
110 0x00000000L, 0x77073096L, 0xEE0E612CL, 0x990951BAL,
111 0x076DC419L, 0x706AF48FL, 0xE963A535L, 0x9E6495A3L,
112 0x0EDB8832L, 0x79DCB8A4L, 0xE0D5E91EL, 0x97D2D988L,
113 0x09B64C2BL, 0x7EB17CBDL, 0xE7B82D07L, 0x90BF1D91L,
114 0x1DB71064L, 0x6AB020F2L, 0xF3B97148L, 0x84BE41DEL,
115 0x1ADAD47DL, 0x6DDDE4EBL, 0xF4D4B551L, 0x83D385C7L,
116 0x136C9856L, 0x646BA8C0L, 0xFD62F97AL, 0x8A65C9ECL,
117 0x14015C4FL, 0x63066CD9L, 0xFA0F3D63L, 0x8D080DF5L,
118 0x3B6E20C8L, 0x4C69105EL, 0xD56041E4L, 0xA2677172L,
119 0x3C03E4D1L, 0x4B04D447L, 0xD20D85FDL, 0xA50AB56BL,
120 0x35B5A8FAL, 0x42B2986CL, 0xDBBBC9D6L, 0xACBCF940L,
121 0x32D86CE3L, 0x45DF5C75L, 0xDCD60DCFL, 0xABD13D59L,
122 0x26D930ACL, 0x51DE003AL, 0xC8D75180L, 0xBFD06116L,
123 0x21B4F4B5L, 0x56B3C423L, 0xCFBA9599L, 0xB8BDA50FL,
124 0x2802B89EL, 0x5F058808L, 0xC60CD9B2L, 0xB10BE924L,
125 0x2F6F7C87L, 0x58684C11L, 0xC1611DABL, 0xB6662D3DL,
126 0x76DC4190L, 0x01DB7106L, 0x98D220BCL, 0xEFD5102AL,
127 0x71B18589L, 0x06B6B51FL, 0x9FBFE4A5L, 0xE8B8D433L,
128 0x7807C9A2L, 0x0F00F934L, 0x9609A88EL, 0xE10E9818L,
129 0x7F6A0DBBL, 0x086D3D2DL, 0x91646C97L, 0xE6635C01L,
130 0x6B6B51F4L, 0x1C6C6162L, 0x856530D8L, 0xF262004EL,
131 0x6C0695EDL, 0x1B01A57BL, 0x8208F4C1L, 0xF50FC457L,
132 0x65B0D9C6L, 0x12B7E950L, 0x8BBEB8EAL, 0xFCB9887CL,
133 0x62DD1DDFL, 0x15DA2D49L, 0x8CD37CF3L, 0xFBD44C65L,
134 0x4DB26158L, 0x3AB551CEL, 0xA3BC0074L, 0xD4BB30E2L,
135 0x4ADFA541L, 0x3DD895D7L, 0xA4D1C46DL, 0xD3D6F4FBL,
136 0x4369E96AL, 0x346ED9FCL, 0xAD678846L, 0xDA60B8D0L,
137 0x44042D73L, 0x33031DE5L, 0xAA0A4C5FL, 0xDD0D7CC9L,
138 0x5005713CL, 0x270241AAL, 0xBE0B1010L, 0xC90C2086L,
139 0x5768B525L, 0x206F85B3L, 0xB966D409L, 0xCE61E49FL,
140 0x5EDEF90EL, 0x29D9C998L, 0xB0D09822L, 0xC7D7A8B4L,
141 0x59B33D17L, 0x2EB40D81L, 0xB7BD5C3BL, 0xC0BA6CADL,
142 0xEDB88320L, 0x9ABFB3B6L, 0x03B6E20CL, 0x74B1D29AL,
143 0xEAD54739L, 0x9DD277AFL, 0x04DB2615L, 0x73DC1683L,
144 0xE3630B12L, 0x94643B84L, 0x0D6D6A3EL, 0x7A6A5AA8L,
145 0xE40ECF0BL, 0x9309FF9DL, 0x0A00AE27L, 0x7D079EB1L,
146 0xF00F9344L, 0x8708A3D2L, 0x1E01F268L, 0x6906C2FEL,
147 0xF762575DL, 0x806567CBL, 0x196C3671L, 0x6E6B06E7L,
148 0xFED41B76L, 0x89D32BE0L, 0x10DA7A5AL, 0x67DD4ACCL,
149 0xF9B9DF6FL, 0x8EBEEFF9L, 0x17B7BE43L, 0x60B08ED5L,
150 0xD6D6A3E8L, 0xA1D1937EL, 0x38D8C2C4L, 0x4FDFF252L,
151 0xD1BB67F1L, 0xA6BC5767L, 0x3FB506DDL, 0x48B2364BL,
152 0xD80D2BDAL, 0xAF0A1B4CL, 0x36034AF6L, 0x41047A60L,
153 0xDF60EFC3L, 0xA867DF55L, 0x316E8EEFL, 0x4669BE79L,
154 0xCB61B38CL, 0xBC66831AL, 0x256FD2A0L, 0x5268E236L,
155 0xCC0C7795L, 0xBB0B4703L, 0x220216B9L, 0x5505262FL,
156 0xC5BA3BBEL, 0xB2BD0B28L, 0x2BB45A92L, 0x5CB36A04L,
157 0xC2D7FFA7L, 0xB5D0CF31L, 0x2CD99E8BL, 0x5BDEAE1DL,
158 0x9B64C2B0L, 0xEC63F226L, 0x756AA39CL, 0x026D930AL,
159 0x9C0906A9L, 0xEB0E363FL, 0x72076785L, 0x05005713L,
160 0x95BF4A82L, 0xE2B87A14L, 0x7BB12BAEL, 0x0CB61B38L,
161 0x92D28E9BL, 0xE5D5BE0DL, 0x7CDCEFB7L, 0x0BDBDF21L,
162 0x86D3D2D4L, 0xF1D4E242L, 0x68DDB3F8L, 0x1FDA836EL,
163 0x81BE16CDL, 0xF6B9265BL, 0x6FB077E1L, 0x18B74777L,
164 0x88085AE6L, 0xFF0F6A70L, 0x66063BCAL, 0x11010B5CL,
165 0x8F659EFFL, 0xF862AE69L, 0x616BFFD3L, 0x166CCF45L,
166 0xA00AE278L, 0xD70DD2EEL, 0x4E048354L, 0x3903B3C2L,
167 0xA7672661L, 0xD06016F7L, 0x4969474DL, 0x3E6E77DBL,
168 0xAED16A4AL, 0xD9D65ADCL, 0x40DF0B66L, 0x37D83BF0L,
169 0xA9BCAE53L, 0xDEBB9EC5L, 0x47B2CF7FL, 0x30B5FFE9L,
170 0xBDBDF21CL, 0xCABAC28AL, 0x53B39330L, 0x24B4A3A6L,
171 0xBAD03605L, 0xCDD70693L, 0x54DE5729L, 0x23D967BFL,
172 0xB3667A2EL, 0xC4614AB8L, 0x5D681B02L, 0x2A6F2B94L,
173 0xB40BBE37L, 0xC30C8EA1L, 0x5A05DF1BL, 0x2D02EF8DL,
176 /*****************************************************************/
177 /* End of CRC Lookup Table */
178 /*****************************************************************/
180 /* --- Work out the CRC of a bytestring --- */
182 #define sym__crc(s_, l_, hv_) do { \
183 unsigned long h_ = 0xFFFFFFFFL; \
184 const char *p_ = s_; \
185 const char *e_ = (s_) + (l_); \
188 h_ = (h_ >> 8) ^ (sym__crcTable[(*p_++ ^ h_) & 0xFF]); \
189 hv_ = ~h_ & 0xFFFFFFFFL; \
192 /*----- Main code ---------------------------------------------------------*/
196 /* --- @sym_createTable@ --- *
198 * Arguments: @sym_table *t@ = symbol table to initialise
202 * Use: Initialises the given symbol table.
205 void sym_createTable(sym_table *t)
209 t->mask = sym__iSize;
210 t->c = (sym__iSize * sym__limFactor) >> 8;
211 t->a = xmalloc((t->mask + 1) * sizeof(sym_base *));
213 for (i = 0; i < sym__iSize + 1; i++)
217 /* --- @sym_destroyTable@ --- *
219 * Arguments: @sym_table *t@ = pointer to symbol table in question
223 * Use: Destroys a symbol table, freeing all the memory it used to
227 void sym_destroyTable(sym_table *t)
232 for (i = 0; i <= t->mask; i++) {
242 /* --- @sym_find@ --- *
244 * Arguments: @sym_table *t@ = pointer to symbol table in question
245 * @const char *n@ = pointer to symbol table to look up
246 * @long l@ = length of the name string or negative to measure
247 * @size_t sz@ = size of desired symbol object, or zero
248 * @unsigned *f@ = pointer to a flag, or null.
250 * Returns: The address of a @sym_base@ structure, or null if not found
253 * Use: Looks up a symbol in a given symbol table. The name is
254 * passed by the address of its first character. The length
255 * may be given, in which case the name may contain arbitrary
256 * binary data, or it may be given as a negative number, in
257 * which case the length of the name is calculated as
260 * The return value is the address of a pointer to a @sym_base@
261 * block (which may have other things on the end, as above). If
262 * the symbol could be found, the return value points to the
263 * symbol block. If the symbol wasn't there, then if @sz@ is
264 * nonzero, a new symbol is created and its address is returned;
265 * otherwise a null pointer is returned.
267 * The value of @*f@ indicates whether a new symbol entry was
268 * created: a nonzero value indicates that an old value was
272 void *sym_find(sym_table *t, const char *n, long l, size_t sz, unsigned *f)
274 unsigned long hash; /* Hash value for user's name */
275 size_t len = l < 0 ? strlen(n) + 1 : l; /* Find length of user's name */
276 sym_base *bin; /* Bin containing our item */
277 sym_base *p, *q; /* Pointer wandering through list */
279 /* --- Find the correct bin --- */
281 sym__crc(n, len, hash); /* Find hash value for this name */
282 bin = p = (sym_base *)(t->a + (hash & t->mask));
284 /* --- Search the bin list --- */
287 if (hash == p->next->hash && /* Check hash values first */
288 len == p->next->len && /* Then compare string lengths */
289 !memcmp(n, p->next->name, len)) /* And finally compare the strings */
291 /* --- Found a match --- *
293 * As a minor, and probably pointless, tweak, move the item to the
294 * front of its bin list.
297 q = p->next; /* Find the actual symbol block */
298 p->next = q->next; /* Extract block from bin list */
299 q->next = bin->next; /* Set up symbol's next pointer */
300 bin->next = q; /* And reinsert the block */
302 /* --- Return the block --- */
304 if (f) *f = 1; /* Didn't fail to find the item */
305 return (q); /* And return the block */
308 p = p->next; /* Move onto the next item */
311 /* --- Couldn't find the item there --- */
313 if (f) *f = 0; /* Failed to find the block */
314 if (!sz) return (0); /* Return zero if not creating */
316 /* --- Create a new symbol block and initialise it --- */
318 p = xmalloc(sz); /* Create a new symbol block */
319 p->next = bin->next; /* Set up the next pointer */
320 p->hash = hash; /* Set up the hash value too */
321 p->name = xmalloc(len); /* Allocate a block for the name */
322 p->len = len; /* And set up the string length */
323 memcpy(p->name, n, len); /* And copy the string over */
325 bin->next = p; /* Put the pointer into the bin */
327 /* --- Consider growing the array --- */
330 unsigned long m = t->mask + 1; /* Next set bit in has word */
331 sym_base *p, *q, *r; /* More useful pointers */
332 size_t i, lim; /* Loop counter and limit */
334 /* --- Update values in the anchor block --- */
336 t->c = ((t->mask + 1) * sym__limFactor) >> 8; /* Set load value */
337 t->mask = (t->mask + 1) * 2 - 1; /* Set the new mask value */
338 t->a = xrealloc(t->a, (t->mask + 1) * sizeof(sym_base *));
340 /* --- Now wander through the table rehashing things --- *
342 * This loop is very careful to avoid problems with aliasing. The items
343 * are dealt with from the end backwards to avoid overwriting bins before
344 * they've been processed.
347 lim = (t->mask + 1) >> 1;
348 for (i = 0; i < lim; i++) {
349 /* --- Some initialisation --- */
351 r = t->a[i]; /* Find the list we're dissecting */
352 p = (sym_base *)(t->a + i); /* Find bit-clear list */
353 q = (sym_base *)(t->a + i + lim); /* And the bit-set lsit */
355 /* --- Now go through the @r@ list --- */
358 if (r->hash & m) /* Is the next bit set? */
359 q = q->next = r; /* Yes, so fit up previous link */
361 p = p->next = r; /* No, so fit up previous link */
362 r = r->next; /* Move onto the next item */
364 p->next = q->next = 0; /* Null terminate the new lists */
368 /* --- Finished that, so return the new symbol block --- */
373 /* --- @sym_remove@ --- *
375 * Arguments: @sym_table *i@ = pointer to a symbol table object
376 * @void *b@ = pointer to symbol table entry
380 * Use: Removes the object from the symbol table. The space occupied
381 * by the object and its name is freed; anything else attached
382 * to the entry should already be gone by this point.
385 void sym_remove(sym_table *t, void *b)
387 /* --- A quick comment --- *
389 * Since the @sym_base@ block contains the hash, finding the element in the
390 * bin list is really quick -- it's not worth bothering with things like
391 * doubly linked lists.
395 sym_base *bin = (sym_base *)(t->a + (p->hash & t->mask));
397 /* --- Find the item in the bin list --- */
407 /* --- Now just remove the item from the list and free it --- *
409 * Oh, and bump the load counter.
418 /* --- @sym_createIter@ --- *
420 * Arguments: @sym_iter *i@ = pointer to an iterator object
421 * @sym_table *t@ = pointer to a symbol table object
425 * Use: Creates a new symbol table iterator which may be used to
426 * iterate through a symbol table.
429 void sym_createIter(sym_iter *i, sym_table *t)
436 /* --- @sym_next@ --- *
438 * Arguments: @sym_iter *i@ = pointer to iterator object
440 * Returns: Pointer to the next symbol found, or null when finished.
442 * Use: Returns the next symbol from the table. Symbols are not
443 * returned in any particular order.
446 void *sym_next(sym_iter *i)
450 /* --- Find the next item --- */
453 if (i->i > i->t->mask)
455 i->n = i->t->a[i->i++];
458 /* --- Update the iterator block --- */
470 /*----- CRC test code -----------------------------------------------------*/
477 sym__crc("123456789", 9, h);
478 printf("crc check == %04x\n", h);
484 /*----- Symbol table test code --------------------------------------------*/
493 typedef struct sym_word {
499 /* --- What it does --- *
501 * Reads the file /usr/dict/words (change to some other file full of
502 * interesting and appropriate bits of text to taste) into a big buffer and
503 * picks apart into lines. Then picks lines at random and enters them into
509 char *buff, *p, *lim;
518 /* --- Initialise for reading the file --- */
521 buff = xmalloc(sz + 1);
524 if ((fp = fopen("/usr/dict/words", "r")) == 0)
525 fprintf(stderr, "buggered ;-( (%s)\n", strerror(errno));
527 /* --- Read buffers of text --- *
529 * Read a buffer. If more to come, double the buffer size and try again.
530 * This is the method I recommended to comp.lang.c, so I may as well try
535 i = fread(buff + done, 1, sz - done, fp);
540 buff = xrealloc(buff, sz + 1);
543 /* --- Count the lines --- */
548 for (p = buff; p < lim; p++)
549 if (*p == '\n') sz++;
551 /* --- Build a table of line starts --- */
553 line = xmalloc(sz * sizeof(char *));
556 for (p = buff; p < lim; p++)
557 if (*p == '\n') *p = 0, line[i++] = p + 1;
560 /* --- Build a table of lines --- *
562 * This reverses the mapping which the symbol table performs, so that its
563 * accuracy can be tested.
566 flag = xmalloc(sz * sizeof(sym_word *));
567 for (i = 0; i < sz; i++)
571 sym_createTable(&tbl);
574 i = (unsigned)rand() % sz;
581 /* printf("find `%s'\n", line[i]); */
582 if ((rand() & 1023) == 0) {
583 putchar('.'); fflush(stdout);
586 w = sym_find(&tbl, line[i], -1, 0, 0);
588 printf("*** error: find `%s' gave %p not %p\n",
589 line[i], (void *)w, (void *)flag[i]);
590 else if (w && w->i != i)
591 printf("*** error: find sym for `%s' gives index %i not %i\n",
599 /* printf("create `%s'\n", line[i]); */
600 if ((rand() & 1023) == 0) {
601 putchar('+'); fflush(stdout);
604 w = sym_find(&tbl, line[i], -1, sizeof(sym_word), &f);
608 printf("*** error: create `%s' gave %p not %p\n",
609 line[i], (void *)w, (void *)flag[i]);
610 else if (w && w->i != i)
611 printf("*** error: create sym for `%s' gives index %i not %i\n",
617 printf("*** error: create `%s' gave new block, should be %p\n",
618 line[i], (void *)flag[i]);
630 int v = (rand() % entries) == 0;
633 printf("\niterated %i entries\n", entries);
638 ntbl = xmalloc(sz * sizeof(sym_word *));
639 memcpy(ntbl, flag, sz * sizeof(sym_word *));
640 sym_createIter(&it, &tbl);
642 while ((w = sym_next(&it)) != 0) {
644 printf("*** error: iterate returned duff item %i\n", w->i);
649 for (i = 0; i < sz; i++)
650 if (ntbl[i]) printf("*** error: iterate didn't return item %i\n",
657 int v = rand() & 255 ? 0 : 1;
662 for (i = 0; i <= tbl.mask; i++) {
663 if (!tbl.a[i]) continue;
664 if (v) printf(" %i: ", i);
667 if ((b->hash & tbl.mask) != i)
668 printf("*** error: bad hash value found");
669 if (v) printf("`%s'(%08lx:%lu) ",
670 line[((sym_word *)b)->i],
675 if (v) putchar('\n');
681 /* printf("remove `%s'\n", flag[i]->base.name); */
682 if ((rand() & 1023) == 0) {
683 putchar('-'); fflush(stdout);
685 sym_remove(&tbl, flag[i]);
699 /*----- That's all, folks -------------------------------------------------*/