3 * $Id: sym.c,v 1.3 1997/08/20 16:22:59 mdw Exp $
5 * Symbol table management
10 /*----- Licensing notice --------------------------------------------------*
12 * This file is part of `become'
14 * `Become' is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
19 * `Become' is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with `become'; if not, write to the Free Software Foundation,
26 * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
29 /*----- Revision history --------------------------------------------------*
32 * Revision 1.3 1997/08/20 16:22:59 mdw
35 * Revision 1.2 1997/08/04 10:24:25 mdw
36 * Sources placed under CVS control.
38 * Revision 1.1 1997/07/21 13:47:44 mdw
43 /*----- Header files ------------------------------------------------------*/
45 /* --- ANSI headers --- */
51 /* --- Local headers --- */
56 /*----- Tuning parameters -------------------------------------------------*/
58 /* --- Initial hash table size --- *
60 * This is the initial @mask@ value. It must be of the form %$2^n - 1$%,
61 * so that it can be used to mask of the bottom bits of a hash value.
64 #define sym__iSize 255 /* Size of a new hash table */
66 /* --- Maximum load factor --- *
68 * This parameter controls how much the table has to be loaded before the
69 * table is extended. The number of elements %$n$%, the number of bins %$b$%
70 * and the limit %$l$% satisfy the relation %$n < bl$%; if a new item is
71 * added to the table and this relation is found to be false, the table is
74 * The parameter @sym__limFactor@ is the value of %$l$% multiplied by 256;
75 * fixed point arithmetic is used to calculate the allowable load. Note
76 * that the exact calculation of this criterion isn't important; what's more
77 * significant is that the parameter allows tuning of the rehashing process.
79 * The current value is %$3 \over 4$%, which appears to be reasonable on the
83 #define sym__limFactor 0x0C0 /* Load factor for growing table */
85 /*----- CRC table ---------------------------------------------------------*/
87 /*****************************************************************/
89 /* CRC LOOKUP TABLE */
90 /* ================ */
92 /* The following CRC lookup table was generated automagically */
93 /* by `crcgen', which is based on the Rocksoft^tm Model CRC */
94 /* Algorithm Table Generation Program. The model parameters */
95 /* supplied to `crcgen' were: */
98 /* Poly : 0x04C11DB7 */
99 /* Init : 0xFFFFFFFF */
100 /* XorOut : 0xFFFFFFFF */
102 /* Check : 0xCBF43926 */
104 /* For more information on the Rocksoft^tm Model CRC Algorithm, */
105 /* see the document titled `A Painless Guide to CRC Error */
106 /* Detection Algorithms' by Ross Williams */
107 /* (ross@@guest.adelaide.edu.au.). This document is likely to be */
108 /* in the FTP archive `ftp.adelaide.edu.au/pub/rocksoft'. */
110 /*****************************************************************/
112 static unsigned long sym__crcTable[256] = {
113 0x00000000L, 0x77073096L, 0xEE0E612CL, 0x990951BAL,
114 0x076DC419L, 0x706AF48FL, 0xE963A535L, 0x9E6495A3L,
115 0x0EDB8832L, 0x79DCB8A4L, 0xE0D5E91EL, 0x97D2D988L,
116 0x09B64C2BL, 0x7EB17CBDL, 0xE7B82D07L, 0x90BF1D91L,
117 0x1DB71064L, 0x6AB020F2L, 0xF3B97148L, 0x84BE41DEL,
118 0x1ADAD47DL, 0x6DDDE4EBL, 0xF4D4B551L, 0x83D385C7L,
119 0x136C9856L, 0x646BA8C0L, 0xFD62F97AL, 0x8A65C9ECL,
120 0x14015C4FL, 0x63066CD9L, 0xFA0F3D63L, 0x8D080DF5L,
121 0x3B6E20C8L, 0x4C69105EL, 0xD56041E4L, 0xA2677172L,
122 0x3C03E4D1L, 0x4B04D447L, 0xD20D85FDL, 0xA50AB56BL,
123 0x35B5A8FAL, 0x42B2986CL, 0xDBBBC9D6L, 0xACBCF940L,
124 0x32D86CE3L, 0x45DF5C75L, 0xDCD60DCFL, 0xABD13D59L,
125 0x26D930ACL, 0x51DE003AL, 0xC8D75180L, 0xBFD06116L,
126 0x21B4F4B5L, 0x56B3C423L, 0xCFBA9599L, 0xB8BDA50FL,
127 0x2802B89EL, 0x5F058808L, 0xC60CD9B2L, 0xB10BE924L,
128 0x2F6F7C87L, 0x58684C11L, 0xC1611DABL, 0xB6662D3DL,
129 0x76DC4190L, 0x01DB7106L, 0x98D220BCL, 0xEFD5102AL,
130 0x71B18589L, 0x06B6B51FL, 0x9FBFE4A5L, 0xE8B8D433L,
131 0x7807C9A2L, 0x0F00F934L, 0x9609A88EL, 0xE10E9818L,
132 0x7F6A0DBBL, 0x086D3D2DL, 0x91646C97L, 0xE6635C01L,
133 0x6B6B51F4L, 0x1C6C6162L, 0x856530D8L, 0xF262004EL,
134 0x6C0695EDL, 0x1B01A57BL, 0x8208F4C1L, 0xF50FC457L,
135 0x65B0D9C6L, 0x12B7E950L, 0x8BBEB8EAL, 0xFCB9887CL,
136 0x62DD1DDFL, 0x15DA2D49L, 0x8CD37CF3L, 0xFBD44C65L,
137 0x4DB26158L, 0x3AB551CEL, 0xA3BC0074L, 0xD4BB30E2L,
138 0x4ADFA541L, 0x3DD895D7L, 0xA4D1C46DL, 0xD3D6F4FBL,
139 0x4369E96AL, 0x346ED9FCL, 0xAD678846L, 0xDA60B8D0L,
140 0x44042D73L, 0x33031DE5L, 0xAA0A4C5FL, 0xDD0D7CC9L,
141 0x5005713CL, 0x270241AAL, 0xBE0B1010L, 0xC90C2086L,
142 0x5768B525L, 0x206F85B3L, 0xB966D409L, 0xCE61E49FL,
143 0x5EDEF90EL, 0x29D9C998L, 0xB0D09822L, 0xC7D7A8B4L,
144 0x59B33D17L, 0x2EB40D81L, 0xB7BD5C3BL, 0xC0BA6CADL,
145 0xEDB88320L, 0x9ABFB3B6L, 0x03B6E20CL, 0x74B1D29AL,
146 0xEAD54739L, 0x9DD277AFL, 0x04DB2615L, 0x73DC1683L,
147 0xE3630B12L, 0x94643B84L, 0x0D6D6A3EL, 0x7A6A5AA8L,
148 0xE40ECF0BL, 0x9309FF9DL, 0x0A00AE27L, 0x7D079EB1L,
149 0xF00F9344L, 0x8708A3D2L, 0x1E01F268L, 0x6906C2FEL,
150 0xF762575DL, 0x806567CBL, 0x196C3671L, 0x6E6B06E7L,
151 0xFED41B76L, 0x89D32BE0L, 0x10DA7A5AL, 0x67DD4ACCL,
152 0xF9B9DF6FL, 0x8EBEEFF9L, 0x17B7BE43L, 0x60B08ED5L,
153 0xD6D6A3E8L, 0xA1D1937EL, 0x38D8C2C4L, 0x4FDFF252L,
154 0xD1BB67F1L, 0xA6BC5767L, 0x3FB506DDL, 0x48B2364BL,
155 0xD80D2BDAL, 0xAF0A1B4CL, 0x36034AF6L, 0x41047A60L,
156 0xDF60EFC3L, 0xA867DF55L, 0x316E8EEFL, 0x4669BE79L,
157 0xCB61B38CL, 0xBC66831AL, 0x256FD2A0L, 0x5268E236L,
158 0xCC0C7795L, 0xBB0B4703L, 0x220216B9L, 0x5505262FL,
159 0xC5BA3BBEL, 0xB2BD0B28L, 0x2BB45A92L, 0x5CB36A04L,
160 0xC2D7FFA7L, 0xB5D0CF31L, 0x2CD99E8BL, 0x5BDEAE1DL,
161 0x9B64C2B0L, 0xEC63F226L, 0x756AA39CL, 0x026D930AL,
162 0x9C0906A9L, 0xEB0E363FL, 0x72076785L, 0x05005713L,
163 0x95BF4A82L, 0xE2B87A14L, 0x7BB12BAEL, 0x0CB61B38L,
164 0x92D28E9BL, 0xE5D5BE0DL, 0x7CDCEFB7L, 0x0BDBDF21L,
165 0x86D3D2D4L, 0xF1D4E242L, 0x68DDB3F8L, 0x1FDA836EL,
166 0x81BE16CDL, 0xF6B9265BL, 0x6FB077E1L, 0x18B74777L,
167 0x88085AE6L, 0xFF0F6A70L, 0x66063BCAL, 0x11010B5CL,
168 0x8F659EFFL, 0xF862AE69L, 0x616BFFD3L, 0x166CCF45L,
169 0xA00AE278L, 0xD70DD2EEL, 0x4E048354L, 0x3903B3C2L,
170 0xA7672661L, 0xD06016F7L, 0x4969474DL, 0x3E6E77DBL,
171 0xAED16A4AL, 0xD9D65ADCL, 0x40DF0B66L, 0x37D83BF0L,
172 0xA9BCAE53L, 0xDEBB9EC5L, 0x47B2CF7FL, 0x30B5FFE9L,
173 0xBDBDF21CL, 0xCABAC28AL, 0x53B39330L, 0x24B4A3A6L,
174 0xBAD03605L, 0xCDD70693L, 0x54DE5729L, 0x23D967BFL,
175 0xB3667A2EL, 0xC4614AB8L, 0x5D681B02L, 0x2A6F2B94L,
176 0xB40BBE37L, 0xC30C8EA1L, 0x5A05DF1BL, 0x2D02EF8DL,
179 /*****************************************************************/
180 /* End of CRC Lookup Table */
181 /*****************************************************************/
183 /* --- Work out the CRC of a bytestring --- */
185 #define sym__crc(s_, l_, hv_) do { \
186 unsigned long h_ = 0xFFFFFFFFL; \
187 const char *p_ = s_; \
188 const char *e_ = (s_) + (l_); \
191 h_ = (h_ >> 8) ^ (sym__crcTable[(*p_++ ^ h_) & 0xFF]); \
192 hv_ = ~h_ & 0xFFFFFFFFL; \
195 /*----- Main code ---------------------------------------------------------*/
199 /* --- @sym_createTable@ --- *
201 * Arguments: @sym_table *t@ = symbol table to initialise
205 * Use: Initialises the given symbol table.
208 void sym_createTable(sym_table *t)
212 t->mask = sym__iSize;
213 t->c = (sym__iSize * sym__limFactor) >> 8;
214 t->a = xmalloc((t->mask + 1) * sizeof(sym_base *));
216 for (i = 0; i < sym__iSize + 1; i++)
220 /* --- @sym_destroyTable@ --- *
222 * Arguments: @sym_table *t@ = pointer to symbol table in question
226 * Use: Destroys a symbol table, freeing all the memory it used to
230 void sym_destroyTable(sym_table *t)
235 for (i = 0; i <= t->mask; i++) {
247 /* --- @sym_find@ --- *
249 * Arguments: @sym_table *t@ = pointer to symbol table in question
250 * @const char *n@ = pointer to symbol table to look up
251 * @long l@ = length of the name string or negative to measure
252 * @size_t sz@ = size of desired symbol object, or zero
253 * @unsigned *f@ = pointer to a flag, or null.
255 * Returns: The address of a @sym_base@ structure, or null if not found
258 * Use: Looks up a symbol in a given symbol table. The name is
259 * passed by the address of its first character. The length
260 * may be given, in which case the name may contain arbitrary
261 * binary data, or it may be given as a negative number, in
262 * which case the length of the name is calculated as
265 * The return value is the address of a pointer to a @sym_base@
266 * block (which may have other things on the end, as above). If
267 * the symbol could be found, the return value points to the
268 * symbol block. If the symbol wasn't there, then if @sz@ is
269 * nonzero, a new symbol is created and its address is returned;
270 * otherwise a null pointer is returned.
272 * The value of @*f@ indicates whether a new symbol entry was
273 * created: a nonzero value indicates that an old value was
277 void *sym_find(sym_table *t, const char *n, long l, size_t sz, unsigned *f)
279 unsigned long hash; /* Hash value for user's name */
280 size_t len = l < 0 ? strlen(n) + 1 : l; /* Find length of user's name */
281 sym_base *bin; /* Bin containing our item */
282 sym_base *p, *q; /* Pointer wandering through list */
284 /* --- Find the correct bin --- */
286 sym__crc(n, len, hash); /* Find hash value for this name */
287 bin = p = (sym_base *)(t->a + (hash & t->mask));
289 /* --- Search the bin list --- */
292 if (hash == p->next->hash && /* Check hash values first */
293 len == p->next->len && /* Then compare string lengths */
294 !memcmp(n, p->next->name, len)) /* And finally compare the strings */
296 /* --- Found a match --- *
298 * As a minor, and probably pointless, tweak, move the item to the
299 * front of its bin list.
302 q = p->next; /* Find the actual symbol block */
303 p->next = q->next; /* Extract block from bin list */
304 q->next = bin->next; /* Set up symbol's next pointer */
305 bin->next = q; /* And reinsert the block */
307 /* --- Return the block --- */
309 if (f) *f = 1; /* Didn't fail to find the item */
310 return (q); /* And return the block */
313 p = p->next; /* Move onto the next item */
316 /* --- Couldn't find the item there --- */
318 if (f) *f = 0; /* Failed to find the block */
319 if (!sz) return (0); /* Return zero if not creating */
321 /* --- Create a new symbol block and initialise it --- */
323 p = xmalloc(sz); /* Create a new symbol block */
324 p->next = bin->next; /* Set up the next pointer */
325 p->hash = hash; /* Set up the hash value too */
326 p->name = xmalloc(len); /* Allocate a block for the name */
327 p->len = len; /* And set up the string length */
328 memcpy(p->name, n, len); /* And copy the string over */
330 bin->next = p; /* Put the pointer into the bin */
332 /* --- Consider growing the array --- */
335 unsigned long m = t->mask + 1; /* Next set bit in has word */
336 sym_base *p, *q, *r; /* More useful pointers */
337 size_t i, lim; /* Loop counter and limit */
339 /* --- Update values in the anchor block --- */
341 t->c = ((t->mask + 1) * sym__limFactor) >> 8; /* Set load value */
342 t->mask = (t->mask + 1) * 2 - 1; /* Set the new mask value */
343 t->a = xrealloc(t->a, (t->mask + 1) * sizeof(sym_base *));
345 /* --- Now wander through the table rehashing things --- *
347 * This loop is very careful to avoid problems with aliasing. The items
348 * are dealt with from the end backwards to avoid overwriting bins before
349 * they've been processed.
352 lim = (t->mask + 1) >> 1;
353 for (i = 0; i < lim; i++) {
354 /* --- Some initialisation --- */
356 r = t->a[i]; /* Find the list we're dissecting */
357 p = (sym_base *)(t->a + i); /* Find bit-clear list */
358 q = (sym_base *)(t->a + i + lim); /* And the bit-set lsit */
360 /* --- Now go through the @r@ list --- */
363 if (r->hash & m) /* Is the next bit set? */
364 q = q->next = r; /* Yes, so fit up previous link */
366 p = p->next = r; /* No, so fit up previous link */
367 r = r->next; /* Move onto the next item */
369 p->next = q->next = 0; /* Null terminate the new lists */
373 /* --- Finished that, so return the new symbol block --- */
378 /* --- @sym_remove@ --- *
380 * Arguments: @sym_table *i@ = pointer to a symbol table object
381 * @void *b@ = pointer to symbol table entry
385 * Use: Removes the object from the symbol table. The space occupied
386 * by the object and its name is freed; anything else attached
387 * to the entry should already be gone by this point.
390 void sym_remove(sym_table *t, void *b)
392 /* --- A quick comment --- *
394 * Since the @sym_base@ block contains the hash, finding the element in the
395 * bin list is really quick -- it's not worth bothering with things like
396 * doubly linked lists.
400 sym_base *bin = (sym_base *)(t->a + (p->hash & t->mask));
402 /* --- Find the item in the bin list --- */
412 /* --- Now just remove the item from the list and free it --- *
414 * Oh, and bump the load counter.
423 /* --- @sym_createIter@ --- *
425 * Arguments: @sym_iter *i@ = pointer to an iterator object
426 * @sym_table *t@ = pointer to a symbol table object
430 * Use: Creates a new symbol table iterator which may be used to
431 * iterate through a symbol table.
434 void sym_createIter(sym_iter *i, sym_table *t)
441 /* --- @sym_next@ --- *
443 * Arguments: @sym_iter *i@ = pointer to iterator object
445 * Returns: Pointer to the next symbol found, or null when finished.
447 * Use: Returns the next symbol from the table. Symbols are not
448 * returned in any particular order.
451 void *sym_next(sym_iter *i)
455 /* --- Find the next item --- */
458 if (i->i > i->t->mask)
460 i->n = i->t->a[i->i++];
463 /* --- Update the iterator block --- */
475 /*----- CRC test code -----------------------------------------------------*/
482 sym__crc("123456789", 9, h);
483 printf("crc check == %04x\n", h);
489 /*----- Symbol table test code --------------------------------------------*/
498 typedef struct sym_word {
504 /* --- What it does --- *
506 * Reads the file /usr/dict/words (change to some other file full of
507 * interesting and appropriate bits of text to taste) into a big buffer and
508 * picks apart into lines. Then picks lines at random and enters them into
514 char *buff, *p, *lim;
523 /* --- Initialise for reading the file --- */
526 buff = xmalloc(sz + 1);
529 if ((fp = fopen("/usr/dict/words", "r")) == 0)
530 fprintf(stderr, "buggered ;-( (%s)\n", strerror(errno));
532 /* --- Read buffers of text --- *
534 * Read a buffer. If more to come, double the buffer size and try again.
535 * This is the method I recommended to comp.lang.c, so I may as well try
540 i = fread(buff + done, 1, sz - done, fp);
545 buff = xrealloc(buff, sz + 1);
548 /* --- Count the lines --- */
553 for (p = buff; p < lim; p++)
554 if (*p == '\n') sz++;
556 /* --- Build a table of line starts --- */
558 line = xmalloc(sz * sizeof(char *));
561 for (p = buff; p < lim; p++)
562 if (*p == '\n') *p = 0, line[i++] = p + 1;
565 /* --- Build a table of lines --- *
567 * This reverses the mapping which the symbol table performs, so that its
568 * accuracy can be tested.
571 flag = xmalloc(sz * sizeof(sym_word *));
572 for (i = 0; i < sz; i++)
576 sym_createTable(&tbl);
579 i = (unsigned)rand() % sz;
586 /* printf("find `%s'\n", line[i]); */
587 if ((rand() & 1023) == 0) {
588 putchar('.'); fflush(stdout);
591 w = sym_find(&tbl, line[i], -1, 0, 0);
593 printf("*** error: find `%s' gave %p not %p\n",
594 line[i], (void *)w, (void *)flag[i]);
595 else if (w && w->i != i)
596 printf("*** error: find sym for `%s' gives index %i not %i\n",
604 /* printf("create `%s'\n", line[i]); */
605 if ((rand() & 1023) == 0) {
606 putchar('+'); fflush(stdout);
609 w = sym_find(&tbl, line[i], -1, sizeof(sym_word), &f);
613 printf("*** error: create `%s' gave %p not %p\n",
614 line[i], (void *)w, (void *)flag[i]);
615 else if (w && w->i != i)
616 printf("*** error: create sym for `%s' gives index %i not %i\n",
622 printf("*** error: create `%s' gave new block, should be %p\n",
623 line[i], (void *)flag[i]);
635 int v = (rand() % entries) == 0;
638 printf("\niterated %i entries\n", entries);
643 ntbl = xmalloc(sz * sizeof(sym_word *));
644 memcpy(ntbl, flag, sz * sizeof(sym_word *));
645 sym_createIter(&it, &tbl);
647 while ((w = sym_next(&it)) != 0) {
649 printf("*** error: iterate returned duff item %i\n", w->i);
654 for (i = 0; i < sz; i++)
655 if (ntbl[i]) printf("*** error: iterate didn't return item %i\n",
662 int v = rand() & 255 ? 0 : 1;
667 for (i = 0; i <= tbl.mask; i++) {
668 if (!tbl.a[i]) continue;
669 if (v) printf(" %i: ", i);
672 if ((b->hash & tbl.mask) != i)
673 printf("*** error: bad hash value found");
674 if (v) printf("`%s'(%08lx:%lu) ",
675 line[((sym_word *)b)->i],
680 if (v) putchar('\n');
686 /* printf("remove `%s'\n", flag[i]->base.name); */
687 if ((rand() & 1023) == 0) {
688 putchar('-'); fflush(stdout);
690 sym_remove(&tbl, flag[i]);
704 /*----- That's all, folks -------------------------------------------------*/