4 Copyright 1988 Jon Zeeff (zeeff@b-tech.ann-arbor.mi.us)
5 You can use this code in any manner, as long as you leave my name on it
6 and don't hold me responsible for any problems with it.
8 Hacked on by gdb@ninja.UUCP (David Butler); Sun Jun 5 00:27:08 CDT 1988
10 Various improvments + INCORE by moraes@ai.toronto.edu (Mark Moraes)
12 Major reworking by Henry Spencer as part of the C News project.
14 Minor lint and CodeCenter (Saber) fluff removal by Rich $alz (March, 1991).
15 Non-portable CloseOnExec() calls added by Rich $alz (September, 1991).
16 Added "writethrough" and tagmask calculation code from
17 <rob@violet.berkeley.edu> and <leres@ee.lbl.gov> by Rich $alz (December, 1992).
18 Merged in MMAP code by David Robinson, formerly <david@elroy.jpl.nasa.gov>
19 now <david.robinson@sun.com> (January, 1993).
21 Major reworking by Clayton O'Neill (coneill@oneill.net). Removed all the
22 C News and backwards compatible cruft. Ripped out all the tagmask stuff
23 and replaced it with hashed .pag entries. This removes the need for
24 base file access. Primary bottleneck now appears to be the hash
25 algorithm and search(). You can change DBZ_INTERNAL_HASH_SIZE in
26 dbz.h to increase the size of the stored hash.
28 These routines replace dbm as used by the usenet news software
29 (it's not a full dbm replacement by any means). It's fast and
30 simple. It contains no AT&T code.
32 The dbz database exploits the fact that when news stores a <key,value>
33 tuple, the `value' part is a seek offset into a text file, pointing to
34 a copy of the `key' part. This avoids the need to store a copy of
35 the key in the dbz files.
37 The basic format of the database is two hash tables, each in it's own
38 file. One contains the offsets into the history text file , and the
39 other contains a hash of the message id. A value is stored by
40 indexing into the tables using a hash value computed from the key;
41 collisions are resolved by linear probing (just search forward for an
42 empty slot, wrapping around to the beginning of the table if
43 necessary). Linear probing is a performance disaster when the table
44 starts to get full, so a complication is introduced. Each file actually
45 contains one *or more* tables, stored sequentially in the files, and
46 the length of the linear-probe sequences is limited. The search (for
47 an existing item or an empy slot always starts in the first table of
48 the hash file, and whenever MAXRUN probes have been done in table N,
49 probing continues in table N+1. It is best not to overflow into more
50 than 1-2 tables or else massive performance degradation may occur.
51 Choosing the size of the database is extremely important because of this.
53 The table size is fixed for any particular database, but is determined
54 dynamically when a database is rebuilt. The strategy is to try to pick
55 the size so the first table will be no more than 2/3 full, that being
56 slightly before the point where performance starts to degrade. (It is
57 desirable to be a bit conservative because the overflow strategy tends
58 to produce files with holes in them, which is a nuisance.)
60 Tagged hash + offset fuzzy technique merged by Sang-yong Suh (Nov, 1997)
62 Fixed a bug handling larger than 1Gb history offset by Sang-yong Suh(1998)
63 Similar fix was suggested by Mike Hucka <hucka@umich.edu> (Jan, 1998) for
66 Limited can't tag warnings once per dbzinit() by Sang-yong Suh (May, 1998)
75 #include <netinet/in.h>
81 #include "inn/messages.h"
82 #include "inn/innconf.h"
86 /* Needed on AIX 4.1 to get fd_set and friends. */
87 #ifdef HAVE_SYS_SELECT_H
88 # include <sys/select.h>
92 * "LIA" = "leave it alone unless you know what you're doing".
94 * DBZTEST Generate a standalone program for testing and benchmarking
95 * DEFSIZE default table size (not as critical as in old dbz)
96 * NMEMORY number of days of memory for use in sizing new table (LIA)
97 * MAXRUN length of run which shifts to next table (see below) (LIA)
100 static int dbzversion = 6; /* for validating .dir file format */
102 #ifdef DO_TAGGED_HASH
103 /* assume that for tagged hash, we don't want more than 4byte of_t even if
104 * off_t is 8 bytes -- people who use tagged-hash are usually short on
111 #define SOF (sizeof(of_t))
112 #define NOTFOUND ((of_t)-1)
113 #ifdef DO_TAGGED_HASH
120 /* MAXDROPBITS is the maximum number of bits dropped from the offset value.
121 The least significant bits are dropped. The space is used to
122 store hash additional bits, thereby increasing the possibility of the
124 #define MAXDROPBITS 4 /* max # of bits to drop from the offset */
126 /* MAXFUZZYLENGTH is the maximum in the offset value due to the MAXDROPBITS */
127 #define MAXFUZZYLENGTH ((1 << MAXDROPBITS) - 1)
130 * We assume that unused areas of a binary file are zeros, and that the
131 * bit pattern of `(of_t)0' is all zeros. The alternative is rather
132 * painful file initialization. Note that okayvalue(), if OVERFLOW is
133 * defined, knows what value of an offset would cause overflow.
135 #define VACANT ((of_t)0)
136 #define BIAS(o) ((o)+1) /* make any valid of_t non-VACANT */
137 #define UNBIAS(o) ((o)-1) /* reverse BIAS() effect */
139 #define HASTAG(o) ((o)&taghere)
140 #define TAG(o) ((o)&tagbits)
141 #define NOTAG(o) ((o)&~tagboth)
142 #define CANTAG(o) (((o)&tagboth) == 0)
143 #define MKTAG(v) (((v)<<conf.tagshift)&tagbits)
146 #define TAGENB 0x80 /* tag enable is top bit, tag is next 7 */
150 #define TAGENB 0 /* no tags */
156 * Stdio buffer for base-file reads. Message-IDs (all news ever needs to
157 * read) are essentially never longer than 64 bytes, and the typical stdio
158 * buffer is so much larger that it is much more expensive to fill.
161 static of_t tagbits; /* pre-shifted tag mask */
162 static of_t taghere; /* pre-shifted tag-enable bit */
163 static of_t tagboth; /* tagbits|taghere */
164 static int canttag_warned; /* flag to control can't tag warning */
166 #endif /* DO_TAGGED_HASH */
168 /* Old dbz used a long as the record type for dbz entries, which became
169 * really gross in places because of mixed references. We define these to
170 * make it a bit easier if we want to store more in here.
173 /* A new, from-scratch database, not built as a rebuild of an old one, needs
174 * to know table size. Normally the user supplies this info, but there have
175 * to be defaults. Making this too small can have devestating effects on
176 * history speed for the current history implementation whereas making it too
177 * big just wastes disk space, so err on the side of caution. This may still
178 * be a bit too small. Assume people using tagged hash are running somewhat
183 #ifdef DO_TAGGED_HASH
184 #define DEFSIZE 1000003 /* I need a prime number */
186 #define DEFSIZE 10000000
191 * We read configuration info from the .dir file into this structure,
192 * so we can avoid wired-in assumptions for an existing database.
194 * Among the info is a record of recent peak usages, so that a new table
195 * size can be chosen intelligently when rebuilding. 10 is a good
196 * number of usages to keep, since news displays marked fluctuations
197 * in volume on a 7-day cycle.
200 #define NMEMORY 10 /* # days of use info to remember */
202 #define NUSEDS (1+NMEMORY)
205 long tsize; /* table size */
206 long used[NUSEDS]; /* entries used today, yesterday, ... */
207 long vused[NUSEDS]; /* ditto for text size */
208 int valuesize; /* size of table values, == sizeof(dbzrec) */
209 int fillpercent; /* fillpercent/100 is the percent full we'll
210 try to keep the .pag file */
211 of_t tagenb; /* unshifted tag-enable bit */
212 of_t tagmask; /* unshifted tag mask */
213 int tagshift; /* shift count for tagmask and tagenb */
214 int dropbits; /* number of bits to discard from offset */
215 int lenfuzzy; /* num of fuzzy characters in offset */
217 static dbzconfig conf;
220 * Default dbzoptions to
222 static dbzoptions options = {
223 false, /* write through off */
224 INCORE_NO, /* index/pag from disk */
226 INCORE_MMAP, /* exists mmap'ed. ignored in tagged hash mode */
228 INCORE_NO, /* exists from disk. ignored in tagged hash mode */
230 true /* non-blocking writes */
234 * Data structure for recording info about searches.
237 of_t place; /* current location in file */
238 int tabno; /* which table we're in */
239 int run; /* how long we'll stay in this table */
243 HASH hash; /* the key's hash code */
244 unsigned long shorthash; /* integer version of the hash, used for
245 determining the entries location.
246 Tagged_hash stores the 31-bit hash here */
247 of_t tag; /* tag we are looking for */
248 int aborted; /* has i/o error aborted search? */
250 #define FRESH ((searcher *)NULL)
253 * Arguably the searcher struct for a given routine ought to be local to
254 * it, but a fetch() is very often immediately followed by a store(), and
255 * in some circumstances it is a useful performance win to remember where
256 * the fetch() completed. So we use a global struct and remember whether
259 static searcher srch;
260 static searcher *prevp; /* &srch or FRESH */
262 /* Structure for hash tables */
264 int fd; /* Non-blocking descriptor for writes */
265 off_t pos; /* Current offset into the table */
266 int reclen; /* Length of records in the table */
267 dbz_incore_val incore; /* What we're using core for */
268 void *core; /* Pointer to in-core table */
271 /* central data structures */
272 static bool opendb = false; /* Indicates if a database is currently open */
273 static FILE *dirf; /* descriptor for .dir file */
274 static bool readonly; /* database open read-only? */
275 #ifdef DO_TAGGED_HASH
276 static FILE *basef; /* descriptor for base file */
277 static char *basefname; /* name for not-yet-opened base file */
278 static hash_table pagtab; /* pag hash table, stores hash + offset */
280 static hash_table idxtab; /* index hash table, used for data retrieval */
281 static hash_table etab; /* existance hash table, used for existance checks */
283 static bool dirty; /* has a store() been done? */
284 static erec empty_rec; /* empty rec to compare against
285 initalized in dbzinit */
288 static bool getcore(hash_table *tab);
289 static bool putcore(hash_table *tab);
290 static bool getconf(FILE *df, dbzconfig *cp);
291 static int putconf(FILE *f, dbzconfig *cp);
292 static void start(searcher *sp, const HASH hash, searcher *osp);
293 #ifdef DO_TAGGED_HASH
294 static of_t search(searcher *sp);
295 static bool set_pag(searcher *sp, of_t value);
297 static bool search(searcher *sp);
299 static bool set(searcher *sp, hash_table *tab, void *value);
301 /* file-naming stuff */
302 static char dir[] = ".dir";
303 #ifdef DO_TAGGED_HASH
304 static char pag[] = ".pag";
306 static char idx[] = ".index";
307 static char exists[] = ".hash";
310 int dbzneedfilecount(void) {
311 #ifdef DO_TAGGED_HASH
312 return 2; /* basef and dirf are fopen()'ed and kept */
314 return 1; /* dirf is fopen()'ed and kept */
318 #ifdef DO_TAGGED_HASH
320 - dbzconfbase - reconfigure dbzconf from base file size.
323 config_by_text_size(dbzconfig *c, of_t basesize)
328 /* if no tag requested, just return. */
329 if ((c->tagmask | c->tagenb) == 0)
332 /* Use 10 % larger base file size. Sometimes the offset overflows */
333 basesize += basesize / 10;
335 /* calculate tagging from old file */
336 for (m = 1, i = 0; m < (unsigned long)basesize; i++, m <<= 1)
339 /* if we had more tags than the default, use the new data */
341 while (m > (1 << TAGSHIFT)) {
342 if (c->dropbits >= MAXDROPBITS)
349 c->tagmask = TAGMASK;
350 c->tagshift = TAGSHIFT;
351 if ((c->tagmask | c->tagenb) && m > (1 << TAGSHIFT)) {
353 c->tagmask = (~(unsigned long)0) >> (i + 1);
354 c->tagenb = (c->tagmask << 1) & ~c->tagmask;
356 c->lenfuzzy = (int)(1 << c->dropbits) - 1;
358 m = (c->tagmask | c->tagenb) << c->tagshift;
359 if (m & (basesize >> c->dropbits)) {
360 fprintf(stderr, "m 0x%lx size 0x%lx\n", m, basesize);
364 #endif /* DO_TAGGED_HASH */
367 - create and truncate .pag, .idx, or .hash files
368 - return false on error
371 create_truncate(const char *name, const char *pag1)
376 fn = concat(name, pag1, (char *) 0);
377 f = Fopen(fn, "w", TEMPORARYOPEN);
380 syswarn("unable to create/truncate %s", pag1);
387 /* dbzfresh - set up a new database, no historical info
388 * Return true for success, false for failure
389 * name - base name; .dir and .pag must exist
390 * size - table size (0 means default)
393 dbzfresh(const char *name, off_t size)
398 #ifdef DO_TAGGED_HASH
404 warn("dbzfresh: database already open");
407 if (size != 0 && size < 2) {
408 warn("dbzfresh: preposterous size (%ld)", (long) size);
412 /* get default configuration */
413 if (!getconf(NULL, &c))
414 return false; /* "can't happen" */
416 #ifdef DO_TAGGED_HASH
417 /* and mess with it as specified */
427 c.tagenb = (m << 1) & ~m;
431 /* if big enough basb file exists, update config */
432 if (stat(name, &sb) != -1)
433 config_by_text_size(&c, sb.st_size);
435 /* set the size as specified, make sure we get at least 2 bytes
438 c.tsize = size > (64 * 1024) ? size : 64 * 1024;
442 fn = concat(name, dir, (char *) 0);
443 f = Fopen(fn, "w", TEMPORARYOPEN);
446 syswarn("dbzfresh: unable to write config");
449 if (putconf(f, &c) < 0) {
453 if (Fclose(f) == EOF) {
454 syswarn("dbzfresh: fclose failure");
458 /* create and truncate .pag, or .index/.hash files */
459 #ifdef DO_TAGGED_HASH
460 if (!create_truncate(name, pag))
463 if (!create_truncate(name, idx))
465 if (!create_truncate(name, exists))
467 #endif /* DO_TAGGED_HASH */
469 /* and punt to dbzinit for the hard work */
470 return dbzinit(name);
473 #ifdef DO_TAGGED_HASH
475 - isprime - is a number prime?
480 static int quick[] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 0 };
484 /* hit the first few primes quickly to eliminate easy ones */
485 /* this incidentally prevents ridiculously small tables */
486 for (ip = quick; (div1 = *ip) != 0; ip++)
488 debug("isprime: quick result on %ld", x);
492 /* approximate square root of x */
493 for (stop = x; x/stop < stop; stop >>= 1)
497 /* try odd numbers up to stop */
498 for (div1 = *--ip; div1 < stop; div1 += 2)
507 * dbzsize - what's a good table size to hold this many entries?
508 * contents - size of table (0 means return the default)
511 dbzsize(off_t contents)
515 if (contents <= 0) { /* foulup or default inquiry */
516 debug("dbzsize: preposterous input (%ld)", (long) contents);
520 if ((conf.fillpercent > 0) && (conf.fillpercent < 100))
521 n = (contents / conf.fillpercent) * 100;
523 n = (contents * 3) / 2; /* try to keep table at most 2/3's full */
525 /* Make sure that we get at least 2 bytes of implicit hash */
529 #ifdef DO_TAGGED_HASH
531 n += 1; /* make it odd */
532 debug("dbzsize: tentative size %ld", n);
533 while (!isprime(n)) /* look for a prime */
537 debug("dbzsize: final size %ld", (long) n);
541 /* dbzagain - set up a new database to be a rebuild of an old one
542 * Returns true on success, false on failure
543 * name - base name; .dir and .pag must exist
544 * oldname - basename, all must exist
547 dbzagain(const char *name, const char *oldname)
557 #ifdef DO_TAGGED_HASH
563 warn("dbzagain: database already open");
567 /* pick up the old configuration */
568 fn = concat(oldname, dir, (char *) 0);
569 f = Fopen(fn, "r", TEMPORARYOPEN);
572 syswarn("dbzagain: cannot open old .dir file");
575 result = getconf(f, &c);
578 syswarn("dbzagain: getconf failed");
585 for (i = 0; i < NUSEDS; i++) {
589 newtable = 1; /* hasn't got full usage history yet */
592 debug("dbzagain: old table has no contents!");
595 for (i = NUSEDS-1; i > 0; i--)
596 c.used[i] = c.used[i-1];
599 #ifdef DO_TAGGED_HASH
601 for (i = 0; i < NUSEDS; i++) {
602 if (vtop < c.vused[i])
605 newtable = 1; /* hasn't got full usage history yet */
607 if (top != 0 && vtop == 0) {
608 debug("dbzagain: old table has no contents!");
611 for (i = NUSEDS-1; i > 0; i--)
612 c.vused[i] = c.vused[i-1];
615 /* calculate tagging from old file */
616 if (stat(oldname, &sb) != -1 && vtop < sb.st_size)
618 config_by_text_size(&c, vtop);
621 newsize = dbzsize(top);
622 if (!newtable || newsize > c.tsize) /* don't shrink new table */
626 fn = concat(name, dir, (char *) 0);
627 f = Fopen(fn, "w", TEMPORARYOPEN);
630 syswarn("dbzagain: unable to write new .dir");
636 warn("dbzagain: putconf failed");
640 /* create and truncate .pag, or .index/.hash files */
641 #ifdef DO_TAGGED_HASH
642 if (!create_truncate(name, pag))
645 if (!create_truncate(name, idx))
647 if (!create_truncate(name, exists))
651 /* and let dbzinit do the work */
652 return dbzinit(name);
656 openhashtable(const char *base, const char *ext, hash_table *tab,
657 const size_t reclen, const dbz_incore_val incore)
662 name = concat(base, ext, (char *) 0);
663 if ((tab->fd = open(name, readonly ? O_RDONLY : O_RDWR)) < 0) {
664 syswarn("openhashtable: could not open raw");
672 tab->reclen = reclen;
673 close_on_exec(tab->fd, true);
676 /* get first table into core, if it looks desirable and feasible */
677 tab->incore = incore;
678 if (tab->incore != INCORE_NO) {
680 syswarn("openhashtable: getcore failure");
688 if (options.nonblock && nonblocking(tab->fd, true) < 0) {
689 syswarn("fcntl: could not set nonblock");
698 static void closehashtable(hash_table *tab) {
700 if (tab->incore == INCORE_MEM)
702 if (tab->incore == INCORE_MMAP) {
703 #if defined(HAVE_MMAP)
704 if (munmap(tab->core, conf.tsize * tab->reclen) == -1) {
705 syswarn("closehashtable: munmap failed");
708 warn("closehashtable: can't mmap files");
713 #ifdef DO_TAGGED_HASH
715 openbasefile(const char *name)
717 basef = Fopen(name, "r", DBZ_BASE);
719 syswarn("dbzinit: basefile open failed");
720 basefname = xstrdup(name);
724 close_on_exec(fileno(basef), true);
726 setvbuf(basef, NULL, _IOFBF, 64);
729 #endif /* DO_TAGGED_HASH */
732 * dbzinit - open a database, creating it (using defaults) if necessary
734 * We try to leave errno set plausibly, to the extent that underlying
735 * functions permit this, since many people consult it if dbzinit() fails.
736 * return true for success, false for failure
739 dbzinit(const char *name)
744 warn("dbzinit: dbzinit already called once");
749 /* open the .dir file */
750 fname = concat(name, dir, (char *) 0);
751 if ((dirf = Fopen(fname, "r+", DBZ_DIR)) == NULL) {
752 dirf = Fopen(fname, "r", DBZ_DIR);
758 syswarn("dbzinit: can't open .dir file");
761 close_on_exec(fileno(dirf), true);
763 /* pick up configuration */
764 if (!getconf(dirf, &conf)) {
765 warn("dbzinit: getconf failure");
767 errno = EDOM; /* kind of a kludge, but very portable */
771 /* open pag or idx/exists file */
772 #ifdef DO_TAGGED_HASH
773 if (!openhashtable(name, pag, &pagtab, SOF, options.pag_incore)) {
777 if (!openbasefile(name)) {
782 tagbits = conf.tagmask << conf.tagshift;
783 taghere = conf.tagenb << conf.tagshift;
784 tagboth = tagbits | taghere;
787 if (!openhashtable(name, idx, &idxtab, sizeof(of_t), options.pag_incore)) {
791 if (!openhashtable(name, exists, &etab, sizeof(erec),
792 options.exists_incore)) {
802 memset(&empty_rec, '\0', sizeof(empty_rec));
803 debug("dbzinit: succeeded");
807 /* dbzclose - close a database
815 warn("dbzclose: not opened!");
822 #ifdef DO_TAGGED_HASH
823 closehashtable(&pagtab);
824 if (Fclose(basef) == EOF) {
825 syswarn("dbzclose: fclose(basef) failed");
828 if (basefname != NULL)
832 closehashtable(&idxtab);
833 closehashtable(&etab);
836 if (Fclose(dirf) == EOF) {
837 syswarn("dbzclose: fclose(dirf) failed");
841 debug("dbzclose: %s", (ret == true) ? "succeeded" : "failed");
847 /* dbzsync - push all in-core data out to disk
855 warn("dbzsync: not opened!");
862 #ifdef DO_TAGGED_HASH
863 if (!putcore(&pagtab)) {
865 if (!putcore(&idxtab) || !putcore(&etab)) {
867 warn("dbzsync: putcore failed");
871 if (putconf(dirf, &conf) < 0)
874 debug("dbzsync: %s", ret ? "succeeded" : "failed");
878 #ifdef DO_TAGGED_HASH
880 - okayvalue - check that a value can be stored
883 okayvalue(of_t value)
888 if (value == LONG_MAX) /* BIAS() and UNBIAS() will overflow */
895 /* dbzexists - check if the given message-id is in the database */
897 dbzexists(const HASH key)
899 #ifdef DO_TAGGED_HASH
902 return (dbzfetch(key, &value) != 0);
906 warn("dbzexists: database not open!");
911 start(&srch, key, FRESH);
912 return search(&srch);
917 * dbzfetch - get offset of an entry from the database
919 * Returns the offset of the text file for input key,
920 * or -1 if NOTFOUND or error occurs.
923 dbzfetch(const HASH key, off_t *value)
925 #ifdef DO_TAGGED_HASH
926 #define MAX_NB2RD (DBZMAXKEY + MAXFUZZYLENGTH + 2)
927 #define MIN_KEY_LENGTH 6 /* strlen("<1@a>") + strlen("\t") */
928 char *bp, buffer[MAX_NB2RD];
931 char *keytext = NULL;
932 of_t offset = NOTFOUND;
938 warn("dbzfetch: database not open!");
942 start(&srch, key, FRESH);
943 #ifdef DO_TAGGED_HASH
945 * nb2r: number of bytes to read from history file.
946 * It can be reduced if history text is written as hashes only.
948 nb2r = sizeof(buffer) - 1;
950 while ((offset = search(&srch)) != NOTFOUND) {
951 debug("got 0x%lx", key_ptr);
954 offset <<= conf.dropbits;
955 if (offset) /* backspace 1 character to read '\n' */
957 if (fseeko(basef, offset, SEEK_SET) != 0) {
958 syswarn("dbzfetch: seek failed");
961 keylen = fread(buffer, 1, nb2r, basef);
962 if (keylen < MIN_KEY_LENGTH) {
963 syswarn("dbzfetch: read failed");
966 buffer[keylen] = '\0'; /* terminate the string */
968 if (offset) { /* find the '\n', the previous EOL */
969 if (keylen > conf.lenfuzzy)
970 keylen = conf.lenfuzzy; /* keylen is fuzzy distance now */
971 for (j=0,bp=buffer; j<keylen; j++,bp++)
975 debug("dbzfetch: can't locate EOL");
976 /* pag entry should be deleted, but I'm lazy... */
980 bp++; /* now *bp is the start of key */
982 j = 0; /* We are looking for the first history line. */
986 /* Does this history line really have same key? */
989 keytext = HashToText(key);
990 if (memcmp(keytext, bp+1, sizeof(HASH)*2) != 0)
992 } else if (*bp == '<') {
993 char *p = strchr(bp+1, '\t');
994 if (!p) /* history has a corrupted line */
997 hishash = HashMessageID(bp);
998 if (memcmp(&key, &hishash, sizeof(HASH)) != 0)
1005 debug("fetch: successful");
1010 /* we didn't find it */
1011 debug("fetch: failed");
1012 prevp = &srch; /* remember where we stopped */
1014 #else /* DO_TAGGED_HASH */
1015 if (search(&srch) == true) {
1016 /* Actually get the data now */
1017 if ((options.pag_incore != INCORE_NO) && (srch.place < conf.tsize)) {
1018 memcpy(value, &((of_t *)idxtab.core)[srch.place], sizeof(of_t));
1020 if (pread(idxtab.fd, value, sizeof(of_t), srch.place * idxtab.reclen) != sizeof(of_t)) {
1021 syswarn("fetch: read failed");
1027 debug("fetch: successful");
1031 /* we didn't find it */
1032 debug("fetch: failed");
1033 prevp = &srch; /* remember where we stopped */
1039 * dbzstore - add an entry to the database
1040 * returns true for success and false for failure
1043 dbzstore(const HASH key, off_t data)
1045 #ifdef DO_TAGGED_HASH
1052 warn("dbzstore: database not open!");
1053 return DBZSTORE_ERROR;
1056 warn("dbzstore: database open read-only");
1057 return DBZSTORE_ERROR;
1060 #ifdef DO_TAGGED_HASH
1061 /* copy the value in to ensure alignment */
1062 memcpy(&value, &data, SOF);
1064 /* update maximum offset value if necessary */
1065 if (value > conf.vused[0])
1066 conf.vused[0] = value;
1068 /* now value is in fuzzy format */
1069 value >>= conf.dropbits;
1070 debug("dbzstore: (%ld)", (long) value);
1072 if (!okayvalue(value)) {
1073 warn("dbzstore: reserved bit or overflow in 0x%lx", value);
1074 return DBZSTORE_ERROR;
1077 /* find the place, exploiting previous search if possible */
1078 start(&srch, key, prevp);
1079 while (search(&srch) != NOTFOUND)
1084 debug("store: used count %ld", conf.used[0]);
1086 if (!set_pag(&srch, value))
1087 return DBZSTORE_ERROR;
1089 #else /* DO_TAGGED_HASH */
1091 /* find the place, exploiting previous search if possible */
1092 start(&srch, key, prevp);
1093 if (search(&srch) == true)
1094 return DBZSTORE_EXISTS;
1098 debug("store: used count %ld", conf.used[0]);
1101 memcpy(&evalue.hash, &srch.hash,
1102 sizeof(evalue.hash) < sizeof(srch.hash) ? sizeof(evalue.hash) : sizeof(srch.hash));
1104 /* Set the value in the index first since we don't care if it's out of date */
1105 if (!set(&srch, &idxtab, (void *)&data))
1106 return DBZSTORE_ERROR;
1107 if (!set(&srch, &etab, &evalue))
1108 return DBZSTORE_ERROR;
1110 #endif /* DO_TAGGED_HASH */
1114 * getconf - get configuration from .dir file
1115 * df - NULL means just give me the default
1116 * pf - NULL means don't care about .pag
1117 * returns true for success, false for failure
1120 getconf(FILE *df, dbzconfig *cp)
1124 /* empty file, no configuration known */
1125 #ifdef DO_TAGGED_HASH
1127 cp->tsize = DEFSIZE;
1128 for (i = 0; i < NUSEDS; i++)
1130 for (i = 0; i < NUSEDS; i++)
1132 cp->valuesize = sizeof(of_t);
1133 cp->fillpercent = 50;
1134 cp->tagenb = TAGENB;
1135 cp->tagmask = TAGMASK;
1136 cp->tagshift = TAGSHIFT;
1139 debug("getconf: defaults (%ld, %c, (0x%lx/0x%lx<<%d %d))",
1140 cp->tsize, cp->casemap, cp->tagenb,
1141 cp->tagmask, cp->tagshift, cp->dropbits);
1145 i = fscanf(df, "dbz 6 %ld %d %d %ld %ld %d %d\n", &cp->tsize,
1146 &cp->valuesize, &cp->fillpercent, &cp->tagenb,
1147 &cp->tagmask, &cp->tagshift, &cp->dropbits);
1149 warn("dbz: bad first line in config");
1152 if (cp->valuesize != sizeof(of_t)) {
1153 warn("dbz: wrong of_t size (%d)", cp->valuesize);
1156 cp->lenfuzzy = (int)(1 << cp->dropbits) - 1;
1157 #else /* DO_TAGGED_HASH */
1159 cp->tsize = DEFSIZE;
1160 for (i = 0; i < NUSEDS; i++)
1162 cp->valuesize = sizeof(of_t) + sizeof(erec);
1163 cp->fillpercent = 66;
1164 debug("getconf: defaults (%ld)", cp->tsize);
1168 i = fscanf(df, "dbz 6 %ld %d %d\n", &cp->tsize,
1169 &cp->valuesize, &cp->fillpercent);
1171 warn("dbz: bad first line in config");
1174 if (cp->valuesize != (sizeof(of_t) + sizeof(erec))) {
1175 warn("dbz: wrong of_t size (%d)", cp->valuesize);
1178 #endif /* DO_TAGGED_HASH */
1179 debug("size %ld", cp->tsize);
1181 /* second line, the usages */
1182 for (i = 0; i < NUSEDS; i++)
1183 if (!fscanf(df, "%ld", &cp->used[i])) {
1184 warn("dbz: bad usage value in config");
1187 debug("used %ld %ld %ld...", cp->used[0], cp->used[1], cp->used[2]);
1189 #ifdef DO_TAGGED_HASH
1190 /* third line, the text usages */
1191 for (i = 0; i < NUSEDS; i++)
1192 if (!fscanf(df, "%ld", &cp->vused[i])) {
1193 warn("dbz: bad text usage value in config");
1196 debug("vused %ld %ld %ld...", cp->vused[0], cp->vused[1], cp->vused[2]);
1197 #endif /* DO_TAGGED_HASH */
1202 /* putconf - write configuration to .dir file
1203 * Returns: 0 for success, -1 for failure
1206 putconf(FILE *f, dbzconfig *cp)
1211 if (fseeko(f, 0, SEEK_SET) != 0) {
1212 syswarn("dbz: fseeko failure in putconf");
1216 #ifdef DO_TAGGED_HASH
1217 fprintf(f, "dbz %d %ld %d %d %ld %ld %d %d\n", dbzversion, cp->tsize,
1218 cp->valuesize, cp->fillpercent, cp->tagenb,
1219 cp->tagmask, cp->tagshift, cp->dropbits);
1220 #else /* DO_TAGGED_HASH */
1221 fprintf(f, "dbz %d %ld %d %d\n", dbzversion, cp->tsize,
1222 cp->valuesize, cp->fillpercent);
1223 #endif /* DO_TAGGED_HASH */
1225 for (i = 0; i < NUSEDS; i++)
1226 fprintf(f, "%ld%c", cp->used[i], (i < NUSEDS-1) ? ' ' : '\n');
1228 #ifdef DO_TAGGED_HASH
1229 for (i = 0; i < NUSEDS; i++)
1230 fprintf(f, "%ld%c", cp->vused[i], (i < NUSEDS-1) ? ' ' : '\n');
1237 debug("putconf status %d", ret);
1241 /* getcore - try to set up an in-core copy of file
1243 * Returns: pointer to copy of file or NULL on errror
1246 getcore(hash_table *tab)
1252 size_t length = conf.tsize * tab->reclen;
1254 if (tab->incore == INCORE_MMAP) {
1255 #if defined(HAVE_MMAP)
1256 if (fstat(tab->fd, &st) == -1) {
1257 syswarn("dbz: getcore: fstat failed");
1260 if (length > st.st_size) {
1261 /* file too small; extend it */
1262 if (ftruncate(tab->fd, length) == -1) {
1263 syswarn("dbz: getcore: ftruncate failed");
1267 it = mmap(NULL, length, readonly ? PROT_READ : PROT_WRITE | PROT_READ,
1268 MAP_SHARED, tab->fd, 0);
1269 if (it == (char *)-1) {
1270 syswarn("dbz: getcore: mmap failed");
1273 #if defined (MADV_RANDOM) && defined(HAVE_MADVISE)
1274 /* not present in all versions of mmap() */
1275 madvise(it, length, MADV_RANDOM);
1278 warn("dbz: getcore: can't mmap files");
1282 it = xmalloc(length);
1284 nread = read(tab->fd, it, length);
1286 syswarn("dbz: getcore: read failed");
1292 memset(it + nread, '\0', i);
1299 /* putcore - try to rewrite an in-core table
1301 * Returns true on success, false on failure
1304 putcore(hash_table *tab)
1309 if (tab->incore == INCORE_MEM) {
1310 if(options.writethrough)
1312 nonblocking(tab->fd, false);
1313 size = tab->reclen * conf.tsize;
1314 result = xpwrite(tab->fd, tab->core, size, 0);
1315 if (result < 0 || (size_t) result != size) {
1316 nonblocking(tab->fd, options.nonblock);
1319 nonblocking(tab->fd, options.nonblock);
1322 if(tab->incore == INCORE_MMAP) {
1323 msync(tab->core, conf.tsize * tab->reclen, MS_ASYNC);
1329 #ifdef DO_TAGGED_HASH
1331 - makehash31 : make 31-bit hash from HASH
1334 makehash31(const HASH *hash)
1337 memcpy(&h, hash, sizeof(h));
1342 /* start - set up to start or restart a search
1343 * osp == NULL is acceptable
1346 start(searcher *sp, const HASH hash, searcher *osp)
1348 #ifdef DO_TAGGED_HASH
1351 h = makehash31(&hash);
1352 if (osp != FRESH && osp->shorthash == h) {
1356 debug("search restarted");
1359 sp->tag = MKTAG(h / conf.tsize);
1360 sp->place = h % conf.tsize;
1361 debug("hash %8.8lx tag %8.8lx place %ld",
1362 sp->shorthash, sp->tag, sp->place);
1368 #else /* DO_TAGGED_HASH */
1371 if (osp != FRESH && !memcmp(&osp->hash, &hash, sizeof(hash))) {
1375 debug("search restarted");
1378 tocopy = sizeof(hash) < sizeof(sp->shorthash) ? sizeof(hash) : sizeof(sp->shorthash);
1379 /* Copy the bottom half of thhe hash into sp->shorthash */
1380 memcpy(&sp->shorthash, (const char *)&hash + (sizeof(hash) - tocopy),
1382 sp->shorthash >>= 1;
1387 #endif /* DO_TAGGED_HASH */
1390 #ifdef DO_TAGGED_HASH
1392 - search - conduct part of a search
1394 static of_t /* NOTFOUND if we hit VACANT or error */
1395 search(searcher *sp)
1398 unsigned long taboffset = sp->tabno * conf.tsize;
1404 /* go to next location */
1405 if (sp->run++ == MAXRUN) {
1408 taboffset = sp->tabno * conf.tsize;
1410 sp->place = ((sp->shorthash + sp->run) % conf.tsize) + taboffset;
1411 debug("search @ %ld", place);
1413 /* get the tagged value */
1414 if ((options.pag_incore != INCORE_NO) && (sp->place < conf.tsize)) {
1415 debug("search: in core");
1416 value = ((of_t *)pagtab.core)[sp->place];
1419 dest = sp->place * SOF;
1423 if (pread(pagtab.fd, &value, sizeof(value), dest) != sizeof(value)) {
1425 syswarn("dbz: search: read failed");
1433 pagtab.pos += sizeof(value);
1436 /* vacant slot is always cause to return */
1437 if (value == VACANT) {
1438 debug("search: empty slot");
1443 value = UNBIAS(value);
1444 debug("got 0x%lx", value);
1445 if (!HASTAG(value)) {
1448 } else if (TAG(value) == sp->tag) {
1450 return(NOTAG(value));
1452 debug("mismatch 0x%lx", TAG(value));
1458 #else /* DO_TAGGED_HASH */
1460 /* search - conduct part of a search
1462 * return false if we hit vacant rec's or error
1465 search(searcher *sp)
1468 unsigned long taboffset = 0;
1474 /* go to next location */
1475 if (sp->run++ == MAXRUN) {
1478 taboffset = sp->tabno * conf.tsize;
1481 sp->place = ((sp->shorthash + sp->run) % conf.tsize) + taboffset;
1482 debug("search @ %ld", (long) sp->place);
1485 if ((options.exists_incore != INCORE_NO) && (sp->place < conf.tsize)) {
1486 debug("search: in core");
1487 memcpy(&value, &((erec *)etab.core)[sp->place], sizeof(erec));
1490 dest = sp->place * sizeof(erec);
1494 if (pread(etab.fd, &value, sizeof(erec), dest) != sizeof(erec)) {
1496 debug("search: read failed");
1501 memset(&value, '\0', sizeof(erec));
1506 etab.pos += sizeof(erec);
1509 /* Check for an empty record */
1510 if (!memcmp(&value, &empty_rec, sizeof(erec))) {
1511 debug("search: empty slot");
1515 /* check the value */
1516 debug("got 0x%.*s", DBZ_INTERNAL_HASH_SIZE, value.hash);
1517 if (!memcmp(&value.hash, &sp->hash, DBZ_INTERNAL_HASH_SIZE)) {
1523 #endif /* DO_TAGGED_HASH */
1525 /* set - store a value into a location previously found by search
1527 * Returns: true success, false failure
1530 set(searcher *sp, hash_table *tab, void *value)
1537 /* If we have the index file in memory, use it */
1538 if ((tab->incore != INCORE_NO) && (sp->place < conf.tsize)) {
1539 void *where = (char *)tab->core + (sp->place * tab->reclen);
1541 memcpy(where, value, tab->reclen);
1542 debug("set: incore");
1543 if (tab->incore == INCORE_MMAP) {
1544 if (innconf->nfswriter) {
1545 inn_mapcntl(where, tab->reclen, MS_ASYNC);
1549 if (!options.writethrough)
1554 tab->pos = -1; /* invalidate position memory */
1555 offset = sp->place * tab->reclen;
1558 while (pwrite(tab->fd, value, tab->reclen, offset) != tab->reclen) {
1559 if (errno == EAGAIN) {
1563 FD_SET(tab->fd, &writeset);
1564 if (select(tab->fd + 1, NULL, &writeset, NULL, NULL) < 1) {
1565 syswarn("dbz: set: select failed");
1571 syswarn("dbz: set: write failed");
1576 debug("set: succeeded");
1580 #ifdef DO_TAGGED_HASH
1582 - set_pag - store a value into a location previously found by search
1584 - Returns: true success, false failure
1587 set_pag(searcher *sp, of_t value)
1592 v |= sp->tag | taghere;
1593 if (v != UNBIAS(VACANT)) /* BIAS(v) won't look VACANT */
1595 if (v != LONG_MAX) /* and it won't overflow */
1598 } else if (canttag_warned == 0) {
1599 fprintf(stderr, "dbz.c(set): can't tag value 0x%lx", v);
1600 fprintf(stderr, " tagboth = 0x%lx\n", tagboth);
1603 debug("tagged value is 0x%lx", value);
1604 value = BIAS(value);
1606 return set(sp, &pagtab, &value);
1608 #endif /* DO_TAGGED_HASH */
1610 /* dbzsetoptions - set runtime options for the database.
1613 dbzsetoptions(const dbzoptions o)
1617 /* Without a working mmap on files, we should avoid it. */
1618 if (options.pag_incore == INCORE_MMAP) options.pag_incore = INCORE_NO;
1619 if (options.exists_incore == INCORE_MMAP) options.exists_incore = INCORE_NO;
1623 /* dbzgetoptions - get runtime options for the database.
1626 dbzgetoptions(dbzoptions *o)
1635 timediffms(struct timeval start, struct timeval end)
1637 return (((end.tv_sec - start.tv_sec) * 1000) +
1638 ((end.tv_usec - start.tv_usec)) / 1000);
1642 RemoveDBZ(char *filename)
1646 #ifdef DO_TAGGED_HASH
1647 fn = concat(filename, pag, (char *) 0);
1651 fn = concat(filename, exists, (char *) 0);
1654 fn = concat(filename, idx, (char *) 0);
1658 fn = concat(filename, dir, (char *) 0);
1666 fprintf(stderr, "usage: dbztest [-i] [-n|m] [-N] [-s size] <history>\n");
1667 #ifdef DO_TAGGED_HASH
1668 fprintf(stderr, " -i initialize history. deletes .pag files\n");
1670 fprintf(stderr, " -i initialize history. deletes .exists and .index files\n");
1672 fprintf(stderr, " -n or m use INCORE_NO, INCORE_MMAP. default = INCORE_MEM\n");
1673 fprintf(stderr, " -N using nfswriter mode\n");
1674 fprintf(stderr, " -s size number of history lines[2500000]\n");
1675 fprintf(stderr, " history history text file\n");
1680 main(int argc, char *argv[])
1684 char ibuf[2048], *p;
1687 int initialize = 0, size = 2500000;
1688 char *history = NULL;
1690 dbz_incore_val incore = INCORE_MEM;
1691 struct timeval start, end;
1694 innconf = xcalloc(1, sizeof(struct innconf));
1696 for (i=1; i<argc; i++)
1697 if (strcmp(argv[i], "-i") == 0)
1699 else if (strcmp(argv[i], "-n") == 0)
1701 else if (strcmp(argv[i], "-N") == 0)
1702 innconf->nfswriter = true;
1703 else if (strcmp(argv[i], "-m") == 0)
1704 #if defined(HAVE_MMAP)
1705 incore = INCORE_MMAP;
1707 fprintf (stderr, "can't mmap files\n");
1709 else if (strcmp(argv[i], "-s") == 0)
1710 size = atoi(argv[++i]);
1711 else if (*argv[i] != '-' && history == NULL)
1716 if (history == NULL)
1718 if ((fpi = fopen(history, "r")) == NULL) {
1719 fprintf(stderr, "can't open %s\n", history);
1723 dbzgetoptions(&opt);
1724 opt.pag_incore = incore;
1729 gettimeofday(&start, NULL);
1730 if (dbzfresh(history, dbzsize(size)) < 0) {
1731 fprintf(stderr, "cant dbzfresh %s\n", history);
1734 gettimeofday(&end, NULL);
1735 printf("dbzfresh: %d msec\n", timediffms(start, end));
1737 gettimeofday(&start, NULL);
1738 if (dbzinit(history) < 0) {
1739 fprintf(stderr, "cant dbzinit %s\n", history);
1742 gettimeofday(&end, NULL);
1743 printf("dbzinit: %d msec\n", timediffms(start, end));
1746 gettimeofday(&start, NULL);
1747 where = ftello(fpi);
1748 for (line=1; fgets(ibuf, sizeof(ibuf), fpi); line++, where=ftello(fpi)) {
1750 if ((p = strchr(ibuf, '\t')) == NULL) {
1751 fprintf(stderr, "ignoreing bad line: %s\n", ibuf);
1755 key = HashMessageID(ibuf);
1756 } else if (*ibuf == '[')
1757 key = TextToHash(ibuf+1);
1761 if (dbzstore(key, where) == DBZSTORE_ERROR) {
1762 fprintf(stderr, "cant store %s\n", ibuf);
1766 if (!dbzfetch(key, &ivalue)) {
1767 fprintf(stderr, "line %d can't fetch %s\n", line, ibuf);
1773 gettimeofday(&end, NULL);
1774 i = timediffms(start, end);
1775 printf("%s: %d lines %.3f msec/id\n",
1776 (initialize) ? "dbzstore" : "dbzfetch",
1777 line, (double)i / (double)line);
1779 gettimeofday(&end, NULL);
1781 gettimeofday(&end, NULL);
1782 printf("dbzclose: %d msec\n", timediffms(start, end));
1785 #endif /* DBZTEST */