From 6f444bda1b7deb31cf7fb2395cb0993c3e3b8c42 Mon Sep 17 00:00:00 2001 Message-Id: <6f444bda1b7deb31cf7fb2395cb0993c3e3b8c42.1715407295.git.mdw@distorted.org.uk> From: Mark Wooding Date: Mon, 15 Dec 2003 20:54:57 +0000 Subject: [PATCH] Add global unihash table; use universal hashing instead of CRC. Organization: Straylight/Edgeware From: mdw --- atom.c | 9 ++++++--- crc-mktab.c | 11 ++++++++--- debian/changelog | 8 ++++---- debian/control | 21 +++++++++++++++++++++ debian/rules | 10 +++++++--- man/crc-mktab.1 | 2 +- man/mLib.3 | 10 ++++++++-- man/sym.3 | 11 +++++++---- man/unihash.3 | 15 +++++++++++++++ sym.c | 11 +++++++---- sym.h | 7 +++++-- unihash.h | 9 ++++++++- 12 files changed, 97 insertions(+), 27 deletions(-) diff --git a/atom.c b/atom.c index dd02f54..768d229 100644 --- a/atom.c +++ b/atom.c @@ -1,6 +1,6 @@ /* -*-c-*- * - * $Id: atom.c,v 1.3 2001/01/25 21:13:15 mdw Exp $ + * $Id: atom.c,v 1.4 2003/12/15 20:53:47 mdw Exp $ * * Atom management * @@ -30,6 +30,9 @@ /*----- Revision history --------------------------------------------------* * * $Log: atom.c,v $ + * Revision 1.4 2003/12/15 20:53:47 mdw + * Add global unihash table; use universal hashing instead of CRC. + * * Revision 1.3 2001/01/25 21:13:15 mdw * New function allowing an atom's length to be specified at intern time. * @@ -48,9 +51,9 @@ #include "alloc.h" #include "atom.h" -#include "crc32.h" #include "hash.h" #include "sym.h" +#include "unihash.h" /*----- Static variables --------------------------------------------------*/ @@ -168,7 +171,7 @@ atom *atom_gensym(atom_table *t) a->b.name = (char *)(a + 1); memcpy(a->b.name, buf, sz); a->b.len = sz; - CRC32(a->b.b.hash, 0, buf, sz); + a->b.b.hash = UNIHASH(&unihash_global, buf, sz); a->f = ATOMF_GENSYM; a->b.b.next = t->g; t->g = &a->b.b; diff --git a/crc-mktab.c b/crc-mktab.c index 2439e20..1833e0e 100644 --- a/crc-mktab.c +++ b/crc-mktab.c @@ -1,6 +1,6 @@ /* -*-c-*- * - * $Id: crc-mktab.c,v 1.4 2001/03/10 10:59:21 mdw Exp $ + * $Id: crc-mktab.c,v 1.5 2003/12/15 20:53:47 mdw Exp $ * * Build CRC tables * @@ -30,6 +30,9 @@ /*----- Revision history --------------------------------------------------* * * $Log: crc-mktab.c,v $ + * Revision 1.5 2003/12/15 20:53:47 mdw + * Add global unihash table; use universal hashing instead of CRC. + * * Revision 1.4 2001/03/10 10:59:21 mdw * Fix generating tables where the chunk size is longer than the * polynomial. Also fix output formatting when there aren't enough entries @@ -99,7 +102,8 @@ static void version(FILE *fp) static void usage(FILE *fp) { - pquis(fp, "Usage: $ [-cr] [-g guard] [-o file] [-b bits] [-p poly]\n"); + pquis(fp, "Usage: $ [-cr] [-o FILE] [-g GUARD] [-s SYM] [-i HEADER]\n\ + [-t TYPE] [-b BITS] [-B BITS] [-p POLY]\n"); } static void help(FILE *fp) @@ -121,7 +125,8 @@ of options are provided:\n\ -p, --polynomial=POLY Use the POLY as the dividing polynomial.\n\ -r, --reverse Create a `reversed' CRC table.\n\ -g, --guard=GUARD Use GUARD as a multiple-inclusion guard constant.\n\ --s, --symbol=SYM Name the generated table SYM\n\ +-i, --include=HEADER Include HEADER at top of C source file.\n\ +-s, --symbol=SYM Name the generated table SYM.\n\ -t, --type=TYPE Give the table entries type TYPE in C source.\n\ -o, --output=FILE Write the output to FILE.\n\ ", stdout); diff --git a/debian/changelog b/debian/changelog index d426a53..014355f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,12 +1,12 @@ mlib (2.0.3) experimental; urgency=low * Document hex encoding/decoding. - * Add file descriptor passing. + * Add ADNS-based background resolver. + * Split binaries off into their own package. + * Document, fix and test universal hashing; use it in symbol tables. - * Add ADNS-based background resolver. - - -- Mark Wooding Sat, 13 Dec 2003 19:54:22 +0000 + -- Mark Wooding Mon, 15 Dec 2003 20:54:16 +0000 mlib (2.0.2) experimental; urgency=low diff --git a/debian/control b/debian/control index e6a480f..b691aad 100644 --- a/debian/control +++ b/debian/control @@ -12,6 +12,7 @@ Conflicts: mlib2-adns Description: A library of miscellaneous stuff The mLib library provides various handy utilities, including * yet another options parser, like GNU getopt but more so; + * a simple but efficient universal hashing family; * a suite for writing event-driven select-based servers; * a simple exception-handling system, based on longjmp; * dynamically resizing strings and arrays; @@ -31,6 +32,7 @@ Conflicts: mlib2 Description: A library of miscellaneous stuff The mLib library provides various handy utilities, including * yet another options parser, like GNU getopt but more so; + * a simple but efficient universal hashing family; * a suite for writing event-driven select-based servers; * a simple exception-handling system, based on longjmp; * dynamically resizing strings and arrays; @@ -46,9 +48,11 @@ Package: mlib-dev Architecture: any Depends: mlib2 (= ${Source-Version}) | mlib2-adns (= ${Source-Version}), libc6-dev +Recommends: mlib-bin Description: A library of miscellaneous stuff The mLib library provides various handy utilities, including * yet another options parser, like GNU getopt but more so; + * a simple but efficient universal hashing family; * a suite for writing event-driven select-based servers; * a simple exception-handling system, based on longjmp; * dynamically resizing strings and arrays; @@ -57,3 +61,20 @@ Description: A library of miscellaneous stuff * a simple background DNS resolver. This package contains the header files and static libraries needed to compile programs which use mLib. + +Package: mlib-bin +Architecture: any +Depends: ${shlibs:Depends} +Recommends: mlib-dev +Description: A library of miscellaneous stuff + The mLib library provides various handy utilities, including + * yet another options parser, like GNU getopt but more so; + * a simple but efficient universal hashing family; + * a suite for writing event-driven select-based servers; + * a simple exception-handling system, based on longjmp; + * dynamically resizing strings and arrays; + * a resizing hashtable; + * base64 and hex encoding and decoding; and + * a simple background DNS resolver. + This package contains some utility programs: in particular, to generate + static tables for efficient CRC and universal hashing computations. diff --git a/debian/rules b/debian/rules index 1f22ffb..5e94001 100755 --- a/debian/rules +++ b/debian/rules @@ -36,13 +36,17 @@ install: build mv debian/mlib2/usr/lib/*.so debian/mlib-dev/usr/lib mv debian/mlib2/usr/lib/*.la debian/mlib-dev/usr/lib mv debian/mlib2/usr/include debian/mlib-dev/usr + mkdir -p debian/mlib-bin/usr/share/man + mv debian/mlib2/usr/bin debian/mlib-bin/usr + mv debian/mlib2/usr/share/man/man1 debian/mlib-bin/usr/share/man make -C deb-build install DESTDIR=`pwd`/debian/mlib2-adns - rm debian/mlib2-adns/usr/bin/mLib-config - rm -r debian/mlib2-adns/usr/share/man/man3 + rmdir debian/mlib2-adns/usr/lib/mLib + rm -rf debian/mlib2-adns/usr/bin + rm -rf debian/mlib2-adns/usr/share/man + rm -rf debian/mlib2-adns/usr/include rm debian/mlib2-adns/usr/lib/*.a rm debian/mlib2-adns/usr/lib/*.so rm debian/mlib2-adns/usr/lib/*.la - rm -rf debian/mlib2-adns/usr/include dh_strip -a binary-indep: diff --git a/man/crc-mktab.1 b/man/crc-mktab.1 index 0d285c2..1e8bfb4 100644 --- a/man/crc-mktab.1 +++ b/man/crc-mktab.1 @@ -61,7 +61,7 @@ Print a one-line usage summary to standard output and exit successfully. Produce a C source file which exports a symbol naming the array, instead of a C header file. .TP -.BI "\s, \-\-symbol=" symbol +.BI "\-s, \-\-symbol=" symbol Name the table .IR symbol . This is the name of the macro defined by a header file, or the array diff --git a/man/mLib.3 b/man/mLib.3 index c942ac1..f7d5511 100644 --- a/man/mLib.3 +++ b/man/mLib.3 @@ -147,8 +147,14 @@ stack operations efficiently. .SS "Miscellaneous utilities" The .BR crc32 (3) -module calculates CRC values for strings. It's used by the symbol table -manager as a hash function. +module calculates CRC values for strings. It used to be used by the +symbol table manager as a hash function. +.PP +The +.BR unihash (3) +module implements a simple but efficient universal hashing family. This +is a keyed hash function which provides security against an adversary +choosing input to a hash table deliberately to cause collisions. .PP The .BR lock (3) diff --git a/man/sym.3 b/man/sym.3 index fb45af9..967d024 100644 --- a/man/sym.3 +++ b/man/sym.3 @@ -224,10 +224,13 @@ for (sym_mkiter(&i, t); (v = sym_next(&i)) != 0; ) { .VE That ought to be enough examples to be getting on with. .SS Implementation -The symbol table is an extensible hashtable, using a 32-bit CRC as the -hash function. The hash chains are kept very short (probably too short, -actually). Every time a symbol is found, its block is promoted to the -front of its bin chain so it gets found faster next time. +The symbol table is an extensible hashtable, using the universal hash +function described in +.BR unihash (3) +and the global hashing key. The hash chains are kept very short +(probably too short, actually). Every time a symbol is found, its block +is promoted to the front of its bin chain so it gets found faster next +time. .SH SEE ALSO .BR hash (3), .BR mLib (3). diff --git a/man/unihash.3 b/man/unihash.3 index 49ef2e0..ddfb227 100644 --- a/man/unihash.3 +++ b/man/unihash.3 @@ -45,6 +45,8 @@ unihash \- simple and efficient universal hashing for hashtables .nf .B "#include " +.B "unihash_info unihash_global;" + .BI "void unihash_setkey(unihash_info *" i ", uint32 " k ); .BI "uint32 UNIHASH_INIT(const unihash_info *" i ); .BI "void unihash_hash(const unihash_info *" st ", uint32 " a , @@ -125,6 +127,18 @@ and function are convenient interfaces to .B unihash_hash if you only wanted to hash one chunk. +.SS "Global hash info table" +There's no problem with using the same key for several purposes, as long +as it's secret from all of your adversaries. Therefore, there is a +global +.B unihash_info +table define, called +.BR unihash_global . +This initially contains information for a fixed key which the author +chose at random, but if you need to you can set a different key into it +.I before +it gets used to hash any data (otherwise your hash tables will become +messed up). .SS "Theoretical issues" The hash function implemented by .B unihash @@ -176,6 +190,7 @@ dependent on any unproven assumptions (unlike many `proofs' of cryptographic security, which actually reduce the security of some construction to the security of its components). It's just a fact. .SH SEE ALSO +.BR unihash-mkstatic (3), .BR crc32 (3), .BR mLib (3). .SH AUTHOR diff --git a/sym.c b/sym.c index 3aea8c7..a3c9b86 100644 --- a/sym.c +++ b/sym.c @@ -1,6 +1,6 @@ /* -*-c-*- * - * $Id: sym.c,v 1.13 2001/01/25 21:14:49 mdw Exp $ + * $Id: sym.c,v 1.14 2003/12/15 20:53:47 mdw Exp $ * * Symbol table management * @@ -30,6 +30,9 @@ /*----- Revision history --------------------------------------------------* * * $Log: sym.c,v $ + * Revision 1.14 2003/12/15 20:53:47 mdw + * Add global unihash table; use universal hashing instead of CRC. + * * Revision 1.13 2001/01/25 21:14:49 mdw * Always add a terminating null, and don't count it in the length. * @@ -88,11 +91,11 @@ #include "alloc.h" #include "arena.h" #include "bits.h" -#include "crc32.h" #include "exc.h" #include "hash.h" #include "sub.h" #include "sym.h" +#include "unihash.h" /*----- Main code ---------------------------------------------------------*/ @@ -141,7 +144,7 @@ void sym_destroy(sym_table *t) /* --- @sym_find@ --- * * * Arguments: @sym_table *t@ = pointer to symbol table in question - * @const char *n@ = pointer to symbol table to look up + * @const char *n@ = pointer to symbol name to look up * @long l@ = length of the name string or negative to measure * @size_t sz@ = size of desired symbol object, or zero * @unsigned *f@ = pointer to a flag, or null. @@ -179,7 +182,7 @@ void *sym_find(sym_table *t, const char *n, long l, size_t sz, unsigned *f) /* --- Find the correct bin --- */ len = l < 0 ? strlen(n) : l; - CRC32(hash, 0, n, len); + hash = UNIHASH(&unihash_global, n, len); bin = HASH_BIN(&t->t, hash); /* --- Search the bin list --- */ diff --git a/sym.h b/sym.h index db49ff5..46e84f5 100644 --- a/sym.h +++ b/sym.h @@ -1,6 +1,6 @@ /* -*-c-*- * - * $Id: sym.h,v 1.12 2001/01/20 11:49:37 mdw Exp $ + * $Id: sym.h,v 1.13 2003/12/15 20:53:47 mdw Exp $ * * Symbol table management * @@ -30,6 +30,9 @@ /*----- Revision history --------------------------------------------------* * * $Log: sym.h,v $ + * Revision 1.13 2003/12/15 20:53:47 mdw + * Add global unihash table; use universal hashing instead of CRC. + * * Revision 1.12 2001/01/20 11:49:37 mdw * Export tuning parameters from header file, for the benefit of other * hashtable implementations. Change the storage of symbol names: store @@ -189,7 +192,7 @@ extern void sym_destroy(sym_table */*t*/); /* --- @sym_find@ --- * * * Arguments: @sym_table *t@ = pointer to symbol table in question - * @const char *n@ = pointer to symbol table to look up + * @const char *n@ = pointer to symbol name to look up * @long l@ = length of the name string or negative to measure * @size_t sz@ = size of desired symbol object, or zero * @unsigned *f@ = pointer to a flag, or null. diff --git a/unihash.h b/unihash.h index dfc5b79..15adb90 100644 --- a/unihash.h +++ b/unihash.h @@ -1,6 +1,6 @@ /* -*-c-*- * - * $Id: unihash.h,v 1.2 2003/12/14 14:45:30 mdw Exp $ + * $Id: unihash.h,v 1.3 2003/12/15 20:53:47 mdw Exp $ * * Simple and efficient universal hashing for hashtables * @@ -30,6 +30,9 @@ /*----- Revision history --------------------------------------------------* * * $Log: unihash.h,v $ + * Revision 1.3 2003/12/15 20:53:47 mdw + * Add global unihash table; use universal hashing instead of CRC. + * * Revision 1.2 2003/12/14 14:45:30 mdw * Test universal hashing and fix bugs. * @@ -138,6 +141,10 @@ typedef struct unihash_info { uint32 s[UNIHASH_NBATCH][4][256]; /* S-tables as described */ } unihash_info; +/*----- A global hash-info table ------------------------------------------*/ + +extern unihash_info unihash_global; /* Key this if you like */ + /*----- Functions provided ------------------------------------------------*/ /* --- @unihash_setkey@ --- * -- [mdw]