Test universal hashing and fix bugs.

author mdw <mdw>

Sun, 14 Dec 2003 14:45:30 +0000 (14:45 +0000)

committer mdw <mdw>

Sun, 14 Dec 2003 14:45:30 +0000 (14:45 +0000)
author mdw <mdw>
Sun, 14 Dec 2003 14:45:30 +0000 (14:45 +0000)
committer mdw <mdw>
Sun, 14 Dec 2003 14:45:30 +0000 (14:45 +0000)
diff --git a/Makefile.am b/Makefile.am

index 8e0c51d776a7fb4edb80fe79909fc2def35873a0..372acd82e39f293ac35ee92921635280a2018efb 100644 (file)
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,6 +1,6 @@
  ## -*-Makefile-*-
  ##
-## $Id: Makefile.am,v 1.42 2003/12/13 20:37:59 mdw Exp $
+## $Id: Makefile.am,v 1.43 2003/12/14 14:45:30 mdw Exp $
  ##
  ## Building the distribution
  ##
@@ -29,6 +29,9 @@
  ##----- Revision history ----------------------------------------------------
  ##
  ## $Log: Makefile.am,v $
+## Revision 1.43  2003/12/14 14:45:30  mdw
+## Test universal hashing and fix bugs.
+##
  ## Revision 1.42  2003/12/13 20:37:59  mdw
  ## Add adns support in background resolver.
  ##
@@ -193,10 +196,11 @@ crc_mktab_SOURCES = crc-mktab.c mdwopt.c quis.c pquis.c report.c str.c
  
  ## --- Test code ---
  
-noinst_PROGRAMS = da.t sym.t assoc.t bits.t
+noinst_PROGRAMS = da.t sym.t assoc.t bits.t unihash.t
  
  check: \
-       da.test sym.test assoc.test bits.test base64.test hex.test
+       da.test sym.test assoc.test bits.test base64.test hex.test \
+       unihash.test
  
  da_t_SOURCES = da-test.c
  da_t_LDADD = libmLib.la
@@ -243,9 +247,11 @@ bits.test: bits.t
         ./bits.t -f $(srcdir)/bits.in
  
  base64.to: base64.c
-       $(COMPILE) -c -DTEST_RIG -DSRCDIR=\"$(srcdir)\" $(srcdir)/base64.c -o base64.to
+       $(COMPILE) -c -DTEST_RIG -DSRCDIR=\"$(srcdir)\" \
+               $(srcdir)/base64.c -o base64.to
  base64.t: base64.to base64.o libmLib.la
-       $(CC) $(CFLAGS) $(LDFLAGS) base64.to .libs/libmLib.a $(LIBS) -o base64.t
+       $(CC) $(CFLAGS) $(LDFLAGS) \
+               base64.to .libs/libmLib.a $(LIBS) -o base64.t
  base64.test: base64.t base64.in base64.ref
         ./base64.t <$(srcdir)/base64.in >base64.out
         cmp base64.out $(srcdir)/base64.ref
@@ -254,9 +260,11 @@ base64.test: base64.t base64.in base64.ref
         @echo "base64 tested OK."
  
  hex.to: hex.c
-       $(COMPILE) -c -DTEST_RIG -DSRCDIR=\"$(srcdir)\" $(srcdir)/hex.c -o hex.to
+       $(COMPILE) -c -DTEST_RIG -DSRCDIR=\"$(srcdir)\" \
+               $(srcdir)/hex.c -o hex.to
  hex.t: hex.to hex.o libmLib.la
-       $(CC) $(CFLAGS) $(LDFLAGS) hex.to .libs/libmLib.a $(LIBS) -o hex.t
+       $(CC) $(CFLAGS) $(LDFLAGS) \
+               hex.to .libs/libmLib.a $(LIBS) -o hex.t
  hex.test: hex.t hex.in hex.ref
         ./hex.t <$(srcdir)/hex.in >hex.out
         cmp hex.out $(srcdir)/hex.ref
@@ -264,11 +272,23 @@ hex.test: hex.t hex.in hex.ref
         cmp hex.out $(srcdir)/hex.in
         @echo "hex tested OK."
  
+unihash.to: unihash.c
+       $(COMPILE) -c -DTEST_RIG -DSRCDIR=\"$(srcdir)\" \
+               $(srcdir)/unihash.c -o unihash.to
+unihash.t: unihash.to libmLib.la
+       $(CC) $(CFLAGS) $(LDFLAGS) \
+               unihash.to .libs/libmLib.a $(LIBS) -o unihash.t
+unihash.in: unihash-check.pl
+       perl $(srcdir)/unihash-check.pl >unihash.in.new
+       mv unihash.in.new unihash.in
+unihash.test: unihash.t unihash.in
+       ./unihash.t -f unihash.in
+
  TEST_CLEAN = \
-       *.t \
+       *.t *.to \
         da.in da.ref da.out \
         sym.in sym.ref sym.out \
-       base64.out hex.out 
+       base64.out hex.out unihash.in
  
  TEST_DIST = \
         da-gtest da-ref \
diff --git a/unihash-check.pl b/unihash-check.pl

new file mode 100644 (file)

index 0000000..72c3b9b
--- /dev/null
+++ b/unihash-check.pl
@@ -0,0 +1,47 @@
+#! /usr/bin/perl
+
+my $MOD = 0x04c11db7;
+
+sub gfmul {
+  my ($x, $y) = @_;
+  my $a = 0;
+
+  while ($y) {
+    if ($y & 1) { $a ^= $x };
+    if ($x & 0x80000000) { $x <<= 1; $x ^= $MOD; }
+    else { $x <<= 1; }
+    $y >>= 1;
+  }
+  return $a;
+}
+
+sub hash {
+  my ($k, $msg) = @_;
+  my $h = $k;
+  for (my $i = 0; $i < length $msg; $i++) {
+    my $m = ord(substr($msg, $i, 1));
+    $h = gfmul($h ^ $m, $k);
+  }
+  printf "  0x%08x \"%s\" 0x%08x;\n", $k, $msg, $h;
+}
+
+print <<EOF;
+# test vectors for unihash
+
+hash {
+EOF
+hash(0x00000000, "anything you like");
+hash(0x12345678, "an exaple test string");
+hash(0xb8a171f0, "The quick brown fox jumps over the lazy dog.");
+hash(0x2940521b, "A man, a plan, a canal: Panama!");
+
+my $k = 0x94b22a73;
+my $m = 0xbb7b1fef;
+for (my $i = 0; $i < 48; $i++) {
+  hash($k, "If we don't succeed, we run the risk of failure.");
+  $k = gfmul($k, $m);
+}
+
+print <<EOF;
+}
+EOF
diff --git a/unihash.c b/unihash.c

index 39e2f06803d4cfe9c341dd19e1fc7306e01bc86f..e2093b9a9c429fca98ecdb7f6cf94c90503898eb 100644 (file)
--- a/unihash.c
+++ b/unihash.c
@@ -1,6 +1,6 @@
  /* -*-c-*-
   *
- * $Id: unihash.c,v 1.1 2003/10/12 14:43:24 mdw Exp $
+ * $Id: unihash.c,v 1.2 2003/12/14 14:45:30 mdw Exp $
   *
   * Simple and efficient universal hashing for hashtables
   *
@@ -30,6 +30,9 @@
  /*----- Revision history --------------------------------------------------* 
   *
   * $Log: unihash.c,v $
+ * Revision 1.2  2003/12/14 14:45:30  mdw
+ * Test universal hashing and fix bugs.
+ *
   * Revision 1.1  2003/10/12 14:43:24  mdw
   * Universal hashing.
   *
@@ -92,7 +95,7 @@ void unihash_setkey(unihash_info *i, uint32 k)
   *             @const void *p@ = pointer to data to hash
   *             @size_t sz@ = size of the data
   *
- * Returns:    ---
+ * Returns:    Hash of data so far.
   *
   * Use:                Hashes data.  Call this as many times as needed.  
   */
@@ -123,6 +126,7 @@ uint32 unihash_hash(const unihash_info *i, uint32 a,
      a ^= BYTEMULT(2, *pp++);
      a ^= BYTEMULT(1, *pp++);
      a ^= BYTEMULT(0, *pp++);
+    sz -= UNIHASH_NBATCH;
    }
  
    /* --- The tail end is a smaller batch --- */
@@ -156,4 +160,71 @@ uint32 unihash(const unihash_info *i, const void *p, size_t sz)
    return (UNIHASH(i, p, sz));
  }
  
+/*----- Test rig ----------------------------------------------------------*/
+
+#ifdef TEST_RIG
+
+#include "testrig.h"
+
+static int verify(dstr *v)
+{
+  unihash_info ui;
+  uint32 k;
+  uint32 h, hh;
+  size_t n;
+  int i, c;
+  const char *p;
+  int ok = 1;
+
+  static const int step[] = { 0, 1, 5, 6, 7, 8, 23, -1 };
+
+  /* --- Set up for using this key --- */
+
+  k = *(uint32 *)v[0].buf;
+  h = *(uint32 *)v[2].buf;
+  unihash_setkey(&ui, k);
+
+  /* --- Hash the data a lot --- */
+
+  for (i = 0; step[i] >= 0; i++) {
+    c = step[i];
+    if (!c)
+      hh = unihash(&ui, v[1].buf, v[1].len);
+    else {
+      hh = UNIHASH_INIT(&ui);
+      p = v[1].buf;
+      n = v[1].len;
+      while (n) {
+       if (c > n) c = n;
+       hh = unihash_hash(&ui, hh, p, c);
+       p += c;
+       n -= c;
+      }
+    }
+    if (h != hh) {
+      ok = 0;
+      fprintf(stderr, "\nunihash failed\n");
+      fprintf(stderr, "  key = %08lx\n", (unsigned long)k);
+      fprintf(stderr, "  data = %s\n", v[1].buf);
+      fprintf(stderr, "  step = %d\n", step[i]);
+      fprintf(stderr, "  expected = %08lx\n", (unsigned long)h);
+      fprintf(stderr, "  computed = %08lx\n", (unsigned long)hh);
+    }
+  }
+  return (ok);
+}
+
+static const test_chunk tests[] = {
+  { "hash", verify, { &type_uint32, &type_string, &type_uint32 } },
+  { 0, 0, { 0 } }
+};
+
+int main(int argc, char *argv[])
+{
+  test_run(argc, argv, tests, "unihash.in");
+  return (0);
+}
+
+#endif
+
  /*----- That's all, folks -------------------------------------------------*/
diff --git a/unihash.h b/unihash.h

index 5fb57416946c8b8c84051a2301475bbe16dab3b8..dfc5b796163ff95d8a616d0146b9ba0905946fcb 100644 (file)
--- a/unihash.h
+++ b/unihash.h
@@ -1,6 +1,6 @@
  /* -*-c-*-
   *
- * $Id: unihash.h,v 1.1 2003/10/12 14:43:24 mdw Exp $
+ * $Id: unihash.h,v 1.2 2003/12/14 14:45:30 mdw Exp $
   *
   * Simple and efficient universal hashing for hashtables
   *
@@ -30,6 +30,9 @@
  /*----- Revision history --------------------------------------------------* 
   *
   * $Log: unihash.h,v $
+ * Revision 1.2  2003/12/14 14:45:30  mdw
+ * Test universal hashing and fix bugs.
+ *
   * Revision 1.1  2003/10/12 14:43:24  mdw
   * Universal hashing.
   *
@@ -50,7 +53,7 @@
   * $m_{n-1}, m_{n-2}, \ldots, m_2, m_1, m_0$% in %$\gf{q}%.
   * Then we compute
   *
- *   %$H_k(M) = k^{n+1} \sum_{0\le i<n} m_i k^{i+1}.$%
+ *   %$H_k(M) = k^{n+1} + \sum_{0\le i<n} m_i k^{i+1}.$%
   *
   * Note that %$H_0(M) = 0$% for all messages %$M$%.
   *
@@ -82,15 +85,15 @@
   * computationally unbounded adversaries.  Simply XOR the hash with a random
   * string indexed from a large random pad by some nonce sent with the
   * message.  The probability of a forgery attempt being successful is then
- * %$(\ell + 1)/2^t$%, where %$t$% is the tag length and %$n$% is the longest
- * message permitted.
+ * %$(\ell + 1)/2^t$%, where %$t$% is the tag length and %$\ell$% is the
+ * longest message permitted.
   */
  
  /*----- Practicalities ----------------------------------------------------*
   *
   * We work in %$\gf{2^32}$%, represented as a field of polynomials modulo
- * %$\{104c11db7}_x$% (this is the standard CRC-32 polynomial).  Our blocks
- * are bytes.  We append a big-endian byte length.
+ * %$\texttt{104c11db7}_x$% (this is the standard CRC-32 polynomial).  Our
+ * blocks are bytes.
   *
   * The choice of a 32-bit hash is made for pragmatic reasons: we're never
   * likely to actually want all 32 bits for a real hashtable anyway.  The
@@ -156,7 +159,7 @@ extern void unihash_setkey(unihash_info */*i*/, uint32 /*k*/);
   *             @const void *p@ = pointer to data to hash
   *             @size_t sz@ = size of the data
   *
- * Returns:    ---
+ * Returns:    Hash of data so far.
   *
   * Use:                Hashes data.  Call this as many times as needed.  
   */
author	mdw <mdw>
	Sun, 14 Dec 2003 14:45:30 +0000 (14:45 +0000)
committer	mdw <mdw>
	Sun, 14 Dec 2003 14:45:30 +0000 (14:45 +0000)
Makefile.am		patch \| blob \| blame \| history
unihash-check.pl	[new file with mode: 0644]	patch \| blob
unihash.c		patch \| blob \| blame \| history
unihash.h		patch \| blob \| blame \| history