/* -*-c-*-
*
* $Id: unihash.h,v 1.2 2003/12/14 14:45:30 mdw Exp $
*
* Simple and efficient universal hashing for hashtables
*
* (c) 2003 Straylight/Edgeware
*/
/*----- Licensing notice --------------------------------------------------*
*
* This file is part of the mLib utilities library.
*
* mLib is free software; you can redistribute it and/or modify
* it under the terms of the GNU Library General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* mLib is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with mLib; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
* MA 02111-1307, USA.
*/
/*----- Revision history --------------------------------------------------*
*
* $Log: unihash.h,v $
* Revision 1.2 2003/12/14 14:45:30 mdw
* Test universal hashing and fix bugs.
*
* Revision 1.1 2003/10/12 14:43:24 mdw
* Universal hashing.
*
*/
#ifndef MLIB_UNIHASH_H
#define MLIB_UNIHASH_H
#ifdef __cplusplus
extern "C" {
#endif
/*----- Concept -----------------------------------------------------------*
*
* Let %$\gf{q}$% be a finite field. Choose an arbitrary %$k \inr \gf{q}$%.
* Let %$M$% be a message. Injectively pad %$M$% and split it into blocks
* $m_{n-1}, m_{n-2}, \ldots, m_2, m_1, m_0$% in %$\gf{q}%.
* Then we compute
*
* %$H_k(M) = k^{n+1} + \sum_{0\le i m$%, and hence the coefficient of %$k_n$% is
* nonzero. Alternatively, if %$n = m$% then there must be some
* %$i \in \{ 0, \ldots, n - 1 \}$% with %$a_i \ne b_i$%, for otherwise the
* messages would be identical; but then the coefficient of %$k^{i+1}$% is
* %$a_i - b_i \ne 0$%.
*
* Hence we have a polynomial equation with degree at most %$\ell + 1$%;
* there must be at most %$\ell + 1$% solutions for %$k$%; but we choose
* %$k$% at random from a set of %$q$%; so the equation is true with
* probability at most %$(\ell + 1)/q$%.
*
* This function can be used as a simple MAC with provable security against
* computationally unbounded adversaries. Simply XOR the hash with a random
* string indexed from a large random pad by some nonce sent with the
* message. The probability of a forgery attempt being successful is then
* %$(\ell + 1)/2^t$%, where %$t$% is the tag length and %$\ell$% is the
* longest message permitted.
*/
/*----- Practicalities ----------------------------------------------------*
*
* We work in %$\gf{2^32}$%, represented as a field of polynomials modulo
* %$\texttt{104c11db7}_x$% (this is the standard CRC-32 polynomial). Our
* blocks are bytes.
*
* The choice of a 32-bit hash is made for pragmatic reasons: we're never
* likely to actually want all 32 bits for a real hashtable anyway. The
* truncation result is needed to keep us afloat with smaller tables.
*
* We compute hashes using a slightly unrolled version of Horner's rule,
* using the recurrence:
*
* %$a_{i+b} = (a_i + m_i) k^b + m_{i+1} k^{b-1} + \cdots + m_{i+b-1} k$%
*
* which involves one full-width multiply and %$b - 1$% one-byte multiplies;
* the latter may be efficiently computed using a table lookup. Start with
* %$a_0 = k$%.
*
* We precompute tables %$S[\cdot][\cdot][\cdot]$%, where
*
* %$S[u][v][w] = k^{u+1} x^{8v} w$%
* for %$0 \le u < b$%, %$0 \le v < 4$%, %$0 \le w < 256)$%.
*
* A one-byte multiply is one lookup; a full-width multiply is four lookups
* and three XORs. The processing required is then %$b + 3$% lookups and
* %$b + 3$% XORs per batch, or %$(b + 3)/b$% lookups and XORs per byte, at
* the expense of %$4 b$% kilobytes of tables. This compares relatively
* favorably with CRC32. Indeed, in tests, this implementation with $b = 4$%
* is faster than a 32-bit CRC.
*/
/*----- Header files ------------------------------------------------------*/
#include
#ifndef MLIB_BITS_H
# include "bits.h"
#endif
/*----- Data structures ---------------------------------------------------*/
#define UNIHASH_NBATCH 4
#define UNIHASH_POLY 0x04c11db7 /* From CRC32 */
typedef struct unihash_info {
uint32 s[UNIHASH_NBATCH][4][256]; /* S-tables as described */
} unihash_info;
/*----- Functions provided ------------------------------------------------*/
/* --- @unihash_setkey@ --- *
*
* Arguments: @unihash_info *i@ = where to store the precomputed tables
* @uint32 k@ = the key to set, randomly chosen
*
* Returns: ---
*
* Use: Calculates the tables required for efficient hashing.
*/
extern void unihash_setkey(unihash_info */*i*/, uint32 /*k*/);
/* --- @unihash_hash@ --- *
*
* Arguments: @const unihash_info *i@ = pointer to precomputed table
* @uint32 a@ = @UNIHASH_INIT(i)@ or value from previous call
* @const void *p@ = pointer to data to hash
* @size_t sz@ = size of the data
*
* Returns: Hash of data so far.
*
* Use: Hashes data. Call this as many times as needed.
*/
#define UNIHASH_INIT(i) ((i)->s[0][0][1]) /* %$k$% */
extern uint32 unihash_hash(const unihash_info */*i*/, uint32 /*a*/,
const void */*p*/, size_t /*sz*/);
/* --- @unihash@ --- *
*
* Arguments: @const unihash_info *i@ = precomputed tables
* @const void *p@ = pointer to data to hash
* @size_t sz@ = size of the data
*
* Returns: The hash value computed.
*
* Use: All-in-one hashing function. No faster than using the
* separate calls, but more convenient.
*/
#define UNIHASH(i, p, sz) (unihash_hash((i), UNIHASH_INIT((i)), (p), (sz)))
extern uint32 unihash(const unihash_info */*i*/,
const void */*p*/, size_t /*sz*/);
/*----- That's all, folks -------------------------------------------------*/
#ifdef __cplusplus
}
#endif
#endif