[mLib] / unihash.c

/* -*-c-*-
 *
 * $Id: unihash.c,v 1.3 2004/04/08 01:36:13 mdw Exp $
 *
 * Simple and efficient universal hashing for hashtables
 *
 * (c) 2003 Straylight/Edgeware
 */

/*----- Licensing notice --------------------------------------------------* 
 *
 * This file is part of the mLib utilities library.
 *
 * mLib is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Library General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 * 
 * mLib is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Library General Public License for more details.
 * 
 * You should have received a copy of the GNU Library General Public
 * License along with mLib; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA.
 */

/*----- Header files ------------------------------------------------------*/

#include <assert.h>
#include <stdlib.h>

#include "unihash.h"

/*----- Main code ---------------------------------------------------------*/

/* --- @unihash_setkey@ --- *
 *
 * Arguments:	@unihash_info *i@ = where to store the precomputed tables
 *		@uint32 k@ = the key to set, randomly chosen
 *
 * Returns:	---
 *
 * Use:		Calculates the tables required for efficient hashing.
 */

/* --- @mul@ --- *
 *
 * Arguments:	@uint32 x, y@ = the two values to multiply
 *
 * Returns:	The carry-less product of @x@ and @y@, kept to 32 bits.
 *
 * Use:		Shift-and-XOR multiplication: for each set bit of @y@,
 *		working upwards from bit 0, the current @x@ is XORed
 *		into the result, and @x@ is then doubled.  Whenever the
 *		doubling pushes a bit out of the top of the 32-bit word,
 *		@UNIHASH_POLY@ is XORed in to reduce it again.
 *		(Presumably this is multiplication in a binary field
 *		with @UNIHASH_POLY@ as the reduction polynomial --
 *		confirm against @unihash.h@.)
 */

static uint32 mul(uint32 x, uint32 y)
{
  /* The mask is built in @uint32@ rather than @int@: shifting a signed 1
   * into the sign bit is undefined, and out of range if @int@ is narrow.
   */
  const uint32 top = (uint32)1 << 31;
  uint32 z = 0;

  while (y) {
    if (y & 1) z ^= x;
    if (x & top)
      x = U32(x << 1) ^ UNIHASH_POLY;
    else
      x = U32(x << 1);
    y = U32(y >> 1);
  }
  return (z);
}

void unihash_setkey(unihash_info *i, uint32 k)
{
  uint32 kpow = 1;
  size_t batch, pos, val;

  /* --- Fill in one group of tables per batch slot --- *
   *
   * On entry to the inner loops, @kpow@ is @k@ multiplied by itself
   * @batch + 1@ times using @mul@.  Table @s[batch][pos]@ then holds, for
   * every byte value @val@, the product of @kpow@ with @val@ placed at
   * byte position @pos@ of a 32-bit word.
   */

  for (batch = 0; batch < UNIHASH_NBATCH; batch++) {
    kpow = mul(kpow, k);
    for (pos = 0; pos < 4; pos++) {
      for (val = 0; val < 256; val++)
        i->s[batch][pos][val] = mul(kpow, val << (8 * pos));
    }
  }
}

/* --- @unihash_hash@ --- *
 *
 * Arguments:	@const unihash_info *i@ = pointer to precomputed table
 *		@uint32 a@ = @i->s[0][0][1]@ or value from previous call
 *		@const void *p@ = pointer to data to hash
 *		@size_t sz@ = size of the data
 *
 * Returns:	Hash of data so far.
 *
 * Use:		Hashes data.  Call this as many times as needed.  
 */

uint32 unihash_hash(const unihash_info *i, uint32 a,
		    const void *p, size_t sz)
{
  const octet *pp = p;

  /* --- The unrolled code below is written for four-byte batches --- */

  assert(UNIHASH_NBATCH == 4);

  /* --- Table lookups --- *
   *
   * @FULLMULT(u, x)@ XORs together one lookup in @i->s[u]@ for each of the
   * four bytes of @x@; @BYTEMULT(u, x)@ is the single lookup needed when
   * @x@ is only one byte wide.  Given how @unihash_setkey@ fills the
   * tables, both yield @x@ times the key raised to the power @u + 1@.
   *
   * Both expand to a parenthesized expression with no trailing semicolon,
   * so they can be used anywhere an expression is allowed.  @FULLMULT@
   * evaluates @x@ four times: do not pass an argument with side effects.
   */

#define FULLMULT(u, x)							\
  (i->s[(u)][0][U8((x) >>  0)] ^ i->s[(u)][1][U8((x) >>  8)] ^		\
   i->s[(u)][2][U8((x) >> 16)] ^ i->s[(u)][3][U8((x) >> 24)])

#define BYTEMULT(u, x) (i->s[(u)][0][(x)])

  /* --- Do the main bulk in batches of %$n$% bytes --- *
   *
   * We have %$a$% and %$m_{n-1}, \ldots, m_1, m_0$%; we want
   *
   * %$a' = (a + m_{n-1}) k^n + m_{n-2} k^{n-1} + \cdots + m_1 k^2 + m_0 k$%
   *
   * The first byte is folded into the accumulator before the full-width
   * multiply; the remaining bytes each need only a single-byte lookup.
   */

  while (sz >= UNIHASH_NBATCH) {
    a ^= *pp++;
    a = FULLMULT(3, a);
    a ^= BYTEMULT(2, *pp++);
    a ^= BYTEMULT(1, *pp++);
    a ^= BYTEMULT(0, *pp++);
    sz -= UNIHASH_NBATCH;
  }

  /* --- The tail end is a smaller batch --- *
   *
   * Between zero and three bytes remain.  Each case does the full-width
   * multiply for its batch length and then jumps into the shared chain of
   * single-byte lookups.  With nothing left, no case matches and the
   * accumulator is returned unchanged.
   */

  switch (sz) {
    case  3: a ^= *pp++; a = FULLMULT(2, a); goto batch_2;
    case  2: a ^= *pp++; a = FULLMULT(1, a); goto batch_1;
    case  1: a ^= *pp++; a = FULLMULT(0, a); goto batch_0;
    batch_2: a ^= BYTEMULT(1, *pp++);
    batch_1: a ^= BYTEMULT(0, *pp++);
    batch_0: break;
  }

  return (a);
}

/* --- @unihash@ --- *
 *
 * Arguments:	@const unihash_info *i@ = precomputed tables
 *		@const void *p@ = pointer to data to hash
 *		@size_t sz@ = size of the data
 *
 * Returns:	The hash value computed.
 *
 * Use:		All-in-one hashing function.  No faster than using the
 * 		separate calls, but more convenient.
 */

uint32 unihash(const unihash_info *i, const void *p, size_t sz)
{
  /* All of the work is done by the @UNIHASH@ macro from the header */
  uint32 h = UNIHASH(i, p, sz);

  return (h);
}

/*----- Test rig ----------------------------------------------------------*/

#ifdef TEST_RIG

#include "testrig.h"

/* --- @verify@ --- *
 *
 * Arguments:	@dstr *v@ = vector fields: key, data to hash, expected hash
 *
 * Returns:	Nonzero if every run produced the expected hash, zero if
 *		any run differed.
 *
 * Use:		Hashes the data once in a single call, and then again in
 *		pieces of each of several fixed sizes, checking that the
 *		result is always the expected value.  Failures are
 *		reported on standard error.
 */

static int verify(dstr *v)
{
  /* Piece sizes to try; 0 means `all in one call', a negative value ends */
  static const int step[] = { 0, 1, 5, 6, 7, 8, 23, -1 };

  unihash_info ui;
  uint32 k = *(uint32 *)v[0].buf;
  uint32 h = *(uint32 *)v[2].buf;
  uint32 hh;
  const int *s;
  int ok = 1;

  /* --- Set up for using this key --- */

  unihash_setkey(&ui, k);

  /* --- Hash the data once for each piece size --- */

  for (s = step; *s >= 0; s++) {
    if (*s == 0)
      hh = unihash(&ui, v[1].buf, v[1].len);
    else {
      const char *p = v[1].buf;
      size_t n = v[1].len;
      int c = *s;

      hh = UNIHASH_INIT(&ui);
      for (; n; p += c, n -= c) {
        if (c > n) c = n;		/* Final piece may be short */
        hh = unihash_hash(&ui, hh, p, c);
      }
    }

    /* --- Nothing to report if this run matched --- */

    if (h == hh) continue;

    ok = 0;
    fprintf(stderr, "\nunihash failed\n");
    fprintf(stderr, "  key = %08lx\n", (unsigned long)k);
    fprintf(stderr, "  data = %s\n", v[1].buf);
    fprintf(stderr, "  step = %d\n", *s);
    fprintf(stderr, "  expected = %08lx\n", (unsigned long)h);
    fprintf(stderr, "  computed = %08lx\n", (unsigned long)hh);
  }
  return (ok);
}

/* --- Test chunk table --- *
 *
 * A single chunk, named "hash" and checked by @verify@.  Each vector
 * supplies a @uint32@, a string and another @uint32@, which @verify@ reads
 * as the key, the data to hash and the expected hash respectively.  The
 * all-zero entry closes the table (presumably the terminator expected by
 * @test_run@ -- confirm against @testrig.h@).
 */
static const test_chunk tests[] = {
  { "hash", verify, { &type_uint32, &type_string, &type_uint32 } },
  { 0, 0, { 0 } }
};

/* --- @main@ --- *
 *
 * Arguments:	@int argc@ = number of command line arguments
 *		@char *argv[]@ = the command line arguments
 *
 * Returns:	Zero.
 *
 * Use:		Entry point of the test program: hands the command line
 *		and the @tests@ table to @test_run@.  The string
 *		"unihash.in" is presumably the default test-vector file
 *		name -- confirm against @testrig.h@.
 */
int main(int argc, char *argv[])
{
  test_run(argc, argv, tests, "unihash.in");
  return (0);
}

#endif

/*----- That's all, folks -------------------------------------------------*/
Commit	Line	Data
8fe3c82b	1	/* --c--
8fe3c82b	2	*
8656dc50	3	* $Id: unihash.c,v 1.3 2004/04/08 01:36:13 mdw Exp $
8fe3c82b	4	*
	5	* Simple and efficient universal hashing for hashtables
	6	*
	7	* (c) 2003 Straylight/Edgeware
	8	*/
	9
	10	/----- Licensing notice --------------------------------------------------
	11	*
	12	* This file is part of the mLib utilities library.
	13	*
	14	* mLib is free software; you can redistribute it and/or modify
	15	* it under the terms of the GNU Library General Public License as
	16	* published by the Free Software Foundation; either version 2 of the
	17	* License, or (at your option) any later version.
	18	*
	19	* mLib is distributed in the hope that it will be useful,
	20	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	21	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	22	* GNU Library General Public License for more details.
	23	*
	24	* You should have received a copy of the GNU Library General Public
	25	* License along with mLib; if not, write to the Free
	26	* Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
	27	* MA 02111-1307, USA.
	28	*/
	29
8fe3c82b	30	/----- Header files ------------------------------------------------------/
	31
	32	#include <assert.h>
	33	#include <stdlib.h>
	34
	35	#include "unihash.h"
	36
	37	/----- Main code ---------------------------------------------------------/
	38
	39	/* --- @unihash_setkey@ --- *
	40	*
	41	* Arguments: @unihash_info *i@ = where to store the precomputed tables
	42	* @uint32 k@ = the key to set, randomly chosen
	43	*
	44	* Returns: ---
	45	*
	46	* Use: Calculates the tables required for efficient hashing.
	47	*/
	48
	49	static uint32 mul(uint32 x, uint32 y)
	50	{
	51	uint32 z = 0;
	52	while (y) {
	53	if (y & 1) z ^= x;
	54	if (x & (1 << 31))
	55	x = U32(x << 1) ^ UNIHASH_POLY;
	56	else
	57	x = U32(x << 1);
	58	y = U32(y >> 1);
	59	}
	60	return (z);
	61	}
	62
	63	void unihash_setkey(unihash_info *i, uint32 k)
	64	{
	65	size_t a;
	66	size_t b;
	67	uint32 x = 1;
	68
	69	for (a = 0; a < UNIHASH_NBATCH; a++) {
	70	x = mul(x, k);
	71	for (b = 0; b < 256; b++) {
	72	i->s[a][0][b] = mul(x, b << 0);
	73	i->s[a][1][b] = mul(x, b << 8);
	74	i->s[a][2][b] = mul(x, b << 16);
	75	i->s[a][3][b] = mul(x, b << 24);
	76	}
	77	}
	78	}
	79
	80	/* --- @unihash_hash@ --- *
	81	*
	82	* Arguments: @const unihash_info *i@ = pointer to precomputed table
	83	* @uint32 a@ = @i->[0][0][1]@ or value from previous call
	84	* @const void *p@ = pointer to data to hash
	85	* @size_t sz@ = size of the data
	86	*
573eadb5	87	* Returns: Hash of data so far.
8fe3c82b	88	*
	89	* Use: Hashes data. Call this as many times as needed.
	90	*/
	91
	92	uint32 unihash_hash(const unihash_info *i, uint32 a,
	93	const void *p, size_t sz)
	94	{
	95	const octet *pp = p;
	96
	97	assert(UNIHASH_NBATCH == 4);
	98
	99	#define FULLMULT(u, x) \
	100	(i->s[u][0][U8((x) >> 0)] ^ i->s[u][1][U8((x) >> 8)] ^ \
	101	i->s[u][2][U8((x) >> 16)] ^ i->s[u][3][U8((x) >> 24)]);
	102
	103	#define BYTEMULT(u, x) i->s[u][0][x]
	104
	105	/* --- Do the main bulk in batches of %$n$% bytes --- *
	106	*
	107	* We have %$a$% and %$m_{n-1}, \ldots, m_1, m_0$%; we want
	108	*
	109	* %$a' = (a + m_{n-1}) k^n + m_{n-2} k^{n-1} + \cdots + m_1 k^2 + m_0 k$%
	110	*/
	111
	112	while (sz >= UNIHASH_NBATCH) {
	113	a ^= *pp++;
	114	a = FULLMULT(3, a);
	115	a ^= BYTEMULT(2, *pp++);
	116	a ^= BYTEMULT(1, *pp++);
	117	a ^= BYTEMULT(0, *pp++);
573eadb5	118	sz -= UNIHASH_NBATCH;
8fe3c82b	119	}
	120
	121	/* --- The tail end is a smaller batch --- */
	122
	123	switch (sz) {
	124	case 3: a ^= *pp++; a = FULLMULT(2, a); goto batch_2;
	125	case 2: a ^= *pp++; a = FULLMULT(1, a); goto batch_1;
	126	case 1: a ^= *pp++; a = FULLMULT(0, a); goto batch_0;
	127	batch_2: a ^= BYTEMULT(1, *pp++);
	128	batch_1: a ^= BYTEMULT(0, *pp++);
	129	batch_0: break;
	130	}
	131
	132	return (a);
	133	}
	134
	135	/* --- @unihash@ --- *
	136	*
	137	* Arguments: @const unihash_info *i@ = precomputed tables
	138	* @const void *p@ = pointer to data to hash
	139	* @size_t sz@ = size of the data
	140	*
	141	* Returns: The hash value computed.
	142	*
	143	* Use: All-in-one hashing function. No faster than using the
	144	* separate calls, but more convenient.
	145	*/
	146
	147	uint32 unihash(const unihash_info i, const void p, size_t sz)
	148	{
	149	return (UNIHASH(i, p, sz));
	150	}
	151
573eadb5	152	/----- Test rig ----------------------------------------------------------/
	153
	154	#ifdef TEST_RIG
	155
	156	#include "testrig.h"
	157
	158	static int verify(dstr *v)
	159	{
	160	unihash_info ui;
	161	uint32 k;
	162	uint32 h, hh;
	163	size_t n;
	164	int i, c;
	165	const char *p;
	166	int ok = 1;
	167
	168	static const int step[] = { 0, 1, 5, 6, 7, 8, 23, -1 };
	169
	170	/* --- Set up for using this key --- */
	171
	172	k = (uint32 )v[0].buf;
	173	h = (uint32 )v[2].buf;
	174	unihash_setkey(&ui, k);
	175
	176	/* --- Hash the data a lot --- */
	177
	178	for (i = 0; step[i] >= 0; i++) {
	179	c = step[i];
	180	if (!c)
	181	hh = unihash(&ui, v[1].buf, v[1].len);
	182	else {
	183	hh = UNIHASH_INIT(&ui);
	184	p = v[1].buf;
	185	n = v[1].len;
	186	while (n) {
	187	if (c > n) c = n;
	188	hh = unihash_hash(&ui, hh, p, c);
	189	p += c;
	190	n -= c;
	191	}
	192	}
	193	if (h != hh) {
	194	ok = 0;
	195	fprintf(stderr, "\nunihash failed\n");
	196	fprintf(stderr, " key = %08lx\n", (unsigned long)k);
	197	fprintf(stderr, " data = %s\n", v[1].buf);
	198	fprintf(stderr, " step = %d\n", step[i]);
	199	fprintf(stderr, " expected = %08lx\n", (unsigned long)h);
	200	fprintf(stderr, " computed = %08lx\n", (unsigned long)hh);
	201	}
	202	}
	203	return (ok);
	204	}
	205
	206	static const test_chunk tests[] = {
	207	{ "hash", verify, { &type_uint32, &type_string, &type_uint32 } },
	208	{ 0, 0, { 0 } }
	209	};
	210
	211	int main(int argc, char *argv[])
	212	{
	213	test_run(argc, argv, tests, "unihash.in");
	214	return (0);
	215	}
216
217	#endif
218
8fe3c82b	219	/----- That's all, folks -------------------------------------------------/