[mLib] / unihash.c

/* -*-c-*-
 *
 * Simple and efficient universal hashing for hashtables
 *
 * (c) 2003 Straylight/Edgeware
 */

/*----- Licensing notice --------------------------------------------------*
 *
 * This file is part of the mLib utilities library.
 *
 * mLib is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Library General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * mLib is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with mLib; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA.
 */

/*----- Header files ------------------------------------------------------*/

#include <assert.h>
#include <stdlib.h>

#include "unihash.h"

/*----- Main code ---------------------------------------------------------*/

/* --- @mul@ --- *
 *
 * Arguments:	@uint32 x, y@ = the two values to multiply
 *
 * Returns:	The product of @x@ and @y@, formed by shifting and XORing
 *		(there are no carries), with @UNIHASH_POLY@ folded in each
 *		time bit 31 of the running multiple is shifted out.
 *
 * Use:		Bit-at-a-time multiplication used to fill in the hashing
 *		tables.
 */

static uint32 mul(uint32 x, uint32 y)
{
  uint32 z = 0;

  while (y) {

    /* --- Add in the current multiple of @x@ if this bit of @y@ is set --- */

    if (y & 1) z ^= x;

    /* --- Double @x@, reducing if its top bit is about to fall off --- *
     *
     * The mask is built from an unsigned long constant: writing it as
     * @1 << 31@ shifts a signed @int@ into (or past) its sign bit, which
     * is undefined behaviour.
     */

    if (x & (1ul << 31))
      x = U32(x << 1) ^ UNIHASH_POLY;
    else
      x = U32(x << 1);
    y = U32(y >> 1);
  }
  return (z);
}

/* --- @unihash_setkey@ --- *
 *
 * Arguments:	@unihash_info *i@ = where to store the precomputed tables
 *		@uint32 k@ = the key to set, randomly chosen
 *
 * Returns:	---
 *
 * Use:		Calculates the tables required for efficient hashing.
 */

void unihash_setkey(unihash_info *i, uint32 k)
{
  size_t batch, byte;
  unsigned pos;
  uint32 kpow = 1;

  for (batch = 0; batch < UNIHASH_NBATCH; batch++) {

    /* --- Step to the next power of the key: @kpow@ is now %$k^{batch+1}$% --- */

    kpow = mul(kpow, k);

    /* --- Tabulate @kpow@ times every byte value at each byte position --- *
     *
     * Entry @s[batch][pos][byte]@ holds the product of @kpow@ and the word
     * whose byte number @pos@ (counting from the least significant end)
     * is @byte@ and whose other bytes are zero.
     */

    for (byte = 0; byte < 256; byte++) {
      for (pos = 0; pos < 4; pos++)
	i->s[batch][pos][byte] = mul(kpow, byte << (8 * pos));
    }
  }
}

/* --- @unihash_hash@ --- *
 *
 * Arguments:	@const unihash_info *i@ = pointer to precomputed table
 *		@uint32 a@ = @i->s[0][0][1]@ or value from previous call
 *		@const void *p@ = pointer to data to hash
 *		@size_t sz@ = size of the data
 *
 * Returns:	Hash of data so far.
 *
 * Use:		Hashes data.  Call this as many times as needed, feeding
 *		each call's result in as @a@ for the next.  If @sz@ is zero
 *		then @a@ is returned unchanged.
 */

uint32 unihash_hash(const unihash_info *i, uint32 a,
		    const void *p, size_t sz)
{
  const octet *pp = p;

  /* --- The unrolled code below indexes tables 0 to 3 explicitly --- */

  assert(UNIHASH_NBATCH == 4);

  /* --- Table lookups --- *
   *
   * @FULLMULT(u, x)@ XORs together four entries of table set @u@, one
   * for each byte position, indexed by @U8@ of @x@ shifted right by 0, 8,
   * 16 and 24 bits.  @BYTEMULT(u, x)@ is a single lookup in the
   * position-0 table, for use when @x@ is one byte of input.
   *
   * Both are fully parenthesized expressions and carry no trailing
   * semicolon, so the caller's own semicolon ends the statement.
   */

#define FULLMULT(u, x)							\
  (i->s[(u)][0][U8((x) >>  0)] ^ i->s[(u)][1][U8((x) >>  8)] ^		\
   i->s[(u)][2][U8((x) >> 16)] ^ i->s[(u)][3][U8((x) >> 24)])

#define BYTEMULT(u, x) (i->s[(u)][0][(x)])

  /* --- Do the main bulk in batches of %$n$% bytes --- *
   *
   * We have %$a$% and %$m_{n-1}, \ldots, m_1, m_0$%; we want
   *
   * %$a' = (a + m_{n-1}) k^n + m_{n-2} k^{n-1} + \cdots + m_1 k^2 + m_0 k$%
   *
   * The first byte is folded into the accumulator before the full-width
   * lookup; the remaining bytes each need only a single-byte lookup.
   */

  while (sz >= UNIHASH_NBATCH) {
    a ^= *pp++;
    a = FULLMULT(3, a);
    a ^= BYTEMULT(2, *pp++);
    a ^= BYTEMULT(1, *pp++);
    a ^= BYTEMULT(0, *pp++);
    sz -= UNIHASH_NBATCH;
  }

  /* --- The tail end is a smaller batch --- *
   *
   * Same shape as above for the 1, 2 or 3 bytes left over: each case
   * does the full-width step with the table set matching the batch
   * length, then jumps into the chain of single-byte steps at the right
   * point.  With nothing left over no case matches and @a@ is untouched.
   */

  switch (sz) {
    case  3: a ^= *pp++; a = FULLMULT(2, a); goto batch_2;
    case  2: a ^= *pp++; a = FULLMULT(1, a); goto batch_1;
    case  1: a ^= *pp++; a = FULLMULT(0, a); goto batch_0;
    batch_2: a ^= BYTEMULT(1, *pp++);
    batch_1: a ^= BYTEMULT(0, *pp++);
    batch_0: break;
  }

  return (a);
}

/* --- @unihash@ --- *
 *
 * Arguments:	@const unihash_info *i@ = precomputed tables
 *		@const void *p@ = pointer to data to hash
 *		@size_t sz@ = size of the data
 *
 * Returns:	The hash value computed.
 *
 * Use:		One-shot hashing of a complete buffer.  This is simply the
 *		@UNIHASH@ macro wrapped up as a function: it is no faster
 *		than making the separate calls yourself, just more
 *		convenient.
 */

uint32 unihash(const unihash_info *i, const void *p, size_t sz)
{
  uint32 h = UNIHASH(i, p, sz);

  return (h);
}

/*----- Test rig ----------------------------------------------------------*/

#ifdef TEST_RIG

#include "testrig.h"

/* --- @verify@ --- *
 *
 * Arguments:	@dstr *v@ = test vector: @v[0]@ is the key, @v[1]@ the data
 *			to hash and @v[2]@ the expected hash
 *
 * Returns:	Nonzero if every way of hashing the data gave the expected
 *		answer, zero if any of them disagreed.
 *
 * Use:		Hashes the data once with @unihash@ and then again in
 *		pieces of each size listed in @step@, checking that the
 *		result is always the same.  Mismatches are reported on
 *		standard error.
 */

static int verify(dstr *v)
{
  static const int step[] = { 0, 1, 5, 6, 7, 8, 23, -1 };
  unihash_info ui;
  uint32 key, want, got;
  const char *q;
  size_t left;
  int chunk;
  int idx = 0;
  int ok = 1;

  /* --- Fetch the key and expected answer, and build the tables --- */

  key = *(uint32 *)v[0].buf;
  want = *(uint32 *)v[2].buf;
  unihash_setkey(&ui, key);

  /* --- Work through the chunk sizes; a negative entry ends the list --- */

  while (step[idx] >= 0) {
    chunk = step[idx];

    if (chunk == 0) {

      /* --- Size zero stands for the all-in-one interface --- */

      got = unihash(&ui, v[1].buf, v[1].len);
    } else {

      /* --- Feed the data through @chunk@ bytes at a time --- *
       *
       * The final piece is cut down to whatever is left.
       */

      got = UNIHASH_INIT(&ui);
      for (q = v[1].buf, left = v[1].len; left; q += chunk, left -= chunk) {
	if (chunk > left) chunk = left;
	got = unihash_hash(&ui, got, q, chunk);
      }
    }

    /* --- Complain if this chunking produced the wrong answer --- */

    if (want != got) {
      ok = 0;
      fprintf(stderr, "\nunihash failed\n");
      fprintf(stderr, "	 key = %08lx\n", (unsigned long)key);
      fprintf(stderr, "	 data = %s\n", v[1].buf);
      fprintf(stderr, "	 step = %d\n", step[idx]);
      fprintf(stderr, "	 expected = %08lx\n", (unsigned long)want);
      fprintf(stderr, "	 computed = %08lx\n", (unsigned long)got);
    }

    idx++;
  }
  return (ok);
}

/* --- Test table --- *
 *
 * One test, named "hash", handled by @verify@.  Its three fields are a
 * 32-bit key, the string to hash and the expected 32-bit result, in the
 * order in which @verify@ reads them from its vector.  The all-zero entry
 * marks the end of the table.
 */

static const test_chunk tests[] = {
  { "hash", verify, { &type_uint32, &type_string, &type_uint32 } },
  { 0, 0, { 0 } }
};

/* --- @main@ --- *
 *
 * Arguments:	@int argc@ = number of command-line arguments
 *		@char *argv[]@ = the command-line arguments
 *
 * Returns:	Zero, if @test_run@ returns.
 *
 * Use:		Entry point for the test rig: passes the command line, the
 *		test table and the name "unihash.in" (presumably the default
 *		test-vector file -- confirm against @testrig.h@) to
 *		@test_run@.
 */

int main(int argc, char *argv[])
{
  test_run(argc, argv, tests, "unihash.in");
  return (0);
}

#endif

/*----- That's all, folks -------------------------------------------------*/
Commit	Line	Data
8fe3c82b	1	/* --c--
8fe3c82b	2	*
	3	* Simple and efficient universal hashing for hashtables
	4	*
	5	* (c) 2003 Straylight/Edgeware
	6	*/
	7
d4efbcd9	8	/----- Licensing notice --------------------------------------------------
8fe3c82b	9	*
	10	* This file is part of the mLib utilities library.
	11	*
	12	* mLib is free software; you can redistribute it and/or modify
	13	* it under the terms of the GNU Library General Public License as
	14	* published by the Free Software Foundation; either version 2 of the
	15	* License, or (at your option) any later version.
d4efbcd9	16	*
8fe3c82b	17	* mLib is distributed in the hope that it will be useful,
	18	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	19	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	20	* GNU Library General Public License for more details.
d4efbcd9	21	*
8fe3c82b	22	* You should have received a copy of the GNU Library General Public
	23	* License along with mLib; if not, write to the Free
	24	* Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
	25	* MA 02111-1307, USA.
	26	*/
	27
8fe3c82b	28	/----- Header files ------------------------------------------------------/
	29
	30	#include <assert.h>
	31	#include <stdlib.h>
	32
	33	#include "unihash.h"
	34
	35	/----- Main code ---------------------------------------------------------/
	36
	37	/* --- @unihash_setkey@ --- *
	38	*
	39	* Arguments: @unihash_info *i@ = where to store the precomputed tables
	40	* @uint32 k@ = the key to set, randomly chosen
	41	*
	42	* Returns: ---
	43	*
	44	* Use: Calculates the tables required for efficient hashing.
	45	*/
	46
	47	static uint32 mul(uint32 x, uint32 y)
	48	{
	49	uint32 z = 0;
	50	while (y) {
	51	if (y & 1) z ^= x;
	52	if (x & (1 << 31))
	53	x = U32(x << 1) ^ UNIHASH_POLY;
	54	else
	55	x = U32(x << 1);
	56	y = U32(y >> 1);
	57	}
	58	return (z);
	59	}
	60
	61	void unihash_setkey(unihash_info *i, uint32 k)
	62	{
	63	size_t a;
	64	size_t b;
	65	uint32 x = 1;
	66
	67	for (a = 0; a < UNIHASH_NBATCH; a++) {
	68	x = mul(x, k);
	69	for (b = 0; b < 256; b++) {
	70	i->s[a][0][b] = mul(x, b << 0);
	71	i->s[a][1][b] = mul(x, b << 8);
	72	i->s[a][2][b] = mul(x, b << 16);
	73	i->s[a][3][b] = mul(x, b << 24);
	74	}
	75	}
	76	}
	77
	78	/* --- @unihash_hash@ --- *
	79	*
	80	* Arguments: @const unihash_info *i@ = pointer to precomputed table
	81	* @uint32 a@ = @i->[0][0][1]@ or value from previous call
	82	* @const void *p@ = pointer to data to hash
	83	* @size_t sz@ = size of the data
	84	*
573eadb5	85	* Returns: Hash of data so far.
8fe3c82b	86	*
d4efbcd9	87	* Use: Hashes data. Call this as many times as needed.
8fe3c82b	88	*/
	89
	90	uint32 unihash_hash(const unihash_info *i, uint32 a,
	91	const void *p, size_t sz)
	92	{
	93	const octet *pp = p;
	94
	95	assert(UNIHASH_NBATCH == 4);
	96
	97	#define FULLMULT(u, x) \
d4efbcd9	98	(i->s[u][0][U8((x) >> 0)] ^ i->s[u][1][U8((x) >> 8)] ^ \
8fe3c82b	99	i->s[u][2][U8((x) >> 16)] ^ i->s[u][3][U8((x) >> 24)]);
	100
	101	#define BYTEMULT(u, x) i->s[u][0][x]
	102
	103	/* --- Do the main bulk in batches of %$n$% bytes --- *
	104	*
	105	* We have %$a$% and %$m_{n-1}, \ldots, m_1, m_0$%; we want
	106	*
	107	* %$a' = (a + m_{n-1}) k^n + m_{n-2} k^{n-1} + \cdots + m_1 k^2 + m_0 k$%
	108	*/
	109
	110	while (sz >= UNIHASH_NBATCH) {
	111	a ^= *pp++;
	112	a = FULLMULT(3, a);
	113	a ^= BYTEMULT(2, *pp++);
	114	a ^= BYTEMULT(1, *pp++);
	115	a ^= BYTEMULT(0, *pp++);
573eadb5	116	sz -= UNIHASH_NBATCH;
8fe3c82b	117	}
	118
	119	/* --- The tail end is a smaller batch --- */
	120
	121	switch (sz) {
	122	case 3: a ^= *pp++; a = FULLMULT(2, a); goto batch_2;
	123	case 2: a ^= *pp++; a = FULLMULT(1, a); goto batch_1;
	124	case 1: a ^= *pp++; a = FULLMULT(0, a); goto batch_0;
	125	batch_2: a ^= BYTEMULT(1, *pp++);
	126	batch_1: a ^= BYTEMULT(0, *pp++);
	127	batch_0: break;
	128	}
	129
	130	return (a);
	131	}
	132
	133	/* --- @unihash@ --- *
	134	*
	135	* Arguments: @const unihash_info *i@ = precomputed tables
	136	* @const void *p@ = pointer to data to hash
	137	* @size_t sz@ = size of the data
	138	*
	139	* Returns: The hash value computed.
	140	*
	141	* Use: All-in-one hashing function. No faster than using the
d4efbcd9	142	* separate calls, but more convenient.
8fe3c82b	143	*/
	144
	145	uint32 unihash(const unihash_info i, const void p, size_t sz)
	146	{
	147	return (UNIHASH(i, p, sz));
	148	}
	149
573eadb5	150	/----- Test rig ----------------------------------------------------------/
	151
	152	#ifdef TEST_RIG
	153
	154	#include "testrig.h"
	155
	156	static int verify(dstr *v)
	157	{
	158	unihash_info ui;
	159	uint32 k;
	160	uint32 h, hh;
	161	size_t n;
	162	int i, c;
	163	const char *p;
	164	int ok = 1;
	165
	166	static const int step[] = { 0, 1, 5, 6, 7, 8, 23, -1 };
	167
	168	/* --- Set up for using this key --- */
	169
	170	k = (uint32 )v[0].buf;
	171	h = (uint32 )v[2].buf;
	172	unihash_setkey(&ui, k);
	173
	174	/* --- Hash the data a lot --- */
	175
	176	for (i = 0; step[i] >= 0; i++) {
	177	c = step[i];
	178	if (!c)
	179	hh = unihash(&ui, v[1].buf, v[1].len);
	180	else {
	181	hh = UNIHASH_INIT(&ui);
	182	p = v[1].buf;
	183	n = v[1].len;
	184	while (n) {
	185	if (c > n) c = n;
	186	hh = unihash_hash(&ui, hh, p, c);
	187	p += c;
	188	n -= c;
	189	}
	190	}
	191	if (h != hh) {
	192	ok = 0;
	193	fprintf(stderr, "\nunihash failed\n");
d4efbcd9 MW	194	fprintf(stderr, " key = %08lx\n", (unsigned long)k);
	195	fprintf(stderr, " data = %s\n", v[1].buf);
	196	fprintf(stderr, " step = %d\n", step[i]);
	197	fprintf(stderr, " expected = %08lx\n", (unsigned long)h);
	198	fprintf(stderr, " computed = %08lx\n", (unsigned long)hh);
573eadb5	199	}
	200	}
	201	return (ok);
	202	}
	203
	204	static const test_chunk tests[] = {
	205	{ "hash", verify, { &type_uint32, &type_string, &type_uint32 } },
	206	{ 0, 0, { 0 } }
	207	};
	208
	209	int main(int argc, char *argv[])
	210	{
	211	test_run(argc, argv, tests, "unihash.in");
	212	return (0);
	213	}
	214
	215	#endif
	216
8fe3c82b	217	/----- That's all, folks -------------------------------------------------/