Commit | Line | Data |
---|---|---|

8fe3c82b | 1 | /* -*-c-*- |

2 | * | |

8656dc50 | 3 | * $Id: unihash.h,v 1.4 2004/04/08 01:36:13 mdw Exp $ |

8fe3c82b | 4 | * |

5 | * Simple and efficient universal hashing for hashtables | |

6 | * | |

7 | * (c) 2003 Straylight/Edgeware | |

8 | */ | |

9 | ||

d4efbcd9 | 10 | /*----- Licensing notice --------------------------------------------------* |

8fe3c82b | 11 | * |

12 | * This file is part of the mLib utilities library. | |

13 | * | |

14 | * mLib is free software; you can redistribute it and/or modify | |

15 | * it under the terms of the GNU Library General Public License as | |

16 | * published by the Free Software Foundation; either version 2 of the | |

17 | * License, or (at your option) any later version. | |

d4efbcd9 | 18 | * |

8fe3c82b | 19 | * mLib is distributed in the hope that it will be useful, |

20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |

21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |

22 | * GNU Library General Public License for more details. | |

d4efbcd9 | 23 | * |

8fe3c82b | 24 | * You should have received a copy of the GNU Library General Public |

25 | * License along with mLib; if not, write to the Free | |

26 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, | |

27 | * MA 02111-1307, USA. | |

28 | */ | |

29 | ||

8fe3c82b | 30 | #ifndef MLIB_UNIHASH_H |

31 | #define MLIB_UNIHASH_H | |

32 | ||

33 | #ifdef __cplusplus | |

34 | extern "C" { | |

35 | #endif | |

36 | ||

37 | ||

38 | /*----- Concept -----------------------------------------------------------* | |

39 | * | |

40 | * Let %$\gf{q}$% be a finite field. Choose an arbitrary %$k \inr \gf{q}$%. | |

41 | * Let %$M$% be a message. Injectively pad %$M$% and split it into blocks | |

42 | * $m_{n-1}, m_{n-2}, \ldots, m_2, m_1, m_0$% in %$\gf{q}%. | |

43 | * Then we compute | |

44 | * | |

573eadb5 | 45 | * %$H_k(M) = k^{n+1} + \sum_{0\le i<n} m_i k^{i+1}.$% |

8fe3c82b | 46 | * |

47 | * Note that %$H_0(M) = 0$% for all messages %$M$%. | |

48 | * | |

49 | * If we deal with messages at most %$\ell$% blocks long then %$H_k(\cdot)$% | |

50 | * is %$(\ell + 1)/q$%-almost universal. Moreover, if %$q = 2^f$% then | |

51 | * %$H_k(\cdot)$% is %$(\ell + 1)/q$%-almost XOR-universal. | |

52 | * | |

53 | * Proof. Let %$A$% and %$B$% be two messages, represented by | |

54 | * %$a_{n-1}, \ldots, a_0$% and %$b_{m-1}, \ldots, b_0$% respectively; and | |

55 | * choose any %$\delta \in \gf{q}$%. We must bound the probability that | |

56 | * | |

57 | * %$k^{n+1} + a_{n-1} k^{n} + \cdots + a_1 k^2 + a_0 k - {}$% | |

58 | * %$k^{m+1} - b_{m-1} k^{m} - \cdots - b_1 k^2 - b_0 k = \delta$%. | |

59 | * | |

60 | * Firstly, we claim that if %$A$% and %$B$% are distinct, there is some | |

61 | * nonzero coefficient of %$k$%. For if %$n \ne m$% then, without loss of | |

62 | * generality, let %$n > m$%, and hence the coefficient of %$k_n$% is | |

63 | * nonzero. Alternatively, if %$n = m$% then there must be some | |

64 | * %$i \in \{ 0, \ldots, n - 1 \}$% with %$a_i \ne b_i$%, for otherwise the | |

65 | * messages would be identical; but then the coefficient of %$k^{i+1}$% is | |

66 | * %$a_i - b_i \ne 0$%. | |

67 | * | |

68 | * Hence we have a polynomial equation with degree at most %$\ell + 1$%; | |

69 | * there must be at most %$\ell + 1$% solutions for %$k$%; but we choose | |

70 | * %$k$% at random from a set of %$q$%; so the equation is true with | |

71 | * probability at most %$(\ell + 1)/q$%. | |

72 | * | |

73 | * This function can be used as a simple MAC with provable security against | |

74 | * computationally unbounded adversaries. Simply XOR the hash with a random | |

75 | * string indexed from a large random pad by some nonce sent with the | |

76 | * message. The probability of a forgery attempt being successful is then | |

573eadb5 | 77 | * %$(\ell + 1)/2^t$%, where %$t$% is the tag length and %$\ell$% is the |

78 | * longest message permitted. | |

8fe3c82b | 79 | */ |

80 | ||

81 | /*----- Practicalities ----------------------------------------------------* | |

82 | * | |

83 | * We work in %$\gf{2^32}$%, represented as a field of polynomials modulo | |

573eadb5 | 84 | * %$\texttt{104c11db7}_x$% (this is the standard CRC-32 polynomial). Our |

85 | * blocks are bytes. | |

8fe3c82b | 86 | * |

87 | * The choice of a 32-bit hash is made for pragmatic reasons: we're never | |

88 | * likely to actually want all 32 bits for a real hashtable anyway. The | |

89 | * truncation result is needed to keep us afloat with smaller tables. | |

90 | * | |

91 | * We compute hashes using a slightly unrolled version of Horner's rule, | |

92 | * using the recurrence: | |

93 | * | |

94 | * %$a_{i+b} = (a_i + m_i) k^b + m_{i+1} k^{b-1} + \cdots + m_{i+b-1} k$% | |

95 | * | |

96 | * which involves one full-width multiply and %$b - 1$% one-byte multiplies; | |

97 | * the latter may be efficiently computed using a table lookup. Start with | |

98 | * %$a_0 = k$%. | |

99 | * | |

100 | * We precompute tables %$S[\cdot][\cdot][\cdot]$%, where | |

101 | * | |

102 | * %$S[u][v][w] = k^{u+1} x^{8v} w$% | |

103 | * for %$0 \le u < b$%, %$0 \le v < 4$%, %$0 \le w < 256)$%. | |

104 | * | |

105 | * A one-byte multiply is one lookup; a full-width multiply is four lookups | |

106 | * and three XORs. The processing required is then %$b + 3$% lookups and | |

107 | * %$b + 3$% XORs per batch, or %$(b + 3)/b$% lookups and XORs per byte, at | |

108 | * the expense of %$4 b$% kilobytes of tables. This compares relatively | |

109 | * favorably with CRC32. Indeed, in tests, this implementation with $b = 4$% | |

110 | * is faster than a 32-bit CRC. | |

111 | */ | |

112 | ||

113 | /*----- Header files ------------------------------------------------------*/ | |

114 | ||

115 | #include <stddef.h> | |

116 | ||

117 | #ifndef MLIB_BITS_H | |

118 | # include "bits.h" | |

119 | #endif | |

120 | ||

121 | /*----- Data structures ---------------------------------------------------*/ | |

122 | ||

123 | #define UNIHASH_NBATCH 4 | |

124 | #define UNIHASH_POLY 0x04c11db7 /* From CRC32 */ | |

125 | ||

126 | typedef struct unihash_info { | |

127 | uint32 s[UNIHASH_NBATCH][4][256]; /* S-tables as described */ | |

128 | } unihash_info; | |

129 | ||

6f444bda | 130 | /*----- A global hash-info table ------------------------------------------*/ |

131 | ||

132 | extern unihash_info unihash_global; /* Key this if you like */ | |

133 | ||

8fe3c82b | 134 | /*----- Functions provided ------------------------------------------------*/ |

135 | ||

136 | /* --- @unihash_setkey@ --- * | |

137 | * | |

138 | * Arguments: @unihash_info *i@ = where to store the precomputed tables | |

139 | * @uint32 k@ = the key to set, randomly chosen | |

140 | * | |

141 | * Returns: --- | |

142 | * | |

143 | * Use: Calculates the tables required for efficient hashing. | |

144 | */ | |

145 | ||

146 | extern void unihash_setkey(unihash_info */*i*/, uint32 /*k*/); | |

147 | ||

148 | /* --- @unihash_hash@ --- * | |

149 | * | |

150 | * Arguments: @const unihash_info *i@ = pointer to precomputed table | |

151 | * @uint32 a@ = @UNIHASH_INIT(i)@ or value from previous call | |

152 | * @const void *p@ = pointer to data to hash | |

153 | * @size_t sz@ = size of the data | |

154 | * | |

573eadb5 | 155 | * Returns: Hash of data so far. |

8fe3c82b | 156 | * |

d4efbcd9 | 157 | * Use: Hashes data. Call this as many times as needed. |

8fe3c82b | 158 | */ |

159 | ||

160 | #define UNIHASH_INIT(i) ((i)->s[0][0][1]) /* %$k$% */ | |

161 | ||

162 | extern uint32 unihash_hash(const unihash_info */*i*/, uint32 /*a*/, | |

163 | const void */*p*/, size_t /*sz*/); | |

164 | ||

165 | /* --- @unihash@ --- * | |

166 | * | |

167 | * Arguments: @const unihash_info *i@ = precomputed tables | |

168 | * @const void *p@ = pointer to data to hash | |

169 | * @size_t sz@ = size of the data | |

170 | * | |

171 | * Returns: The hash value computed. | |

172 | * | |

173 | * Use: All-in-one hashing function. No faster than using the | |

d4efbcd9 | 174 | * separate calls, but more convenient. |

8fe3c82b | 175 | */ |

176 | ||

177 | #define UNIHASH(i, p, sz) (unihash_hash((i), UNIHASH_INIT((i)), (p), (sz))) | |

178 | ||

179 | extern uint32 unihash(const unihash_info */*i*/, | |

180 | const void */*p*/, size_t /*sz*/); | |

181 | ||

182 | /*----- That's all, folks -------------------------------------------------*/ | |

183 | ||

184 | #ifdef __cplusplus | |

185 | } | |

186 | #endif | |

187 | ||

188 | #endif |