Commit | Line | Data |
---|---|---|
a905c0d6 MW |
1 | /* -*-c-*- |
2 | * | |
3 | * The Keccak-p[1600, n] permutation | |
4 | * | |
5 | * (c) 2017 Straylight/Edgeware | |
6 | */ | |
7 | ||
8 | /*----- Licensing notice --------------------------------------------------* | |
9 | * | |
10 | * This file is part of Catacomb. | |
11 | * | |
12 | * Catacomb is free software; you can redistribute it and/or modify | |
13 | * it under the terms of the GNU Library General Public License as | |
14 | * published by the Free Software Foundation; either version 2 of the | |
15 | * License, or (at your option) any later version. | |
16 | * | |
17 | * Catacomb is distributed in the hope that it will be useful, | |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | * GNU Library General Public License for more details. | |
21 | * | |
22 | * You should have received a copy of the GNU Library General Public | |
23 | * License along with Catacomb; if not, write to the Free | |
24 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, | |
25 | * MA 02111-1307, USA. | |
26 | */ | |
27 | ||
28 | /*----- Header files ------------------------------------------------------*/ | |
29 | ||
30 | #include <limits.h> | |
31 | #include <string.h> | |
32 | ||
33 | #include <mLib/bits.h> | |
34 | ||
35 | #include "keccak1600.h" | |
36 | ||
37 | /* #define KECCAK_DEBUG */ | |
38 | ||
39 | /*----- Miscellaneous utilities -------------------------------------------*/ | |
40 | ||
41 | #define I(x, y) ((x) + 5*(y)) /* Column-major indexing */ | |
42 | ||
43 | /*----- Interlacing or not ------------------------------------------------*/ | |
44 | ||
45 | /* We should prefer the interlaced representation if the target is really | |
46 | * 32-bit and only providing synthetic 64-bit integers. Alas, the Windows | |
47 | * 64-bit ABI specifies that `long' is only 32-bits (i.e., it is IL32/LLP64), | |
48 | * so detect x86 specifically. | |
49 | */ | |
50 | #if (ULONG_MAX >> 31) <= 0xffffffff && \ | |
51 | !defined(__amd64__) && !defined(_M_AMD64) | |
52 | # define KECCAK_I32 | |
53 | #endif | |
54 | ||
55 | #ifdef KECCAK_I32 | |
56 | /* A 32-bit target with at best weak support for 64-bit shifts. Maintain a | |
57 | * lane as two 32-bit pieces representing the even and odd bits of the lane. | |
58 | * There are slightly fiddly transformations to apply on the way in and out | |
59 | * of the main permutation. | |
60 | */ | |
61 | ||
62 | typedef keccak1600_lane_i32 lane; | |
63 | #define S si32 | |
64 | ||
65 | static lane interlace(kludge64 x) | |
66 | { | |
67 | /* Given a 64-bit string X, return a lane Z containing the even- and | |
68 | * odd-numbered bits of X. | |
69 | * | |
70 | * This becomes more manageable if we look at what happens to the bit | |
71 | * indices: bit i of X becomes bit ROR_6(i, 1) of Z. We can effectively | |
72 | * swap two bits of the indices by swapping the object bits where those | |
73 | * index bits differ. Fortunately, this is fairly easy. | |
74 | * | |
75 | * We arrange to swap bits between the two halves of X, rather than within | |
76 | * a half. | |
77 | */ | |
78 | ||
79 | uint32 x0 = LO64(x), x1 = HI64(x), t; | |
80 | lane z; | |
81 | /* 543210 */ | |
82 | t = ((x0 >> 16) ^ x1)&0x0000ffff; x0 ^= t << 16; x1 ^= t; /* 453210 */ | |
83 | t = ((x0 >> 8) ^ x1)&0x00ff00ff; x0 ^= t << 8; x1 ^= t; /* 354210 */ | |
84 | t = ((x0 >> 4) ^ x1)&0x0f0f0f0f; x0 ^= t << 4; x1 ^= t; /* 254310 */ | |
85 | t = ((x0 >> 2) ^ x1)&0x33333333; x0 ^= t << 2; x1 ^= t; /* 154320 */ | |
86 | t = ((x0 >> 1) ^ x1)&0x55555555; x0 ^= t << 1; x1 ^= t; /* 054321 */ | |
87 | z.even = x0; z.odd = x1; return (z); | |
88 | } | |
89 | ||
90 | static kludge64 deinterlace(lane x) | |
91 | { | |
92 | /* Given a lane X, return the combined 64-bit value. This is the inverse | |
93 | * to `interlace' above, and the principle is the same | |
94 | */ | |
95 | ||
96 | uint32 x0 = x.even, x1 = x.odd, t; | |
97 | kludge64 z; | |
98 | /* 054321 */ | |
99 | t = ((x0 >> 1) ^ x1)&0x55555555; x0 ^= t << 1; x1 ^= t; /* 154320 */ | |
100 | t = ((x0 >> 2) ^ x1)&0x33333333; x0 ^= t << 2; x1 ^= t; /* 254310 */ | |
101 | t = ((x0 >> 4) ^ x1)&0x0f0f0f0f; x0 ^= t << 4; x1 ^= t; /* 354210 */ | |
102 | t = ((x0 >> 8) ^ x1)&0x00ff00ff; x0 ^= t << 8; x1 ^= t; /* 453210 */ | |
103 | t = ((x0 >> 16) ^ x1)&0x0000ffff; x0 ^= t << 16; x1 ^= t; /* 543210 */ | |
104 | SET64(z, x1, x0); return (z); | |
105 | } | |
106 | ||
107 | #define TO_LANE(x) (interlace(x)) | |
108 | #define FROM_LANE(x) (deinterlace(x)) | |
109 | ||
110 | #define PRINTFMT_LANE "%08lx:%08lx" | |
111 | #define PRINTARGS_LANE(x) (unsigned long)(x).even, (unsigned long)(x).odd | |
112 | ||
113 | #define BINOP_LANE(z, op, x, y) \ | |
114 | ((z).even = (x).even op (y).even, (z).odd = (x).odd op (y).odd) | |
115 | #define XOR_LANE(z, x, y) BINOP_LANE(z, ^, x, y) | |
116 | #define AND_LANE(z, x, y) BINOP_LANE(z, &, x, y) | |
117 | #define OR_LANE(z, x, y) BINOP_LANE(z, |, x, y) | |
118 | #define NOT_LANE(z, x) ((z).even = ~(x).even, (z).odd = ~(x).odd) | |
119 | ||
120 | #define ROTL_LANE(z, x, n) do { \ | |
121 | lane _t = (x); \ | |
122 | (z).even = (n)%2 ? ROL32(_t.odd, ((n) + 1)/2) \ | |
123 | : ROL32(_t.even, (n)/2); \ | |
124 | (z).odd = (n)%2 ? ROL32(_t.even, ((n) - 1)/2) \ | |
125 | : ROL32(_t.odd, (n)/2); \ | |
126 | } while (0) | |
127 | ||
128 | #define LANE_ZERO { 0, 0 } | |
129 | #define LANE_CMPL { 0xffffffff, 0xffffffff } | |
130 | ||
131 | static const lane rcon[24] = { | |
132 | { 0x00000001, 0x00000000 }, { 0x00000000, 0x00000089 }, | |
133 | { 0x00000000, 0x8000008b }, { 0x00000000, 0x80008080 }, | |
134 | { 0x00000001, 0x0000008b }, { 0x00000001, 0x00008000 }, | |
135 | { 0x00000001, 0x80008088 }, { 0x00000001, 0x80000082 }, | |
136 | { 0x00000000, 0x0000000b }, { 0x00000000, 0x0000000a }, | |
137 | { 0x00000001, 0x00008082 }, { 0x00000000, 0x00008003 }, | |
138 | { 0x00000001, 0x0000808b }, { 0x00000001, 0x8000000b }, | |
139 | { 0x00000001, 0x8000008a }, { 0x00000001, 0x80000081 }, | |
140 | { 0x00000000, 0x80000081 }, { 0x00000000, 0x80000008 }, | |
141 | { 0x00000000, 0x00000083 }, { 0x00000000, 0x80008003 }, | |
142 | { 0x00000001, 0x80008088 }, { 0x00000000, 0x80000088 }, | |
143 | { 0x00000001, 0x00008000 }, { 0x00000000, 0x80008082 } | |
144 | }; | |
145 | ||
146 | #else | |
147 | /* A target with good support for 64-bit shifts. We store lanes as 64-bit | |
148 | * quantities and deal with them in the obvious, natural way. | |
149 | */ | |
150 | ||
151 | typedef keccak1600_lane_64 lane; | |
152 | #define S s64 | |
153 | ||
154 | #define TO_LANE(x) (x) | |
155 | #define FROM_LANE(x) (x) | |
156 | ||
157 | #define PRINTFMT_LANE "%08lx%08lx" | |
158 | #define PRINTARGS_LANE(x) (unsigned long)HI64(x), (unsigned long)LO64(x) | |
159 | ||
160 | #define XOR_LANE(z, x, y) XOR64((z), (x), (y)) | |
161 | #define AND_LANE(z, x, y) AND64((z), (x), (y)) | |
162 | #define OR_LANE(z, x, y) OR64((z), (x), (y)) | |
163 | #define NOT_LANE(z, x) CPL64((z), (x)) | |
164 | #define ROTL_LANE(z, x, n) ROL64_((z), (x), (n)) | |
165 | ||
166 | #define LANE_ZERO X64( 0, 0) | |
167 | #define LANE_CMPL X64(ffffffff, ffffffff) | |
168 | ||
169 | static const lane rcon[24] = { | |
170 | X64(00000000, 00000001), X64(00000000, 00008082), | |
171 | X64(80000000, 0000808a), X64(80000000, 80008000), | |
172 | X64(00000000, 0000808b), X64(00000000, 80000001), | |
173 | X64(80000000, 80008081), X64(80000000, 00008009), | |
174 | X64(00000000, 0000008a), X64(00000000, 00000088), | |
175 | X64(00000000, 80008009), X64(00000000, 8000000a), | |
176 | X64(00000000, 8000808b), X64(80000000, 0000008b), | |
177 | X64(80000000, 00008089), X64(80000000, 00008003), | |
178 | X64(80000000, 00008002), X64(80000000, 00000080), | |
179 | X64(00000000, 0000800a), X64(80000000, 8000000a), | |
180 | X64(80000000, 80008081), X64(80000000, 00008080), | |
181 | X64(00000000, 80000001), X64(80000000, 80008008) | |
182 | }; | |
183 | ||
184 | #endif | |
185 | ||
186 | /*----- Complementing or not ----------------------------------------------*/ | |
187 | ||
188 | /* We should use the complemented representation if the target doesn't have a | |
189 | * fused and-not operation. There doesn't appear to be a principled way to | |
190 | * do this, so we'll just have to make do with a big list. Worse, in my | |
191 | * brief survey of the architecture reference manuals I have lying about, | |
192 | * they've split close to 50/50 on this question, so I don't have an | |
193 | * especially good way to pick a default. The `no-fused-op' architectures | |
194 | * seem generally a bit more modern than the `fused-op' architectures, so I | |
195 | * guess I'll make the complemented representation the default. | |
196 | * | |
197 | * and-not No and-not | |
198 | * ------- ---------- | |
199 | * ARM (`bic') x86/amd64 | |
200 | * Sparc (`andn') z/Architecture | |
201 | * MMIX (`andn') MIPS | |
202 | * IA64 (`andcm') 68k | |
203 | * VAX (`bic') RISC-V | |
204 | * PDP-10 (`andc') | |
205 | */ | |
206 | #if !(defined(__arm__) || defined(__thumb__) || defined(__aarch64__) || \ | |
207 | defined(_M_ARM) || defined(_M_THUMB)) && \ | |
208 | !(defined(__ia64__) || defined(__ia64) || defined(__itanium__) || \ | |
209 | defined(_M_IA64)) && \ | |
210 | !defined(__mmix__) && \ | |
211 | !(defined(__sparc__) || defined(__sparc)) && \ | |
212 | !defined(__vax__) && \ | |
213 | !defined(__pdp10__) | |
214 | # define KECCAK_COMPL | |
215 | #endif | |
216 | ||
217 | #ifdef KECCAK_COMPL | |
218 | /* A target without fused and/not (`bic', `andc2'). We complement some of | |
219 | * the lanes in the initial state and undo this on output. (Absorbing XORs | |
220 | * input into the state, so this is unaffected.) See the handling of chi in | |
221 | * `keccak1600_round' below for the details. | |
222 | */ | |
223 | ||
1ccb258a MW |
224 | #define COMPL_MASK 0x00121106u |
225 | ||
a905c0d6 MW |
226 | #define STATE_INIT(z) do { \ |
227 | lane cmpl = LANE_CMPL; \ | |
228 | (z)->S[I(1, 0)] = cmpl; (z)->S[I(2, 0)] = cmpl; \ | |
229 | (z)->S[I(3, 1)] = cmpl; (z)->S[I(2, 2)] = cmpl; \ | |
230 | (z)->S[I(2, 3)] = cmpl; (z)->S[I(0, 4)] = cmpl; \ | |
231 | } while (0) | |
232 | ||
233 | #define STATE_OUT(z) do { \ | |
234 | NOT_LANE((z)->S[I(1, 0)], (z)->S[I(1, 0)]); \ | |
235 | NOT_LANE((z)->S[I(2, 0)], (z)->S[I(2, 0)]); \ | |
236 | NOT_LANE((z)->S[I(3, 1)], (z)->S[I(3, 1)]); \ | |
237 | NOT_LANE((z)->S[I(2, 2)], (z)->S[I(2, 2)]); \ | |
238 | NOT_LANE((z)->S[I(2, 3)], (z)->S[I(2, 3)]); \ | |
239 | NOT_LANE((z)->S[I(0, 4)], (z)->S[I(0, 4)]); \ | |
240 | } while (0) | |
241 | ||
242 | #else | |
243 | /* A target with fused and/not (`bic', `andc2'). Everything is simple. */ | |
244 | ||
1ccb258a MW |
245 | #define COMPL_MASK 0u |
246 | ||
a905c0d6 MW |
247 | #define STATE_INIT(z) do ; while (0) |
248 | #define STATE_OUT(z) do ; while (0) | |
249 | ||
250 | #endif | |
251 | ||
252 | /*----- Other magic constants ---------------------------------------------*/ | |
253 | ||
254 | /* The rotation constants. These are systematically named -- see `THETA_RHO' | |
255 | * below. | |
256 | */ | |
257 | #define ROT_0_0 0 | |
258 | #define ROT_1_0 1 | |
259 | #define ROT_2_0 62 | |
260 | #define ROT_3_0 28 | |
261 | #define ROT_4_0 27 | |
262 | ||
263 | #define ROT_0_1 36 | |
264 | #define ROT_1_1 44 | |
265 | #define ROT_2_1 6 | |
266 | #define ROT_3_1 55 | |
267 | #define ROT_4_1 20 | |
268 | ||
269 | #define ROT_0_2 3 | |
270 | #define ROT_1_2 10 | |
271 | #define ROT_2_2 43 | |
272 | #define ROT_3_2 25 | |
273 | #define ROT_4_2 39 | |
274 | ||
275 | #define ROT_0_3 41 | |
276 | #define ROT_1_3 45 | |
277 | #define ROT_2_3 15 | |
278 | #define ROT_3_3 21 | |
279 | #define ROT_4_3 8 | |
280 | ||
281 | #define ROT_0_4 18 | |
282 | #define ROT_1_4 2 | |
283 | #define ROT_2_4 61 | |
284 | #define ROT_3_4 56 | |
285 | #define ROT_4_4 14 | |
286 | ||
287 | /*----- Debugging ---------------------------------------------------------*/ | |
288 | ||
289 | #ifdef KECCAK_DEBUG | |
290 | ||
291 | #include <stdio.h> | |
292 | ||
293 | static void dump_state(const char *what, unsigned ir, | |
294 | const keccak1600_state *x) | |
295 | { | |
296 | unsigned i, j; | |
297 | keccak1600_state y; | |
298 | kludge64 a; | |
299 | int sep; | |
300 | ||
301 | printf(";; %s [round %u]\n", what, ir); | |
302 | printf(";; raw state...\n"); | |
303 | for (j = 0; j < 5; j++) { | |
304 | printf(";;"); | |
305 | for (i = 0, sep = '\t'; i < 5; i++, sep = ' ') | |
306 | printf("%c" PRINTFMT_LANE, sep, PRINTARGS_LANE(x->S[I(i, j)])); | |
307 | fputc('\n', stdout); | |
308 | } | |
309 | y = *x; STATE_OUT(&y); | |
310 | #ifdef KECCAK_COMPL | |
311 | printf(";; uncomplemented state...\n"); | |
312 | for (j = 0; j < 5; j++) { | |
313 | printf(";;"); | |
314 | for (i = 0, sep = '\t'; i < 5; i++, sep = ' ') | |
315 | printf("%c" PRINTFMT_LANE, sep, PRINTARGS_LANE(y.S[I(i, j)])); | |
316 | fputc('\n', stdout); | |
317 | } | |
318 | #endif | |
319 | #ifdef KECCAK_I32 | |
320 | printf(";; deinterlaced state...\n"); | |
321 | for (j = 0; j < 5; j++) { | |
322 | printf(";;"); | |
323 | for (i = 0, sep = '\t'; i < 5; i++, sep = ' ') { | |
324 | a = FROM_LANE(y.S[I(i, j)]); | |
325 | printf("%c%08lx%08lx", sep, | |
326 | (unsigned long)HI64(a), (unsigned long)LO64(a)); | |
327 | } | |
328 | fputc('\n', stdout); | |
329 | } | |
330 | #endif | |
331 | fputc('\n', stdout); | |
332 | } | |
333 | ||
334 | #endif | |
335 | ||
336 | /*----- The Keccak-p[1600, n] permutation ---------------------------------*/ | |
337 | ||
338 | static void keccak1600_round(keccak1600_state *z, | |
339 | const keccak1600_state *x, unsigned i) | |
340 | { | |
341 | /* Perform a round of Keccak-p[1600, n]. Process the state X and write the | |
342 | * result to Z. | |
343 | */ | |
344 | ||
608e360f | 345 | lane c[5], d[5], t; |
a905c0d6 MW |
346 | |
347 | /* Theta, first step: calculate the column parities. */ | |
348 | #define COLPARITY(j) do { \ | |
608e360f MW |
349 | d[j] = x->S[I(j, 0)]; \ |
350 | XOR_LANE(d[j], d[j], x->S[I(j, 1)]); \ | |
351 | XOR_LANE(d[j], d[j], x->S[I(j, 2)]); \ | |
352 | XOR_LANE(d[j], d[j], x->S[I(j, 3)]); \ | |
353 | XOR_LANE(d[j], d[j], x->S[I(j, 4)]); \ | |
a905c0d6 MW |
354 | } while (0) |
355 | COLPARITY(0); COLPARITY(1); COLPARITY(2); COLPARITY(3); COLPARITY(4); | |
356 | #undef COLPARITY | |
357 | ||
358 | /* Theta, second step: calculate the combined effect. */ | |
608e360f MW |
359 | ROTL_LANE(c[0], d[1], 1); XOR_LANE(c[0], c[0], d[4]); |
360 | ROTL_LANE(c[1], d[2], 1); XOR_LANE(c[1], c[1], d[0]); | |
361 | ROTL_LANE(c[2], d[3], 1); XOR_LANE(c[2], c[2], d[1]); | |
362 | ROTL_LANE(c[3], d[4], 1); XOR_LANE(c[3], c[3], d[2]); | |
363 | ROTL_LANE(c[4], d[0], 1); XOR_LANE(c[4], c[4], d[3]); | |
a905c0d6 MW |
364 | |
365 | /* Now we work plane by plane through the output. To do this, we must undo | |
366 | * the pi transposition. Pi maps (x', y') = (y, 2 x + 3 y), so y = x', and | |
367 | * x = (y' - 3 y)/2 = 3 (y' - 3 x') = x' + 3 y'. | |
368 | */ | |
369 | #define THETA_RHO(i0, i1, i2, i3, i4) do { \ | |
370 | \ | |
371 | /* First, theta. */ \ | |
608e360f MW |
372 | XOR_LANE(d[0], x->S[I(i0, 0)], c[i0]); \ |
373 | XOR_LANE(d[1], x->S[I(i1, 1)], c[i1]); \ | |
374 | XOR_LANE(d[2], x->S[I(i2, 2)], c[i2]); \ | |
375 | XOR_LANE(d[3], x->S[I(i3, 3)], c[i3]); \ | |
376 | XOR_LANE(d[4], x->S[I(i4, 4)], c[i4]); \ | |
a905c0d6 MW |
377 | \ |
378 | /* Then rho. */ \ | |
608e360f MW |
379 | ROTL_LANE(d[0], d[0], ROT_##i0##_0); \ |
380 | ROTL_LANE(d[1], d[1], ROT_##i1##_1); \ | |
381 | ROTL_LANE(d[2], d[2], ROT_##i2##_2); \ | |
382 | ROTL_LANE(d[3], d[3], ROT_##i3##_3); \ | |
383 | ROTL_LANE(d[4], d[4], ROT_##i4##_4); \ | |
a905c0d6 MW |
384 | } while (0) |
385 | ||
386 | /* The basic chi operation is: z = w ^ (~a&b), but this involves an | |
387 | * inversion which we can mostly avoid by being clever: observe that | |
388 | * | |
389 | * w ^ (~a&~~b) = w ^ ~(a | ~b) = ~w ^ (a | ~b) | |
390 | * | |
391 | * by De Morgan's law. Furthermore, complementing w or z is basically | |
392 | * equivalent. Bertoni, Daemen, Peeters, Van Assche, and Van Keer, `Keccak | |
393 | * implementation overview', describe a pattern of lane complementation | |
394 | * which propagates through theta and pi in exactly the right way to be | |
395 | * restored easily by chi, here, with exactly one inversion per plane. | |
396 | * | |
397 | * Here's the pattern. | |
398 | * | |
399 | * [ * . * * . ] [ . * * . . ] | |
400 | * [ * . * . . ] [ . . . * . ] | |
401 | * [ * . * . . ] -> [ . . * . . ] | |
402 | * [ . * . * * ] [ . . * . . ] | |
403 | * [ * . . * . ] [ * . . . . ] | |
404 | * | |
405 | * where a `.' means that the lane is unchanged, and a `*' means that it | |
406 | * has been complemented. | |
407 | * | |
408 | * The macros `CHI_wxy_z' calculate z in terms of w, x, y assuming that the | |
409 | * inputs w, x, y marked with a `1' are complemented on input, and arrange | |
410 | * for z to be complemented on output if z is so marked. | |
411 | * | |
412 | * The diagrams to the right show the fragment of the complementation | |
413 | * pattern being handled by the corresponding line of code. A symbol in | |
414 | * brackets indicates a deviation from the input pattern forced by explicit | |
415 | * complementation: there will be exactly one of these for each plane. | |
416 | */ | |
417 | #ifdef KECCAK_COMPL | |
418 | # define CHI_COMPL(z, x) NOT_LANE((z), (x)) | |
419 | # define CHI_001_1(z, w, x, y) \ | |
420 | (OR_LANE((z), (x), (y)), XOR_LANE((z), (z), (w))) | |
421 | # define CHI_010_0(z, w, x, y) \ | |
422 | (AND_LANE((z), (x), (y)), XOR_LANE((z), (z), (w))) | |
423 | # define CHI_101_0 CHI_001_1 | |
424 | # define CHI_110_1 CHI_010_0 | |
425 | #else | |
426 | # define CHI(z, w, x, y) \ | |
427 | (NOT_LANE((z), (x)), \ | |
428 | AND_LANE((z), (z), (y)), \ | |
429 | XOR_LANE((z), (z), (w))) | |
430 | # define CHI_COMPL(z, x) ((z) = (x)) | |
431 | # define CHI_001_1 CHI | |
432 | # define CHI_010_0 CHI | |
433 | # define CHI_101_0 CHI | |
434 | # define CHI_110_1 CHI | |
435 | #endif | |
436 | ||
437 | /* Let's do the y' = 0 plane first. Theta and rho are easy with our macro, | |
438 | * and we've done pi with the coordinate hacking. That leaves chi next. | |
439 | * This is hairy because we must worry about complementation. | |
440 | */ | |
441 | THETA_RHO(0, 1, 2, 3, 4); | |
608e360f MW |
442 | CHI_COMPL(t, d[2]); /* [.] */ |
443 | CHI_101_0(z->S[I(0, 0)], d[0], d[1], d[2]); /* * . * -> . */ | |
444 | CHI_001_1(z->S[I(1, 0)], d[1], t, d[3]); /* . [.] * -> * */ | |
445 | CHI_110_1(z->S[I(2, 0)], d[2], d[3], d[4]); /* * * . -> * */ | |
446 | CHI_101_0(z->S[I(3, 0)], d[3], d[4], d[0]); /* * * . -> . */ | |
447 | CHI_010_0(z->S[I(4, 0)], d[4], d[0], d[1]); /* * . . -> . */ | |
a905c0d6 MW |
448 | |
449 | /* We'd better do iota before we forget. */ | |
450 | XOR_LANE(z->S[I(0, 0)], z->S[I(0, 0)], rcon[i]); | |
451 | ||
452 | /* That was fun. Maybe y' = 1 will be as good. */ | |
453 | THETA_RHO(3, 4, 0, 1, 2); | |
608e360f MW |
454 | CHI_COMPL(t, d[4]); /* [*] */ |
455 | CHI_101_0(z->S[I(0, 1)], d[0], d[1], d[2]); /* * . * -> . */ | |
456 | CHI_010_0(z->S[I(1, 1)], d[1], d[2], d[3]); /* . * . -> . */ | |
457 | CHI_101_0(z->S[I(2, 1)], d[2], d[3], t); /* * . [*] -> . */ | |
458 | CHI_001_1(z->S[I(3, 1)], d[3], d[4], d[0]); /* * . . -> * */ | |
459 | CHI_010_0(z->S[I(4, 1)], d[4], d[0], d[1]); /* * . . -> . */ | |
a905c0d6 MW |
460 | |
461 | /* We're getting the hang of this. The y' = 2 plane shouldn't be any | |
462 | * trouble. | |
463 | */ | |
464 | THETA_RHO(1, 2, 3, 4, 0); | |
608e360f MW |
465 | CHI_COMPL(t, d[3]); /* [*] */ |
466 | CHI_101_0(z->S[I(0, 2)], d[0], d[1], d[2]); /* * . * -> . */ | |
467 | CHI_010_0(z->S[I(1, 2)], d[1], d[2], d[3]); /* . * . -> . */ | |
468 | CHI_110_1(z->S[I(2, 2)], d[2], t, d[4]); /* * [*] . -> * */ | |
469 | CHI_101_0(z->S[I(3, 2)], t, d[4], d[0]); /* * [*] . -> . */ | |
470 | CHI_010_0(z->S[I(4, 2)], d[4], d[0], d[1]); /* * . . -> . */ | |
a905c0d6 MW |
471 | |
472 | /* This isn't as interesting any more. Let's do y' = 3 before boredom sets | |
473 | * in. | |
474 | */ | |
475 | THETA_RHO(4, 0, 1, 2, 3); | |
608e360f MW |
476 | CHI_COMPL(t, d[3]); /* [.] */ |
477 | CHI_010_0(z->S[I(0, 3)], d[0], d[1], d[2]); /* . * . -> . */ | |
478 | CHI_101_0(z->S[I(1, 3)], d[1], d[2], d[3]); /* * . * -> . */ | |
479 | CHI_001_1(z->S[I(2, 3)], d[2], t, d[4]); /* . [.] * -> * */ | |
480 | CHI_010_0(z->S[I(3, 3)], t, d[4], d[0]); /* . [.] * -> . */ | |
481 | CHI_101_0(z->S[I(4, 3)], d[4], d[0], d[1]); /* . * * -> . */ | |
a905c0d6 MW |
482 | |
483 | /* Last plane. Just y' = 4 to go. */ | |
484 | THETA_RHO(2, 3, 4, 0, 1); | |
608e360f MW |
485 | CHI_COMPL(t, d[1]); /* [*] */ |
486 | CHI_110_1(z->S[I(0, 4)], d[0], t, d[2]); /* * [*] . -> * */ | |
487 | CHI_101_0(z->S[I(1, 4)], t, d[2], d[3]); /* [*] . * -> . */ | |
488 | CHI_010_0(z->S[I(2, 4)], d[2], d[3], d[4]); /* . * . -> . */ | |
489 | CHI_101_0(z->S[I(3, 4)], d[3], d[4], d[0]); /* * * . -> . */ | |
490 | CHI_010_0(z->S[I(4, 4)], d[4], d[0], d[1]); /* * . . -> . */ | |
a905c0d6 MW |
491 | |
492 | /* And we're done. */ | |
493 | #undef THETA_RHO | |
494 | #undef CHI_COMPL | |
495 | #undef CHI_001_1 | |
496 | #undef CHI_010_0 | |
497 | #undef CHI_101_0 | |
498 | #undef CHI_110_1 | |
499 | #undef CHI | |
500 | } | |
501 | ||
502 | /* --- @keccak1600_p@ --- * | |
503 | * | |
504 | * Arguments: @keccak1600_state *z@ = where to write the output state | |
505 | * @conts keccak1600_state *x@ = input state | |
506 | * @unsigned n@ = number of rounds to perform | |
507 | * | |
508 | * Returns: --- | |
509 | * | |
510 | * Use: Implements the %$\Keccak[1600, n]$% permutation at the core | |
511 | * of Keccak and the SHA-3 standard. | |
512 | */ | |
513 | ||
514 | void keccak1600_p(keccak1600_state *z, const keccak1600_state *x, unsigned n) | |
515 | { | |
516 | keccak1600_state u, v; | |
517 | unsigned i = 0; | |
518 | ||
519 | #ifdef KECCAK_DEBUG | |
520 | dump_state("init", 0, x); | |
521 | #endif | |
522 | keccak1600_round(&u, x, i++); n--; | |
523 | while (n > 8) { | |
524 | keccak1600_round(&v, &u, i++); | |
525 | keccak1600_round(&u, &v, i++); | |
526 | keccak1600_round(&v, &u, i++); | |
527 | keccak1600_round(&u, &v, i++); | |
528 | keccak1600_round(&v, &u, i++); | |
529 | keccak1600_round(&u, &v, i++); | |
530 | keccak1600_round(&v, &u, i++); | |
531 | keccak1600_round(&u, &v, i++); | |
532 | n -= 8; | |
533 | } | |
534 | switch (n) { | |
535 | case 7: keccak1600_round(&v, &u, i++); | |
536 | keccak1600_round(&u, &v, i++); | |
537 | case 5: keccak1600_round(&v, &u, i++); | |
538 | keccak1600_round(&u, &v, i++); | |
539 | case 3: keccak1600_round(&v, &u, i++); | |
540 | keccak1600_round(&u, &v, i++); | |
718a6891 | 541 | case 1: keccak1600_round( z, &u, i++); |
a905c0d6 MW |
542 | break; |
543 | case 8: keccak1600_round(&v, &u, i++); | |
544 | keccak1600_round(&u, &v, i++); | |
545 | case 6: keccak1600_round(&v, &u, i++); | |
546 | keccak1600_round(&u, &v, i++); | |
547 | case 4: keccak1600_round(&v, &u, i++); | |
548 | keccak1600_round(&u, &v, i++); | |
549 | case 2: keccak1600_round(&v, &u, i++); | |
718a6891 | 550 | keccak1600_round( z, &v, i++); |
a905c0d6 MW |
551 | break; |
552 | } | |
553 | #ifdef KECCAK_DEBUG | |
554 | dump_state("final", 0, z); | |
555 | #endif | |
556 | } | |
557 | ||
558 | /* --- @keccack1600_init@ --- * | |
559 | * | |
560 | * Arguments: @keccak1600_state *s@ = a state to initialize | |
561 | * | |
562 | * Returns: --- | |
563 | * | |
564 | * Use: Initialize @s@ to the root state. | |
565 | */ | |
566 | ||
567 | void keccak1600_init(keccak1600_state *s) | |
568 | { memset(s->S, 0, sizeof(s->S)); STATE_INIT(s); } | |
569 | ||
570 | /* --- @keccak1600_mix@ --- * | |
571 | * | |
572 | * Arguments: @keccak1600_state *s@ = a state to update | |
573 | * @const kludge64 *p@ = pointer to 64-bit words to mix in | |
574 | * @size_t n@ = size of the input, in 64-bit words | |
575 | * | |
576 | * Returns: --- | |
577 | * | |
578 | * Use: Mixes data into a %$\Keccak[r, 1600 - r]$% state. Note that | |
579 | * it's the caller's responsibility to pass in no more than | |
580 | * %$r$% bits of data. | |
581 | */ | |
582 | ||
583 | void keccak1600_mix(keccak1600_state *s, const kludge64 *p, size_t n) | |
584 | { | |
585 | unsigned i; | |
586 | lane a; | |
587 | ||
588 | for (i = 0; i < n; i++) | |
589 | { a = TO_LANE(p[i]); XOR_LANE(s->S[i], s->S[i], a); } | |
590 | } | |
591 | ||
10f61ef8 MW |
592 | /* --- @keccak1600_set@ --- * |
593 | * | |
594 | * Arguments: @keccak1600_state *s@ = a state to update | |
595 | * @const kludge64 *p@ = pointer to 64-bit words to mix in | |
596 | * @size_t n@ = size of the input, in 64-bit words | |
597 | * | |
598 | * Returns: --- | |
599 | * | |
600 | * Use: Stores data into a %$\Keccak[r, 1600 - r]$% state. Note that | |
601 | * it's the caller's responsibility to pass in no more than | |
602 | * %$r$% bits of data. | |
603 | * | |
604 | * This is not the operation you wanted for ordinary hashing. | |
605 | * It's provided for the use of higher-level protocols which use | |
606 | * duplexing and other fancy sponge features. | |
607 | */ | |
608 | ||
609 | void keccak1600_set(keccak1600_state *s, const kludge64 *p, size_t n) | |
610 | { | |
611 | uint32 m = COMPL_MASK; | |
612 | unsigned i; | |
613 | lane a; | |
614 | ||
615 | for (i = 0; i < n; i++) { | |
616 | a = TO_LANE(p[i]); if (m&1) NOT_LANE(a, a); | |
617 | s->S[i] = a; m >>= 1; | |
618 | } | |
619 | } | |
620 | ||
a905c0d6 MW |
621 | /* --- @keccak1600_extract@ --- * |
622 | * | |
623 | * Arguments: @const keccak1600_state *s@ = a state to extract output from | |
624 | * @kludge64 *p@ = pointer to 64-bit words to write | |
625 | * @size_t n@ = size of the output, in 64-bit words | |
626 | * | |
627 | * Returns: --- | |
628 | * | |
629 | * Use: Reads output from a %$\Keccak[r, 1600 - r]$% state. Note | |
630 | * that it's the caller's responsibility to extract no more than | |
631 | * %$r$% bits of data. | |
632 | */ | |
633 | ||
634 | void keccak1600_extract(const keccak1600_state *s, kludge64 *p, size_t n) | |
635 | { | |
1ccb258a | 636 | uint32 m = COMPL_MASK; |
a905c0d6 | 637 | unsigned i; |
1ccb258a | 638 | lane t; |
a905c0d6 | 639 | |
1ccb258a MW |
640 | for (i = 0; i < n; i++) { |
641 | t = s->S[i]; if (m&1) NOT_LANE(t, t); | |
642 | *p++ = FROM_LANE(t); m >>= 1; | |
643 | } | |
a905c0d6 MW |
644 | } |
645 | ||
646 | /*----- Test rig ----------------------------------------------------------*/ | |
647 | ||
648 | #ifdef TEST_RIG | |
649 | ||
650 | #include <stdio.h> | |
651 | ||
141c1284 | 652 | #include <mLib/macros.h> |
a905c0d6 MW |
653 | #include <mLib/quis.h> |
654 | #include <mLib/report.h> | |
655 | #include <mLib/testrig.h> | |
656 | ||
657 | static int vrf_p(dstr v[]) | |
658 | { | |
659 | keccak1600_state u; | |
660 | kludge64 t[25]; | |
661 | dstr d = DSTR_INIT; | |
662 | int n; | |
663 | unsigned i; | |
664 | int ok = 1; | |
665 | ||
666 | if (v[0].len != 200) die(1, "bad input size"); | |
667 | if (v[2].len != 200) die(1, "bad output size"); | |
668 | n = *(int *)v[1].buf; | |
669 | dstr_ensure(&d, 200); d.len = 200; | |
670 | ||
671 | keccak1600_init(&u); | |
672 | for (i = 0; i < 25; i++) LOAD64_L_(t[i], v[0].buf + 8*i); | |
673 | keccak1600_mix(&u, t, 25); | |
674 | keccak1600_p(&u, &u, n); | |
675 | keccak1600_extract(&u, t, 25); | |
676 | for (i = 0; i < 25; i++) STORE64_L_(d.buf + 8*i, t[i]); | |
141c1284 | 677 | if (MEMCMP(d.buf, !=, v[2].buf, 200)) { |
a905c0d6 MW |
678 | ok = 0; |
679 | fprintf(stderr, "failed!"); | |
680 | fprintf(stderr, "\n\t input = "); type_hex.dump(&v[0], stderr); | |
681 | fprintf(stderr, "\n\t rounds = %d", n); | |
682 | fprintf(stderr, "\n\t expected = "); type_hex.dump(&v[2], stderr); | |
683 | fprintf(stderr, "\n\t calclated = "); type_hex.dump(&d, stderr); | |
684 | } | |
685 | ||
686 | dstr_destroy(&d); | |
687 | return (ok); | |
688 | } | |
689 | ||
690 | static test_chunk defs[] = { | |
691 | { "p", vrf_p, { &type_hex, &type_int, &type_hex } }, | |
692 | { 0, 0, { 0 } } | |
693 | }; | |
694 | ||
695 | int main(int argc, char *argv[]) | |
696 | { | |
697 | test_run(argc, argv, defs, SRCDIR"/t/keccak1600"); | |
698 | return (0); | |
699 | } | |
700 | ||
701 | #endif | |
702 | ||
703 | /*----- That's all, folks -------------------------------------------------*/ |