chiark / gitweb /
symm/keccak1600.c (keccak1600_p): Minor layout tweak.
[catacomb] / symm / keccak1600.c
CommitLineData
a905c0d6
MW
1/* -*-c-*-
2 *
3 * The Keccak-p[1600, n] permutation
4 *
5 * (c) 2017 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of Catacomb.
11 *
12 * Catacomb is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Library General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * Catacomb is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with Catacomb; if not, write to the Free
24 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * MA 02111-1307, USA.
26 */
27
28/*----- Header files ------------------------------------------------------*/
29
30#include <limits.h>
31#include <string.h>
32
33#include <mLib/bits.h>
34
35#include "keccak1600.h"
36
37/* #define KECCAK_DEBUG */
38
39/*----- Miscellaneous utilities -------------------------------------------*/
40
41#define I(x, y) ((x) + 5*(y)) /* Column-major indexing */
42
43/*----- Interlacing or not ------------------------------------------------*/
44
45/* We should prefer the interlaced representation if the target is really
46 * 32-bit and only providing synthetic 64-bit integers. Alas, the Windows
47 * 64-bit ABI specifies that `long' is only 32-bits (i.e., it is IL32/LLP64),
48 * so detect x86 specifically.
49 */
50#if (ULONG_MAX >> 31) <= 0xffffffff && \
51 !defined(__amd64__) && !defined(_M_AMD64)
52# define KECCAK_I32
53#endif
54
55#ifdef KECCAK_I32
56/* A 32-bit target with at best weak support for 64-bit shifts. Maintain a
57 * lane as two 32-bit pieces representing the even and odd bits of the lane.
58 * There are slightly fiddly transformations to apply on the way in and out
59 * of the main permutation.
60 */
61
62typedef keccak1600_lane_i32 lane;
63#define S si32
64
65static lane interlace(kludge64 x)
66{
67 /* Given a 64-bit string X, return a lane Z containing the even- and
68 * odd-numbered bits of X.
69 *
70 * This becomes more manageable if we look at what happens to the bit
71 * indices: bit i of X becomes bit ROR_6(i, 1) of Z. We can effectively
72 * swap two bits of the indices by swapping the object bits where those
73 * index bits differ. Fortunately, this is fairly easy.
74 *
75 * We arrange to swap bits between the two halves of X, rather than within
76 * a half.
77 */
78
79 uint32 x0 = LO64(x), x1 = HI64(x), t;
80 lane z;
81 /* 543210 */
82 t = ((x0 >> 16) ^ x1)&0x0000ffff; x0 ^= t << 16; x1 ^= t; /* 453210 */
83 t = ((x0 >> 8) ^ x1)&0x00ff00ff; x0 ^= t << 8; x1 ^= t; /* 354210 */
84 t = ((x0 >> 4) ^ x1)&0x0f0f0f0f; x0 ^= t << 4; x1 ^= t; /* 254310 */
85 t = ((x0 >> 2) ^ x1)&0x33333333; x0 ^= t << 2; x1 ^= t; /* 154320 */
86 t = ((x0 >> 1) ^ x1)&0x55555555; x0 ^= t << 1; x1 ^= t; /* 054321 */
87 z.even = x0; z.odd = x1; return (z);
88}
89
90static kludge64 deinterlace(lane x)
91{
92 /* Given a lane X, return the combined 64-bit value. This is the inverse
93 * to `interlace' above, and the principle is the same
94 */
95
96 uint32 x0 = x.even, x1 = x.odd, t;
97 kludge64 z;
98 /* 054321 */
99 t = ((x0 >> 1) ^ x1)&0x55555555; x0 ^= t << 1; x1 ^= t; /* 154320 */
100 t = ((x0 >> 2) ^ x1)&0x33333333; x0 ^= t << 2; x1 ^= t; /* 254310 */
101 t = ((x0 >> 4) ^ x1)&0x0f0f0f0f; x0 ^= t << 4; x1 ^= t; /* 354210 */
102 t = ((x0 >> 8) ^ x1)&0x00ff00ff; x0 ^= t << 8; x1 ^= t; /* 453210 */
103 t = ((x0 >> 16) ^ x1)&0x0000ffff; x0 ^= t << 16; x1 ^= t; /* 543210 */
104 SET64(z, x1, x0); return (z);
105}
106
107#define TO_LANE(x) (interlace(x))
108#define FROM_LANE(x) (deinterlace(x))
109
110#define PRINTFMT_LANE "%08lx:%08lx"
111#define PRINTARGS_LANE(x) (unsigned long)(x).even, (unsigned long)(x).odd
112
113#define BINOP_LANE(z, op, x, y) \
114 ((z).even = (x).even op (y).even, (z).odd = (x).odd op (y).odd)
115#define XOR_LANE(z, x, y) BINOP_LANE(z, ^, x, y)
116#define AND_LANE(z, x, y) BINOP_LANE(z, &, x, y)
117#define OR_LANE(z, x, y) BINOP_LANE(z, |, x, y)
118#define NOT_LANE(z, x) ((z).even = ~(x).even, (z).odd = ~(x).odd)
119
120#define ROTL_LANE(z, x, n) do { \
121 lane _t = (x); \
122 (z).even = (n)%2 ? ROL32(_t.odd, ((n) + 1)/2) \
123 : ROL32(_t.even, (n)/2); \
124 (z).odd = (n)%2 ? ROL32(_t.even, ((n) - 1)/2) \
125 : ROL32(_t.odd, (n)/2); \
126} while (0)
127
128#define LANE_ZERO { 0, 0 }
129#define LANE_CMPL { 0xffffffff, 0xffffffff }
130
131static const lane rcon[24] = {
132 { 0x00000001, 0x00000000 }, { 0x00000000, 0x00000089 },
133 { 0x00000000, 0x8000008b }, { 0x00000000, 0x80008080 },
134 { 0x00000001, 0x0000008b }, { 0x00000001, 0x00008000 },
135 { 0x00000001, 0x80008088 }, { 0x00000001, 0x80000082 },
136 { 0x00000000, 0x0000000b }, { 0x00000000, 0x0000000a },
137 { 0x00000001, 0x00008082 }, { 0x00000000, 0x00008003 },
138 { 0x00000001, 0x0000808b }, { 0x00000001, 0x8000000b },
139 { 0x00000001, 0x8000008a }, { 0x00000001, 0x80000081 },
140 { 0x00000000, 0x80000081 }, { 0x00000000, 0x80000008 },
141 { 0x00000000, 0x00000083 }, { 0x00000000, 0x80008003 },
142 { 0x00000001, 0x80008088 }, { 0x00000000, 0x80000088 },
143 { 0x00000001, 0x00008000 }, { 0x00000000, 0x80008082 }
144};
145
146#else
147/* A target with good support for 64-bit shifts. We store lanes as 64-bit
148 * quantities and deal with them in the obvious, natural way.
149 */
150
151typedef keccak1600_lane_64 lane;
152#define S s64
153
154#define TO_LANE(x) (x)
155#define FROM_LANE(x) (x)
156
157#define PRINTFMT_LANE "%08lx%08lx"
158#define PRINTARGS_LANE(x) (unsigned long)HI64(x), (unsigned long)LO64(x)
159
160#define XOR_LANE(z, x, y) XOR64((z), (x), (y))
161#define AND_LANE(z, x, y) AND64((z), (x), (y))
162#define OR_LANE(z, x, y) OR64((z), (x), (y))
163#define NOT_LANE(z, x) CPL64((z), (x))
164#define ROTL_LANE(z, x, n) ROL64_((z), (x), (n))
165
166#define LANE_ZERO X64( 0, 0)
167#define LANE_CMPL X64(ffffffff, ffffffff)
168
169static const lane rcon[24] = {
170 X64(00000000, 00000001), X64(00000000, 00008082),
171 X64(80000000, 0000808a), X64(80000000, 80008000),
172 X64(00000000, 0000808b), X64(00000000, 80000001),
173 X64(80000000, 80008081), X64(80000000, 00008009),
174 X64(00000000, 0000008a), X64(00000000, 00000088),
175 X64(00000000, 80008009), X64(00000000, 8000000a),
176 X64(00000000, 8000808b), X64(80000000, 0000008b),
177 X64(80000000, 00008089), X64(80000000, 00008003),
178 X64(80000000, 00008002), X64(80000000, 00000080),
179 X64(00000000, 0000800a), X64(80000000, 8000000a),
180 X64(80000000, 80008081), X64(80000000, 00008080),
181 X64(00000000, 80000001), X64(80000000, 80008008)
182};
183
184#endif
185
186/*----- Complementing or not ----------------------------------------------*/
187
188/* We should use the complemented representation if the target doesn't have a
189 * fused and-not operation. There doesn't appear to be a principled way to
190 * do this, so we'll just have to make do with a big list. Worse, in my
191 * brief survey of the architecture reference manuals I have lying about,
192 * they've split close to 50/50 on this question, so I don't have an
193 * especially good way to pick a default. The `no-fused-op' architectures
194 * seem generally a bit more modern than the `fused-op' architectures, so I
195 * guess I'll make the complemented representation the default.
196 *
197 * and-not No and-not
198 * ------- ----------
199 * ARM (`bic') x86/amd64
200 * Sparc (`andn') z/Architecture
201 * MMIX (`andn') MIPS
202 * IA64 (`andcm') 68k
203 * VAX (`bic') RISC-V
204 * PDP-10 (`andc')
205 */
206#if !(defined(__arm__) || defined(__thumb__) || defined(__aarch64__) || \
207 defined(_M_ARM) || defined(_M_THUMB)) && \
208 !(defined(__ia64__) || defined(__ia64) || defined(__itanium__) || \
209 defined(_M_IA64)) && \
210 !defined(__mmix__) && \
211 !(defined(__sparc__) || defined(__sparc)) && \
212 !defined(__vax__) && \
213 !defined(__pdp10__)
214# define KECCAK_COMPL
215#endif
216
217#ifdef KECCAK_COMPL
218/* A target without fused and/not (`bic', `andc2'). We complement some of
219 * the lanes in the initial state and undo this on output. (Absorbing XORs
220 * input into the state, so this is unaffected.) See the handling of chi in
221 * `keccak1600_round' below for the details.
222 */
223
224#define STATE_INIT(z) do { \
225 lane cmpl = LANE_CMPL; \
226 (z)->S[I(1, 0)] = cmpl; (z)->S[I(2, 0)] = cmpl; \
227 (z)->S[I(3, 1)] = cmpl; (z)->S[I(2, 2)] = cmpl; \
228 (z)->S[I(2, 3)] = cmpl; (z)->S[I(0, 4)] = cmpl; \
229} while (0)
230
231#define STATE_OUT(z) do { \
232 NOT_LANE((z)->S[I(1, 0)], (z)->S[I(1, 0)]); \
233 NOT_LANE((z)->S[I(2, 0)], (z)->S[I(2, 0)]); \
234 NOT_LANE((z)->S[I(3, 1)], (z)->S[I(3, 1)]); \
235 NOT_LANE((z)->S[I(2, 2)], (z)->S[I(2, 2)]); \
236 NOT_LANE((z)->S[I(2, 3)], (z)->S[I(2, 3)]); \
237 NOT_LANE((z)->S[I(0, 4)], (z)->S[I(0, 4)]); \
238} while (0)
239
240#else
241/* A target with fused and/not (`bic', `andc2'). Everything is simple. */
242
243#define STATE_INIT(z) do ; while (0)
244#define STATE_OUT(z) do ; while (0)
245
246#endif
247
248/*----- Other magic constants ---------------------------------------------*/
249
250/* The rotation constants. These are systematically named -- see `THETA_RHO'
251 * below.
252 */
253#define ROT_0_0 0
254#define ROT_1_0 1
255#define ROT_2_0 62
256#define ROT_3_0 28
257#define ROT_4_0 27
258
259#define ROT_0_1 36
260#define ROT_1_1 44
261#define ROT_2_1 6
262#define ROT_3_1 55
263#define ROT_4_1 20
264
265#define ROT_0_2 3
266#define ROT_1_2 10
267#define ROT_2_2 43
268#define ROT_3_2 25
269#define ROT_4_2 39
270
271#define ROT_0_3 41
272#define ROT_1_3 45
273#define ROT_2_3 15
274#define ROT_3_3 21
275#define ROT_4_3 8
276
277#define ROT_0_4 18
278#define ROT_1_4 2
279#define ROT_2_4 61
280#define ROT_3_4 56
281#define ROT_4_4 14
282
283/*----- Debugging ---------------------------------------------------------*/
284
285#ifdef KECCAK_DEBUG
286
287#include <stdio.h>
288
289static void dump_state(const char *what, unsigned ir,
290 const keccak1600_state *x)
291{
292 unsigned i, j;
293 keccak1600_state y;
294 kludge64 a;
295 int sep;
296
297 printf(";; %s [round %u]\n", what, ir);
298 printf(";; raw state...\n");
299 for (j = 0; j < 5; j++) {
300 printf(";;");
301 for (i = 0, sep = '\t'; i < 5; i++, sep = ' ')
302 printf("%c" PRINTFMT_LANE, sep, PRINTARGS_LANE(x->S[I(i, j)]));
303 fputc('\n', stdout);
304 }
305 y = *x; STATE_OUT(&y);
306#ifdef KECCAK_COMPL
307 printf(";; uncomplemented state...\n");
308 for (j = 0; j < 5; j++) {
309 printf(";;");
310 for (i = 0, sep = '\t'; i < 5; i++, sep = ' ')
311 printf("%c" PRINTFMT_LANE, sep, PRINTARGS_LANE(y.S[I(i, j)]));
312 fputc('\n', stdout);
313 }
314#endif
315#ifdef KECCAK_I32
316 printf(";; deinterlaced state...\n");
317 for (j = 0; j < 5; j++) {
318 printf(";;");
319 for (i = 0, sep = '\t'; i < 5; i++, sep = ' ') {
320 a = FROM_LANE(y.S[I(i, j)]);
321 printf("%c%08lx%08lx", sep,
322 (unsigned long)HI64(a), (unsigned long)LO64(a));
323 }
324 fputc('\n', stdout);
325 }
326#endif
327 fputc('\n', stdout);
328}
329
330#endif
331
332/*----- The Keccak-p[1600, n] permutation ---------------------------------*/
333
334static void keccak1600_round(keccak1600_state *z,
335 const keccak1600_state *x, unsigned i)
336{
337 /* Perform a round of Keccak-p[1600, n]. Process the state X and write the
338 * result to Z.
339 */
340
341 lane b[5], c[5], d[5], t;
342
343 /* Theta, first step: calculate the column parities. */
344#define COLPARITY(j) do { \
345 c[j] = x->S[I(j, 0)]; \
346 XOR_LANE(c[j], c[j], x->S[I(j, 1)]); \
347 XOR_LANE(c[j], c[j], x->S[I(j, 2)]); \
348 XOR_LANE(c[j], c[j], x->S[I(j, 3)]); \
349 XOR_LANE(c[j], c[j], x->S[I(j, 4)]); \
350} while (0)
351 COLPARITY(0); COLPARITY(1); COLPARITY(2); COLPARITY(3); COLPARITY(4);
352#undef COLPARITY
353
354 /* Theta, second step: calculate the combined effect. */
355 ROTL_LANE(d[0], c[1], 1); XOR_LANE(d[0], d[0], c[4]);
356 ROTL_LANE(d[1], c[2], 1); XOR_LANE(d[1], d[1], c[0]);
357 ROTL_LANE(d[2], c[3], 1); XOR_LANE(d[2], d[2], c[1]);
358 ROTL_LANE(d[3], c[4], 1); XOR_LANE(d[3], d[3], c[2]);
359 ROTL_LANE(d[4], c[0], 1); XOR_LANE(d[4], d[4], c[3]);
360
361 /* Now we work plane by plane through the output. To do this, we must undo
362 * the pi transposition. Pi maps (x', y') = (y, 2 x + 3 y), so y = x', and
363 * x = (y' - 3 y)/2 = 3 (y' - 3 x') = x' + 3 y'.
364 */
365#define THETA_RHO(i0, i1, i2, i3, i4) do { \
366 \
367 /* First, theta. */ \
368 XOR_LANE(b[0], x->S[I(i0, 0)], d[i0]); \
369 XOR_LANE(b[1], x->S[I(i1, 1)], d[i1]); \
370 XOR_LANE(b[2], x->S[I(i2, 2)], d[i2]); \
371 XOR_LANE(b[3], x->S[I(i3, 3)], d[i3]); \
372 XOR_LANE(b[4], x->S[I(i4, 4)], d[i4]); \
373 \
374 /* Then rho. */ \
375 ROTL_LANE(b[0], b[0], ROT_##i0##_0); \
376 ROTL_LANE(b[1], b[1], ROT_##i1##_1); \
377 ROTL_LANE(b[2], b[2], ROT_##i2##_2); \
378 ROTL_LANE(b[3], b[3], ROT_##i3##_3); \
379 ROTL_LANE(b[4], b[4], ROT_##i4##_4); \
380} while (0)
381
382 /* The basic chi operation is: z = w ^ (~a&b), but this involves an
383 * inversion which we can mostly avoid by being clever: observe that
384 *
385 * w ^ (~a&~~b) = w ^ ~(a | ~b) = ~w ^ (a | ~b)
386 *
387 * by De Morgan's law. Furthermore, complementing w or z is basically
388 * equivalent. Bertoni, Daemen, Peeters, Van Assche, and Van Keer, `Keccak
389 * implementation overview', describe a pattern of lane complementation
390 * which propagates through theta and pi in exactly the right way to be
391 * restored easily by chi, here, with exactly one inversion per plane.
392 *
393 * Here's the pattern.
394 *
395 * [ * . * * . ] [ . * * . . ]
396 * [ * . * . . ] [ . . . * . ]
397 * [ * . * . . ] -> [ . . * . . ]
398 * [ . * . * * ] [ . . * . . ]
399 * [ * . . * . ] [ * . . . . ]
400 *
401 * where a `.' means that the lane is unchanged, and a `*' means that it
402 * has been complemented.
403 *
404 * The macros `CHI_wxy_z' calculate z in terms of w, x, y assuming that the
405 * inputs w, x, y marked with a `1' are complemented on input, and arrange
406 * for z to be complemented on output if z is so marked.
407 *
408 * The diagrams to the right show the fragment of the complementation
409 * pattern being handled by the corresponding line of code. A symbol in
410 * brackets indicates a deviation from the input pattern forced by explicit
411 * complementation: there will be exactly one of these for each plane.
412 */
413#ifdef KECCAK_COMPL
414# define CHI_COMPL(z, x) NOT_LANE((z), (x))
415# define CHI_001_1(z, w, x, y) \
416 (OR_LANE((z), (x), (y)), XOR_LANE((z), (z), (w)))
417# define CHI_010_0(z, w, x, y) \
418 (AND_LANE((z), (x), (y)), XOR_LANE((z), (z), (w)))
419# define CHI_101_0 CHI_001_1
420# define CHI_110_1 CHI_010_0
421#else
422# define CHI(z, w, x, y) \
423 (NOT_LANE((z), (x)), \
424 AND_LANE((z), (z), (y)), \
425 XOR_LANE((z), (z), (w)))
426# define CHI_COMPL(z, x) ((z) = (x))
427# define CHI_001_1 CHI
428# define CHI_010_0 CHI
429# define CHI_101_0 CHI
430# define CHI_110_1 CHI
431#endif
432
433 /* Let's do the y' = 0 plane first. Theta and rho are easy with our macro,
434 * and we've done pi with the coordinate hacking. That leaves chi next.
435 * This is hairy because we must worry about complementation.
436 */
437 THETA_RHO(0, 1, 2, 3, 4);
438 CHI_COMPL(t, b[2]); /* [.] */
439 CHI_101_0(z->S[I(0, 0)], b[0], b[1], b[2]); /* * . * -> . */
440 CHI_001_1(z->S[I(1, 0)], b[1], t, b[3]); /* . [.] * -> * */
441 CHI_110_1(z->S[I(2, 0)], b[2], b[3], b[4]); /* * * . -> * */
442 CHI_101_0(z->S[I(3, 0)], b[3], b[4], b[0]); /* * * . -> . */
443 CHI_010_0(z->S[I(4, 0)], b[4], b[0], b[1]); /* * . . -> . */
444
445 /* We'd better do iota before we forget. */
446 XOR_LANE(z->S[I(0, 0)], z->S[I(0, 0)], rcon[i]);
447
448 /* That was fun. Maybe y' = 1 will be as good. */
449 THETA_RHO(3, 4, 0, 1, 2);
450 CHI_COMPL(t, b[4]); /* [*] */
451 CHI_101_0(z->S[I(0, 1)], b[0], b[1], b[2]); /* * . * -> . */
452 CHI_010_0(z->S[I(1, 1)], b[1], b[2], b[3]); /* . * . -> . */
453 CHI_101_0(z->S[I(2, 1)], b[2], b[3], t); /* * . [*] -> . */
454 CHI_001_1(z->S[I(3, 1)], b[3], b[4], b[0]); /* * . . -> * */
455 CHI_010_0(z->S[I(4, 1)], b[4], b[0], b[1]); /* * . . -> . */
456
457 /* We're getting the hang of this. The y' = 2 plane shouldn't be any
458 * trouble.
459 */
460 THETA_RHO(1, 2, 3, 4, 0);
461 CHI_COMPL(t, b[3]); /* [*] */
462 CHI_101_0(z->S[I(0, 2)], b[0], b[1], b[2]); /* * . * -> . */
463 CHI_010_0(z->S[I(1, 2)], b[1], b[2], b[3]); /* . * . -> . */
464 CHI_110_1(z->S[I(2, 2)], b[2], t, b[4]); /* * [*] . -> * */
465 CHI_101_0(z->S[I(3, 2)], t, b[4], b[0]); /* * [*] . -> . */
466 CHI_010_0(z->S[I(4, 2)], b[4], b[0], b[1]); /* * . . -> . */
467
468 /* This isn't as interesting any more. Let's do y' = 3 before boredom sets
469 * in.
470 */
471 THETA_RHO(4, 0, 1, 2, 3);
472 CHI_COMPL(t, b[3]); /* [.] */
473 CHI_010_0(z->S[I(0, 3)], b[0], b[1], b[2]); /* . * . -> . */
474 CHI_101_0(z->S[I(1, 3)], b[1], b[2], b[3]); /* * . * -> . */
475 CHI_001_1(z->S[I(2, 3)], b[2], t, b[4]); /* . [.] * -> * */
476 CHI_010_0(z->S[I(3, 3)], t, b[4], b[0]); /* . [.] * -> . */
477 CHI_101_0(z->S[I(4, 3)], b[4], b[0], b[1]); /* . * * -> . */
478
479 /* Last plane. Just y' = 4 to go. */
480 THETA_RHO(2, 3, 4, 0, 1);
481 CHI_COMPL(t, b[1]); /* [*] */
482 CHI_110_1(z->S[I(0, 4)], b[0], t, b[2]); /* * [*] . -> * */
483 CHI_101_0(z->S[I(1, 4)], t, b[2], b[3]); /* [*] . * -> . */
484 CHI_010_0(z->S[I(2, 4)], b[2], b[3], b[4]); /* . * . -> . */
485 CHI_101_0(z->S[I(3, 4)], b[3], b[4], b[0]); /* * * . -> . */
486 CHI_010_0(z->S[I(4, 4)], b[4], b[0], b[1]); /* * . . -> . */
487
488 /* And we're done. */
489#undef THETA_RHO
490#undef CHI_COMPL
491#undef CHI_001_1
492#undef CHI_010_0
493#undef CHI_101_0
494#undef CHI_110_1
495#undef CHI
496}
497
498/* --- @keccak1600_p@ --- *
499 *
500 * Arguments: @keccak1600_state *z@ = where to write the output state
501 * @conts keccak1600_state *x@ = input state
502 * @unsigned n@ = number of rounds to perform
503 *
504 * Returns: ---
505 *
506 * Use: Implements the %$\Keccak[1600, n]$% permutation at the core
507 * of Keccak and the SHA-3 standard.
508 */
509
510void keccak1600_p(keccak1600_state *z, const keccak1600_state *x, unsigned n)
511{
512 keccak1600_state u, v;
513 unsigned i = 0;
514
515#ifdef KECCAK_DEBUG
516 dump_state("init", 0, x);
517#endif
518 keccak1600_round(&u, x, i++); n--;
519 while (n > 8) {
520 keccak1600_round(&v, &u, i++);
521 keccak1600_round(&u, &v, i++);
522 keccak1600_round(&v, &u, i++);
523 keccak1600_round(&u, &v, i++);
524 keccak1600_round(&v, &u, i++);
525 keccak1600_round(&u, &v, i++);
526 keccak1600_round(&v, &u, i++);
527 keccak1600_round(&u, &v, i++);
528 n -= 8;
529 }
530 switch (n) {
531 case 7: keccak1600_round(&v, &u, i++);
532 keccak1600_round(&u, &v, i++);
533 case 5: keccak1600_round(&v, &u, i++);
534 keccak1600_round(&u, &v, i++);
535 case 3: keccak1600_round(&v, &u, i++);
536 keccak1600_round(&u, &v, i++);
718a6891 537 case 1: keccak1600_round( z, &u, i++);
a905c0d6
MW
538 break;
539 case 8: keccak1600_round(&v, &u, i++);
540 keccak1600_round(&u, &v, i++);
541 case 6: keccak1600_round(&v, &u, i++);
542 keccak1600_round(&u, &v, i++);
543 case 4: keccak1600_round(&v, &u, i++);
544 keccak1600_round(&u, &v, i++);
545 case 2: keccak1600_round(&v, &u, i++);
718a6891 546 keccak1600_round( z, &v, i++);
a905c0d6
MW
547 break;
548 }
549#ifdef KECCAK_DEBUG
550 dump_state("final", 0, z);
551#endif
552}
553
554/* --- @keccack1600_init@ --- *
555 *
556 * Arguments: @keccak1600_state *s@ = a state to initialize
557 *
558 * Returns: ---
559 *
560 * Use: Initialize @s@ to the root state.
561 */
562
563void keccak1600_init(keccak1600_state *s)
564 { memset(s->S, 0, sizeof(s->S)); STATE_INIT(s); }
565
566/* --- @keccak1600_mix@ --- *
567 *
568 * Arguments: @keccak1600_state *s@ = a state to update
569 * @const kludge64 *p@ = pointer to 64-bit words to mix in
570 * @size_t n@ = size of the input, in 64-bit words
571 *
572 * Returns: ---
573 *
574 * Use: Mixes data into a %$\Keccak[r, 1600 - r]$% state. Note that
575 * it's the caller's responsibility to pass in no more than
576 * %$r$% bits of data.
577 */
578
579void keccak1600_mix(keccak1600_state *s, const kludge64 *p, size_t n)
580{
581 unsigned i;
582 lane a;
583
584 for (i = 0; i < n; i++)
585 { a = TO_LANE(p[i]); XOR_LANE(s->S[i], s->S[i], a); }
586}
587
588/* --- @keccak1600_extract@ --- *
589 *
590 * Arguments: @const keccak1600_state *s@ = a state to extract output from
591 * @kludge64 *p@ = pointer to 64-bit words to write
592 * @size_t n@ = size of the output, in 64-bit words
593 *
594 * Returns: ---
595 *
596 * Use: Reads output from a %$\Keccak[r, 1600 - r]$% state. Note
597 * that it's the caller's responsibility to extract no more than
598 * %$r$% bits of data.
599 */
600
601void keccak1600_extract(const keccak1600_state *s, kludge64 *p, size_t n)
602{
603 unsigned i;
604 keccak1600_state t;
605
606 t = *s; STATE_OUT(&t);
607 for (i = 0; i < n; i++) p[i] = FROM_LANE(t.S[i]);
608}
609
610/*----- Test rig ----------------------------------------------------------*/
611
612#ifdef TEST_RIG
613
614#include <stdio.h>
615
616#include <mLib/quis.h>
617#include <mLib/report.h>
618#include <mLib/testrig.h>
619
620static int vrf_p(dstr v[])
621{
622 keccak1600_state u;
623 kludge64 t[25];
624 dstr d = DSTR_INIT;
625 int n;
626 unsigned i;
627 int ok = 1;
628
629 if (v[0].len != 200) die(1, "bad input size");
630 if (v[2].len != 200) die(1, "bad output size");
631 n = *(int *)v[1].buf;
632 dstr_ensure(&d, 200); d.len = 200;
633
634 keccak1600_init(&u);
635 for (i = 0; i < 25; i++) LOAD64_L_(t[i], v[0].buf + 8*i);
636 keccak1600_mix(&u, t, 25);
637 keccak1600_p(&u, &u, n);
638 keccak1600_extract(&u, t, 25);
639 for (i = 0; i < 25; i++) STORE64_L_(d.buf + 8*i, t[i]);
640 if (memcmp(d.buf, v[2].buf, 200) != 0) {
641 ok = 0;
642 fprintf(stderr, "failed!");
643 fprintf(stderr, "\n\t input = "); type_hex.dump(&v[0], stderr);
644 fprintf(stderr, "\n\t rounds = %d", n);
645 fprintf(stderr, "\n\t expected = "); type_hex.dump(&v[2], stderr);
646 fprintf(stderr, "\n\t calclated = "); type_hex.dump(&d, stderr);
647 }
648
649 dstr_destroy(&d);
650 return (ok);
651}
652
653static test_chunk defs[] = {
654 { "p", vrf_p, { &type_hex, &type_int, &type_hex } },
655 { 0, 0, { 0 } }
656};
657
658int main(int argc, char *argv[])
659{
660 test_run(argc, argv, defs, SRCDIR"/t/keccak1600");
661 return (0);
662}
663
664#endif
665
666/*----- That's all, folks -------------------------------------------------*/