chiark / gitweb /
math/f25519.c, math/fgoldi.c: Remove some unused constant definitions.
[catacomb] / math / fgoldi.c
CommitLineData
266efb73
MW
1/* -*-c-*-
2 *
3 * Arithmetic in the Goldilocks field GF(2^448 - 2^224 - 1)
4 *
5 * (c) 2017 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of Catacomb.
11 *
12 * Catacomb is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Library General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * Catacomb is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with Catacomb; if not, write to the Free
24 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * MA 02111-1307, USA.
26 */
27
28/*----- Header files ------------------------------------------------------*/
29
30#include "config.h"
31
1bc00e2a 32#include "ct.h"
266efb73
MW
33#include "fgoldi.h"
34
35/*----- Basic setup -------------------------------------------------------*
36 *
37 * Let φ = 2^224; then p = φ^2 - φ - 1, and, in GF(p), we have φ^2 = φ + 1
38 * (hence the name).
39 */
40
f521d4c7
MW
41typedef fgoldi_piece piece;
42
266efb73
MW
43#if FGOLDI_IMPL == 28
44/* We represent an element of GF(p) as 16 28-bit signed integer pieces x_i:
45 * x = SUM_{0<=i<16} x_i 2^(28i).
46 */
47
f521d4c7 48 typedef int64 dblpiece;
266efb73
MW
49typedef uint32 upiece; typedef uint64 udblpiece;
50#define PIECEWD(i) 28
51#define NPIECE 16
52#define P p28
53
266efb73
MW
54#define B27 0x08000000u
55#define M28 0x0fffffffu
266efb73
MW
56#define M32 0xffffffffu
57
58#elif FGOLDI_IMPL == 12
59/* We represent an element of GF(p) as 40 signed integer pieces x_i: x =
60 * SUM_{0<=i<40} x_i 2^ceil(224i/20). Pieces i with i == 0 (mod 5) are 12
61 * bits wide; the others are 11 bits wide, so they form eight groups of 56
62 * bits.
63 */
64
f521d4c7 65 typedef int32 dblpiece;
266efb73
MW
66typedef uint16 upiece; typedef uint32 udblpiece;
67#define PIECEWD(i) ((i)%5 ? 11 : 12)
68#define NPIECE 40
69#define P p12
70
266efb73
MW
71#define B11 0x0800u
72#define B10 0x0400u
73#define M12 0xfffu
74#define M11 0x7ffu
266efb73
MW
75#define M8 0xffu
76
77#endif
78
79/*----- Debugging machinery -----------------------------------------------*/
80
81#if defined(FGOLDI_DEBUG) || defined(TEST_RIG)
82
83#include <stdio.h>
84
85#include "mp.h"
86#include "mptext.h"
87
88static mp *get_pgoldi(void)
89{
90 mp *p = MP_NEW, *t = MP_NEW;
91
92 p = mp_setbit(p, MP_ZERO, 448);
93 t = mp_setbit(t, MP_ZERO, 224);
94 p = mp_sub(p, p, t);
95 p = mp_sub(p, p, MP_ONE);
96 mp_drop(t);
97 return (p);
98}
99
100DEF_FDUMP(fdump, piece, PIECEWD, NPIECE, 56, get_pgoldi())
101
102#endif
103
104/*----- Loading and storing -----------------------------------------------*/
105
106/* --- @fgoldi_load@ --- *
107 *
108 * Arguments: @fgoldi *z@ = where to store the result
109 * @const octet xv[56]@ = source to read
110 *
111 * Returns: ---
112 *
113 * Use: Reads an element of %$\gf{2^{448} - 2^{224} - 19}$% in
114 * external representation from @xv@ and stores it in @z@.
115 *
116 * External representation is little-endian base-256. Some
117 * elements have multiple encodings, which are not produced by
118 * correct software; use of noncanonical encodings is not an
119 * error, and toleration of them is considered a performance
120 * feature.
121 */
122
123void fgoldi_load(fgoldi *z, const octet xv[56])
124{
125#if FGOLDI_IMPL == 28
126
127 unsigned i;
128 uint32 xw[14];
129 piece b, c;
130
131 /* First, read the input value as words. */
132 for (i = 0; i < 14; i++) xw[i] = LOAD32_L(xv + 4*i);
133
134 /* Extract unsigned 28-bit pieces from the words. */
135 z->P[ 0] = (xw[ 0] >> 0)&M28;
136 z->P[ 7] = (xw[ 6] >> 4)&M28;
137 z->P[ 8] = (xw[ 7] >> 0)&M28;
138 z->P[15] = (xw[13] >> 4)&M28;
139 for (i = 1; i < 7; i++) {
140 z->P[i + 0] = ((xw[i + 0] << (4*i)) | (xw[i - 1] >> (32 - 4*i)))&M28;
141 z->P[i + 8] = ((xw[i + 7] << (4*i)) | (xw[i + 6] >> (32 - 4*i)))&M28;
142 }
143
144 /* Convert the nonnegative pieces into a balanced signed representation, so
145 * each piece ends up in the interval |z_i| <= 2^27. For each piece, if
146 * its top bit is set, lend a bit leftwards; in the case of z_15, reduce
147 * this bit by adding it onto z_0 and z_8, since this is the φ^2 bit, and
148 * φ^2 = φ + 1. We delay this carry until after all of the pieces have
149 * been balanced. If we don't do this, then we have to do a more expensive
150 * test for nonzeroness to decide whether to lend a bit leftwards rather
151 * than just testing a single bit.
152 *
153 * Note that we don't try for a canonical representation here: both upper
154 * and lower bounds are achievable.
155 */
156 b = z->P[15]&B27; z->P[15] -= b << 1; c = b >> 27;
157 for (i = NPIECE - 1; i--; )
158 { b = z->P[i]&B27; z->P[i] -= b << 1; z->P[i + 1] += b >> 27; }
159 z->P[0] += c; z->P[8] += c;
160
161#elif FGOLDI_IMPL == 12
162
163 unsigned i, j, n, w, b;
164 uint32 a;
165 int c;
166
167 /* First, convert the bytes into nonnegative pieces. */
168 for (i = j = a = n = 0, w = PIECEWD(0); i < 56; i++) {
169 a |= (uint32)xv[i] << n; n += 8;
170 if (n >= w) {
171 z->P[j++] = a&MASK(w);
172 a >>= w; n -= w; w = PIECEWD(j);
173 }
174 }
175
176 /* Convert the nonnegative pieces into a balanced signed representation, so
177 * each piece ends up in the interval |z_i| <= 2^11 + 1.
178 */
179 b = z->P[39]&B10; z->P[39] -= b << 1; c = b >> 10;
180 for (i = NPIECE - 1; i--; ) {
181 w = PIECEWD(i) - 1;
182 b = z->P[i]&BIT(w);
183 z->P[i] -= b << 1;
184 z->P[i + 1] += b >> w;
185 }
186 z->P[0] += c; z->P[20] += c;
187
188#endif
189}
190
191/* --- @fgoldi_store@ --- *
192 *
193 * Arguments: @octet zv[56]@ = where to write the result
194 * @const fgoldi *x@ = the field element to write
195 *
196 * Returns: ---
197 *
198 * Use: Stores a field element in the given octet vector in external
199 * representation. A canonical encoding is always stored.
200 */
201
202void fgoldi_store(octet zv[56], const fgoldi *x)
203{
204#if FGOLDI_IMPL == 28
205
206 piece y[NPIECE], yy[NPIECE], c, d;
207 uint32 u, v;
208 mask32 m;
209 unsigned i;
210
211 for (i = 0; i < NPIECE; i++) y[i] = x->P[i];
212
213 /* First, propagate the carries. By the end of this, we'll have all of the
214 * the pieces canonically sized and positive, and maybe there'll be
215 * (signed) carry out. The carry c is in { -1, 0, +1 }, and the remaining
216 * value will be in the half-open interval [0, φ^2). The whole represented
217 * value is then y + φ^2 c.
218 *
219 * Assume that we start out with |y_i| <= 2^30. We start off by cutting
220 * off and reducing the carry c_15 from the topmost piece, y_15. This
221 * leaves 0 <= y_15 < 2^28; and we'll have |c_15| <= 4. We'll add this
222 * onto y_0 and y_8, and propagate the carries. It's very clear that we'll
223 * end up with |y + (φ + 1) c_15 - φ^2/2| << φ^2.
224 *
225 * Here, the y_i are signed, so we must be cautious about bithacking them.
226 */
227 c = ASR(piece, y[15], 28); y[15] = (upiece)y[15]&M28; y[8] += c;
228 for (i = 0; i < NPIECE; i++)
229 { y[i] += c; c = ASR(piece, y[i], 28); y[i] = (upiece)y[i]&M28; }
230
231 /* Now we have a slightly fiddly job to do. If c = +1, or if c = 0 and
232 * y >= p, then we should subtract p from the whole value; if c = -1 then
233 * we should add p; and otherwise we should do nothing.
234 *
235 * But conditional behaviour is bad, m'kay. So here's what we do instead.
236 *
237 * The first job is to sort out what we wanted to do. If c = -1 then we
238 * want to (a) invert the constant addend and (b) feed in a carry-in;
239 * otherwise, we don't.
240 */
241 m = SIGN(c)&M28;
242 d = m&1;
243
244 /* Now do the addition/subtraction. Remember that all of the y_i are
245 * nonnegative, so shifting and masking are safe and easy.
246 */
247 d += y[0] + (1 ^ m); yy[0] = d&M28; d >>= 28;
248 for (i = 1; i < 8; i++)
249 { d += y[i] + m; yy[i] = d&M28; d >>= 28; }
250 d += y[8] + (1 ^ m); yy[8] = d&M28; d >>= 28;
251 for (i = 9; i < 16; i++)
252 { d += y[i] + m; yy[i] = d&M28; d >>= 28; }
253
254 /* The final carry-out is in d; since we only did addition, and the y_i are
255 * nonnegative, then d is in { 0, 1 }. We want to keep y', rather than y,
256 * if (a) c /= 0 (in which case we know that the old value was
257 * unsatisfactory), or (b) if d = 1 (in which case, if c = 0, we know that
258 * the subtraction didn't cause a borrow, so we must be in the case where
259 * p <= y < φ^2.
260 */
261 m = NONZEROP(c) | ~NONZEROP(d - 1);
262 for (i = 0; i < NPIECE; i++) y[i] = (yy[i]&m) | (y[i]&~m);
263
264 /* Extract 32-bit words from the value. */
265 for (i = 0; i < 7; i++) {
266 u = ((y[i + 0] >> (4*i)) | ((uint32)y[i + 1] << (28 - 4*i)))&M32;
267 v = ((y[i + 8] >> (4*i)) | ((uint32)y[i + 9] << (28 - 4*i)))&M32;
268 STORE32_L(zv + 4*i, u);
269 STORE32_L(zv + 4*i + 28, v);
270 }
271
272#elif FGOLDI_IMPL == 12
273
274 piece y[NPIECE], yy[NPIECE], c, d;
275 uint32 a;
276 mask32 m, mm;
277 unsigned i, j, n, w;
278
279 for (i = 0; i < NPIECE; i++) y[i] = x->P[i];
280
281 /* First, propagate the carries. By the end of this, we'll have all of the
282 * the pieces canonically sized and positive, and maybe there'll be
283 * (signed) carry out. The carry c is in { -1, 0, +1 }, and the remaining
284 * value will be in the half-open interval [0, φ^2). The whole represented
285 * value is then y + φ^2 c.
286 *
287 * Assume that we start out with |y_i| <= 2^14. We start off by cutting
288 * off and reducing the carry c_39 from the topmost piece, y_39. This
289 * leaves 0 <= y_39 < 2^11; and we'll have |c_39| <= 16. We'll add this
290 * onto y_0 and y_20, and propagate the carries. It's very clear that
291 * we'll end up with |y + (φ + 1) c_39 - φ^2/2| << φ^2.
292 *
293 * Here, the y_i are signed, so we must be cautious about bithacking them.
294 */
295 c = ASR(piece, y[39], 11); y[39] = (piece)y[39]&M11; y[20] += c;
296 for (i = 0; i < NPIECE; i++) {
297 w = PIECEWD(i); m = (1 << w) - 1;
298 y[i] += c; c = ASR(piece, y[i], w); y[i] = (upiece)y[i]&m;
299 }
300
301 /* Now we have a slightly fiddly job to do. If c = +1, or if c = 0 and
302 * y >= p, then we should subtract p from the whole value; if c = -1 then
303 * we should add p; and otherwise we should do nothing.
304 *
305 * But conditional behaviour is bad, m'kay. So here's what we do instead.
306 *
307 * The first job is to sort out what we wanted to do. If c = -1 then we
308 * want to (a) invert the constant addend and (b) feed in a carry-in;
309 * otherwise, we don't.
310 */
311 mm = SIGN(c);
312 d = m&1;
313
314 /* Now do the addition/subtraction. Remember that all of the y_i are
315 * nonnegative, so shifting and masking are safe and easy.
316 */
317 d += y[ 0] + (1 ^ (mm&M12)); yy[ 0] = d&M12; d >>= 12;
318 for (i = 1; i < 20; i++) {
319 w = PIECEWD(i); m = MASK(w);
320 d += y[ i] + (mm&m); yy[ i] = d&m; d >>= w;
321 }
322 d += y[20] + (1 ^ (mm&M12)); yy[20] = d&M12; d >>= 12;
323 for (i = 21; i < 40; i++) {
324 w = PIECEWD(i); m = MASK(w);
325 d += y[ i] + (mm&m); yy[ i] = d&m; d >>= w;
326 }
327
328 /* The final carry-out is in d; since we only did addition, and the y_i are
329 * nonnegative, then d is in { 0, 1 }. We want to keep y', rather than y,
330 * if (a) c /= 0 (in which case we know that the old value was
331 * unsatisfactory), or (b) if d = 1 (in which case, if c = 0, we know that
332 * the subtraction didn't cause a borrow, so we must be in the case where
333 * p <= y < φ^2.
334 */
335 m = NONZEROP(c) | ~NONZEROP(d - 1);
336 for (i = 0; i < NPIECE; i++) y[i] = (yy[i]&m) | (y[i]&~m);
337
338 /* Convert that back into octets. */
339 for (i = j = a = n = 0; i < NPIECE; i++) {
340 a |= (uint32)y[i] << n; n += PIECEWD(i);
341 while (n >= 8) { zv[j++] = a&M8; a >>= 8; n -= 8; }
342 }
343
344#endif
345}
346
347/* --- @fgoldi_set@ --- *
348 *
349 * Arguments: @fgoldi *z@ = where to write the result
350 * @int a@ = a small-ish constant
351 *
352 * Returns: ---
353 *
354 * Use: Sets @z@ to equal @a@.
355 */
356
357void fgoldi_set(fgoldi *x, int a)
358{
359 unsigned i;
360
361 x->P[0] = a;
362 for (i = 1; i < NPIECE; i++) x->P[i] = 0;
363}
364
365/*----- Basic arithmetic --------------------------------------------------*/
366
367/* --- @fgoldi_add@ --- *
368 *
369 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
370 * @const fgoldi *x, *y@ = two operands
371 *
372 * Returns: ---
373 *
374 * Use: Set @z@ to the sum %$x + y$%.
375 */
376
377void fgoldi_add(fgoldi *z, const fgoldi *x, const fgoldi *y)
378{
379 unsigned i;
380 for (i = 0; i < NPIECE; i++) z->P[i] = x->P[i] + y->P[i];
381}
382
383/* --- @fgoldi_sub@ --- *
384 *
385 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
386 * @const fgoldi *x, *y@ = two operands
387 *
388 * Returns: ---
389 *
390 * Use: Set @z@ to the difference %$x - y$%.
391 */
392
393void fgoldi_sub(fgoldi *z, const fgoldi *x, const fgoldi *y)
394{
395 unsigned i;
396 for (i = 0; i < NPIECE; i++) z->P[i] = x->P[i] - y->P[i];
397}
398
1bc00e2a
MW
399/* --- @fgoldi_neg@ --- *
400 *
401 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@)
402 * @const fgoldi *x@ = an operand
403 *
404 * Returns: ---
405 *
406 * Use: Set @z = -x@.
407 */
408
409void fgoldi_neg(fgoldi *z, const fgoldi *x)
410{
411 unsigned i;
412 for (i = 0; i < NPIECE; i++) z->P[i] = -x->P[i];
413}
414
266efb73
MW
415/*----- Constant-time utilities -------------------------------------------*/
416
1bc00e2a
MW
417/* --- @fgoldi_pick2@ --- *
418 *
419 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
420 * @const fgoldi *x, *y@ = two operands
421 * @uint32 m@ = a mask
422 *
423 * Returns: ---
424 *
425 * Use: If @m@ is zero, set @z = y@; if @m@ is all-bits-set, then set
426 * @z = x@. If @m@ has some other value, then scramble @z@ in
427 * an unhelpful way.
428 */
429
430void fgoldi_pick2(fgoldi *z, const fgoldi *x, const fgoldi *y, uint32 m)
431{
432 mask32 mm = FIX_MASK32(m);
433 unsigned i;
434 for (i = 0; i < NPIECE; i++) z->P[i] = PICK2(x->P[i], y->P[i], mm);
435}
436
437/* --- @fgoldi_pickn@ --- *
438 *
439 * Arguments: @fgoldi *z@ = where to put the result
440 * @const fgoldi *v@ = a table of entries
441 * @size_t n@ = the number of entries in @v@
442 * @size_t i@ = an index
443 *
444 * Returns: ---
445 *
446 * Use: If @0 <= i < n < 32@ then set @z = v[i]@. If @n >= 32@ then
447 * do something unhelpful; otherwise, if @i >= n@ then set @z@
448 * to zero.
449 */
450
451void fgoldi_pickn(fgoldi *z, const fgoldi *v, size_t n, size_t i)
452{
453 uint32 b = (uint32)1 << (31 - i);
454 mask32 m;
455 unsigned j;
456
457 for (j = 0; j < NPIECE; j++) z->P[j] = 0;
458 while (n--) {
459 m = SIGN(b);
460 for (j = 0; j < NPIECE; j++) CONDPICK(z->P[j], v->P[j], m);
461 v++; b <<= 1;
462 }
463}
464
266efb73
MW
465/* --- @fgoldi_condswap@ --- *
466 *
467 * Arguments: @fgoldi *x, *y@ = two operands
468 * @uint32 m@ = a mask
469 *
470 * Returns: ---
471 *
472 * Use: If @m@ is zero, do nothing; if @m@ is all-bits-set, then
473 * exchange @x@ and @y@. If @m@ has some other value, then
474 * scramble @x@ and @y@ in an unhelpful way.
475 */
476
477void fgoldi_condswap(fgoldi *x, fgoldi *y, uint32 m)
478{
479 unsigned i;
480 mask32 mm = FIX_MASK32(m);
481
482 for (i = 0; i < NPIECE; i++) CONDSWAP(x->P[i], y->P[i], mm);
483}
484
1bc00e2a
MW
485/* --- @fgoldi_condneg@ --- *
486 *
487 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@)
488 * @const fgoldi *x@ = an operand
489 * @uint32 m@ = a mask
490 *
491 * Returns: ---
492 *
493 * Use: If @m@ is zero, set @z = x@; if @m@ is all-bits-set, then set
494 * @z = -x@. If @m@ has some other value then scramble @z@ in
495 * an unhelpful way.
496 */
497
498void fgoldi_condneg(fgoldi *z, const fgoldi *x, uint32 m)
499{
500#ifdef NEG_TWOC
501 mask32 m_xor = FIX_MASK32(m);
502 piece m_add = m&1;
503# define CONDNEG(x) (((x) ^ m_xor) + m_add)
504#else
505 int s = PICK2(-1, +1, m);
506# define CONDNEG(x) (s*(x))
507#endif
508
509 unsigned i;
510 for (i = 0; i < NPIECE; i++) z->P[i] = CONDNEG(x->P[i]);
511
512#undef CONDNEG
513}
514
266efb73
MW
515/*----- Multiplication ----------------------------------------------------*/
516
517#if FGOLDI_IMPL == 28
518
519/* Let B = 2^63 - 1 be the largest value such that +B and -B can be
520 * represented in a double-precision piece. On entry, it must be the case
521 * that |X_i| <= M <= B - 2^27 for some M. If this is the case, then, on
522 * exit, we will have |Z_i| <= 2^27 + M/2^27.
523 */
524#define CARRY_REDUCE(z, x) do { \
525 dblpiece _t[NPIECE], _c; \
526 unsigned _i; \
527 \
528 /* Bias the input pieces. This keeps the carries and so on centred \
529 * around zero rather than biased positive. \
530 */ \
531 for (_i = 0; _i < NPIECE; _i++) _t[_i] = (x)[_i] + B27; \
532 \
533 /* Calculate the reduced pieces. Careful with the bithacking. */ \
534 _c = ASR(dblpiece, _t[15], 28); \
535 (z)[0] = (dblpiece)((udblpiece)_t[0]&M28) - B27 + _c; \
536 for (_i = 1; _i < NPIECE; _i++) { \
537 (z)[_i] = (dblpiece)((udblpiece)_t[_i]&M28) - B27 + \
538 ASR(dblpiece, _t[_i - 1], 28); \
539 } \
540 (z)[8] += _c; \
541} while (0)
542
543#elif FGOLDI_IMPL == 12
544
545static void carry_reduce(dblpiece x[NPIECE])
546{
547 /* Initial bounds: we assume |x_i| < 2^31 - 2^27. */
548
549 unsigned i, j;
550 dblpiece c;
551
552 /* The result is nearly canonical, because we do sequential carry
553 * propagation, because smaller processors are more likely to prefer the
554 * smaller working set than the instruction-level parallelism.
555 *
556 * Start at x_37; truncate it to 10 bits, and propagate the carry to x_38.
557 * Truncate x_38 to 10 bits, and add the carry onto x_39. Truncate x_39 to
558 * 10 bits, and add the carry onto x_0 and x_20. And so on.
559 *
560 * Once we reach x_37 for the second time, we start with |x_37| <= 2^10.
561 * The carry into x_37 is at most 2^21; so the carry out into x_38 has
562 * magnitude at most 2^10. In turn, |x_38| <= 2^10 before the carry, so is
563 * now no more than 2^11 in magnitude, and the carry out into x_39 is at
564 * most 1. This leaves |x_39| <= 2^10 + 1 after carry propagation.
565 *
566 * Be careful with the bit hacking because the quantities involved are
567 * signed.
568 */
569
570 /* For each piece, we bias it so that floor division (as done by an
571 * arithmetic right shift) and modulus (as done by bitwise-AND) does the
572 * right thing.
573 */
574#define CARRY(i, wd, b, m) do { \
575 x[i] += (b); \
576 c = ASR(dblpiece, x[i], (wd)); \
577 x[i] = (dblpiece)((udblpiece)x[i]&(m)) - (b); \
578} while (0)
579
580 { CARRY(37, 11, B10, M11); }
581 { x[38] += c; CARRY(38, 11, B10, M11); }
582 { x[39] += c; CARRY(39, 11, B10, M11); }
583 x[20] += c;
584 for (i = 0; i < 35; ) {
585 { x[i] += c; CARRY( i, 12, B11, M12); i++; }
586 for (j = i + 4; i < j; ) { x[i] += c; CARRY( i, 11, B10, M11); i++; }
587 }
588 { x[i] += c; CARRY( i, 12, B11, M12); i++; }
589 while (i < 39) { x[i] += c; CARRY( i, 11, B10, M11); i++; }
590 x[39] += c;
591}
592
593#endif
594
595/* --- @fgoldi_mulconst@ --- *
596 *
597 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@)
598 * @const fgoldi *x@ = an operand
599 * @long a@ = a small-ish constant; %$|a| < 2^{20}$%.
600 *
601 * Returns: ---
602 *
603 * Use: Set @z@ to the product %$a x$%.
604 */
605
606void fgoldi_mulconst(fgoldi *z, const fgoldi *x, long a)
607{
608 unsigned i;
609 dblpiece zz[NPIECE], aa = a;
610
611 for (i = 0; i < NPIECE; i++) zz[i] = aa*x->P[i];
612#if FGOLDI_IMPL == 28
613 CARRY_REDUCE(z->P, zz);
614#elif FGOLDI_IMPL == 12
615 carry_reduce(zz);
616 for (i = 0; i < NPIECE; i++) z->P[i] = zz[i];
617#endif
618}
619
620/* --- @fgoldi_mul@ --- *
621 *
622 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
623 * @const fgoldi *x, *y@ = two operands
624 *
625 * Returns: ---
626 *
627 * Use: Set @z@ to the product %$x y$%.
628 */
629
630void fgoldi_mul(fgoldi *z, const fgoldi *x, const fgoldi *y)
631{
632 dblpiece zz[NPIECE], u[NPIECE];
633 piece ab[NPIECE/2], cd[NPIECE/2];
634 const piece
635 *a = x->P + NPIECE/2, *b = x->P,
636 *c = y->P + NPIECE/2, *d = y->P;
637 unsigned i, j;
638
639#if FGOLDI_IMPL == 28
640
641# define M(x,i, y,j) ((dblpiece)(x)[i]*(y)[j])
642
643#elif FGOLDI_IMPL == 12
644
645 static const unsigned short off[39] = {
646 0, 12, 23, 34, 45, 56, 68, 79, 90, 101,
647 112, 124, 135, 146, 157, 168, 180, 191, 202, 213,
648 224, 236, 247, 258, 269, 280, 292, 303, 314, 325,
649 336, 348, 359, 370, 381, 392, 404, 415, 426
650 };
651
652#define M(x,i, y,j) \
653 (((dblpiece)(x)[i]*(y)[j]) << (off[i] + off[j] - off[(i) + (j)]))
654
655#endif
656
657 /* Behold the magic.
658 *
659 * Write x = a φ + b, and y = c φ + d. Then x y = a c φ^2 +
660 * (a d + b c) φ + b d. Karatsuba and Ofman observed that a d + b c =
661 * (a + b) (c + d) - a c - b d, saving a multiplication, and Hamburg chose
662 * the prime p so that φ^2 = φ + 1. So
663 *
664 * x y = ((a + b) (c + d) - b d) φ + a c + b d
665 */
666
667 for (i = 0; i < NPIECE; i++) zz[i] = 0;
668
669 /* Our first job will be to calculate (1 - φ) b d, and write the result
670 * into z. As we do this, an interesting thing will happen. Write
671 * b d = u φ + v; then (1 - φ) b d = u φ + v - u φ^2 - v φ = (1 - φ) v - u.
672 * So, what we do is to write the product end-swapped and negated, and then
673 * we'll subtract the (negated, remember) high half from the low half.
674 */
675 for (i = 0; i < NPIECE/2; i++) {
676 for (j = 0; j < NPIECE/2 - i; j++)
677 zz[i + j + NPIECE/2] -= M(b,i, d,j);
678 for (; j < NPIECE/2; j++)
679 zz[i + j - NPIECE/2] -= M(b,i, d,j);
680 }
681 for (i = 0; i < NPIECE/2; i++)
682 zz[i] -= zz[i + NPIECE/2];
683
684 /* Next, we add on a c. There are no surprises here. */
685 for (i = 0; i < NPIECE/2; i++)
686 for (j = 0; j < NPIECE/2; j++)
687 zz[i + j] += M(a,i, c,j);
688
689 /* Now, calculate a + b and c + d. */
690 for (i = 0; i < NPIECE/2; i++)
691 { ab[i] = a[i] + b[i]; cd[i] = c[i] + d[i]; }
692
693 /* Finally (for the multiplication) we must add on (a + b) (c + d) φ.
694 * Write (a + b) (c + d) as u φ + v; then we actually want u φ^2 + v φ =
695 * v φ + (1 + φ) u. We'll store u in a temporary place and add it on
696 * twice.
697 */
698 for (i = 0; i < NPIECE; i++) u[i] = 0;
699 for (i = 0; i < NPIECE/2; i++) {
700 for (j = 0; j < NPIECE/2 - i; j++)
701 zz[i + j + NPIECE/2] += M(ab,i, cd,j);
702 for (; j < NPIECE/2; j++)
703 u[i + j - NPIECE/2] += M(ab,i, cd,j);
704 }
705 for (i = 0; i < NPIECE/2; i++)
706 { zz[i] += u[i]; zz[i + NPIECE/2] += u[i]; }
707
708#undef M
709
710#if FGOLDI_IMPL == 28
711 /* That wraps it up for the multiplication. Let's figure out some bounds.
712 * Fortunately, Karatsuba is a polynomial identity, so all of the pieces
713 * end up the way they'd be if we'd done the thing the easy way, which
714 * simplifies the analysis. Suppose we started with |x_i|, |y_i| <= 9/5
715 * 2^28. The overheads in the result are given by the coefficients of
716 *
717 * ((u^16 - 1)/(u - 1))^2 mod u^16 - u^8 - 1
718 *
719 * the greatest of which is 38. So |z_i| <= 38*81/25*2^56 < 2^63.
720 *
721 * Anyway, a round of `CARRY_REDUCE' will leave us with |z_i| < 2^27 +
722 * 2^36; and a second round will leave us with |z_i| < 2^27 + 512.
723 */
724 for (i = 0; i < 2; i++) CARRY_REDUCE(zz, zz);
725 for (i = 0; i < NPIECE; i++) z->P[i] = zz[i];
726#elif FGOLDI_IMPL == 12
727 carry_reduce(zz);
728 for (i = 0; i < NPIECE; i++) z->P[i] = zz[i];
729#endif
730}
731
732/* --- @fgoldi_sqr@ --- *
733 *
734 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
735 * @const fgoldi *x@ = an operand
736 *
737 * Returns: ---
738 *
739 * Use: Set @z@ to the square %$x^2$%.
740 */
741
742void fgoldi_sqr(fgoldi *z, const fgoldi *x)
743{
744#if FGOLDI_IMPL == 28
745
746 dblpiece zz[NPIECE], u[NPIECE];
747 piece ab[NPIECE];
748 const piece *a = x->P + NPIECE/2, *b = x->P;
749 unsigned i, j;
750
751# define M(x,i, y,j) ((dblpiece)(x)[i]*(y)[j])
752
753 /* The magic is basically the same as `fgoldi_mul' above. We write
754 * x = a φ + b and use Karatsuba and the special prime shape. This time,
755 * we have
756 *
757 * x^2 = ((a + b)^2 - b^2) φ + a^2 + b^2
758 */
759
760 for (i = 0; i < NPIECE; i++) zz[i] = 0;
761
762 /* Our first job will be to calculate (1 - φ) b^2, and write the result
763 * into z. Again, this interacts pleasantly with the prime shape.
764 */
765 for (i = 0; i < NPIECE/4; i++) {
766 zz[2*i + NPIECE/2] -= M(b,i, b,i);
767 for (j = i + 1; j < NPIECE/2 - i; j++)
768 zz[i + j + NPIECE/2] -= 2*M(b,i, b,j);
769 for (; j < NPIECE/2; j++)
770 zz[i + j - NPIECE/2] -= 2*M(b,i, b,j);
771 }
772 for (; i < NPIECE/2; i++) {
773 zz[2*i - NPIECE/2] -= M(b,i, b,i);
774 for (j = i + 1; j < NPIECE/2; j++)
775 zz[i + j - NPIECE/2] -= 2*M(b,i, b,j);
776 }
777 for (i = 0; i < NPIECE/2; i++)
778 zz[i] -= zz[i + NPIECE/2];
779
780 /* Next, we add on a^2. There are no surprises here. */
781 for (i = 0; i < NPIECE/2; i++) {
782 zz[2*i] += M(a,i, a,i);
783 for (j = i + 1; j < NPIECE/2; j++)
784 zz[i + j] += 2*M(a,i, a,j);
785 }
786
787 /* Now, calculate a + b. */
788 for (i = 0; i < NPIECE/2; i++)
789 ab[i] = a[i] + b[i];
790
791 /* Finally (for the multiplication) we must add on (a + b)^2 φ.
792 * Write (a + b)^2 as u φ + v; then we actually want (u + v) φ + u. We'll
793 * store u in a temporary place and add it on twice.
794 */
795 for (i = 0; i < NPIECE; i++) u[i] = 0;
796 for (i = 0; i < NPIECE/4; i++) {
797 zz[2*i + NPIECE/2] += M(ab,i, ab,i);
798 for (j = i + 1; j < NPIECE/2 - i; j++)
799 zz[i + j + NPIECE/2] += 2*M(ab,i, ab,j);
800 for (; j < NPIECE/2; j++)
801 u[i + j - NPIECE/2] += 2*M(ab,i, ab,j);
802 }
803 for (; i < NPIECE/2; i++) {
804 u[2*i - NPIECE/2] += M(ab,i, ab,i);
805 for (j = i + 1; j < NPIECE/2; j++)
806 u[i + j - NPIECE/2] += 2*M(ab,i, ab,j);
807 }
808 for (i = 0; i < NPIECE/2; i++)
809 { zz[i] += u[i]; zz[i + NPIECE/2] += u[i]; }
810
811#undef M
812
813 /* Finally, carrying. */
814 for (i = 0; i < 2; i++) CARRY_REDUCE(zz, zz);
815 for (i = 0; i < NPIECE; i++) z->P[i] = zz[i];
816
817#elif FGOLDI_IMPL == 12
818 fgoldi_mul(z, x, x);
819#endif
820}
821
822/*----- More advanced operations ------------------------------------------*/
823
824/* --- @fgoldi_inv@ --- *
825 *
826 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@)
827 * @const fgoldi *x@ = an operand
828 *
829 * Returns: ---
830 *
831 * Use: Stores in @z@ the multiplicative inverse %$x^{-1}$%. If
832 * %$x = 0$% then @z@ is set to zero. This is considered a
833 * feature.
834 */
835
836void fgoldi_inv(fgoldi *z, const fgoldi *x)
837{
838 fgoldi t, u;
839 unsigned i;
840
841#define SQRN(z, x, n) do { \
842 fgoldi_sqr((z), (x)); \
843 for (i = 1; i < (n); i++) fgoldi_sqr((z), (z)); \
844} while (0)
845
846 /* Calculate x^-1 = x^(p - 2) = x^(2^448 - 2^224 - 3), which also handles
847 * x = 0 as intended. The addition chain is home-made.
848 */ /* step | value */
849 fgoldi_sqr(&u, x); /* 1 | 2 */
850 fgoldi_mul(&t, &u, x); /* 2 | 3 */
851 SQRN(&u, &t, 2); /* 4 | 12 */
852 fgoldi_mul(&t, &u, &t); /* 5 | 15 */
853 SQRN(&u, &t, 4); /* 9 | 240 */
854 fgoldi_mul(&u, &u, &t); /* 10 | 255 = 2^8 - 1 */
855 SQRN(&u, &u, 4); /* 14 | 2^12 - 16 */
856 fgoldi_mul(&t, &u, &t); /* 15 | 2^12 - 1 */
857 SQRN(&u, &t, 12); /* 27 | 2^24 - 2^12 */
858 fgoldi_mul(&u, &u, &t); /* 28 | 2^24 - 1 */
859 SQRN(&u, &u, 12); /* 40 | 2^36 - 2^12 */
860 fgoldi_mul(&t, &u, &t); /* 41 | 2^36 - 1 */
861 fgoldi_sqr(&t, &t); /* 42 | 2^37 - 2 */
862 fgoldi_mul(&t, &t, x); /* 43 | 2^37 - 1 */
863 SQRN(&u, &t, 37); /* 80 | 2^74 - 2^37 */
864 fgoldi_mul(&u, &u, &t); /* 81 | 2^74 - 1 */
865 SQRN(&u, &u, 37); /* 118 | 2^111 - 2^37 */
866 fgoldi_mul(&t, &u, &t); /* 119 | 2^111 - 1 */
867 SQRN(&u, &t, 111); /* 230 | 2^222 - 2^111 */
868 fgoldi_mul(&t, &u, &t); /* 231 | 2^222 - 1 */
869 fgoldi_sqr(&u, &t); /* 232 | 2^223 - 2 */
870 fgoldi_mul(&u, &u, x); /* 233 | 2^223 - 1 */
871 SQRN(&u, &u, 223); /* 456 | 2^446 - 2^223 */
872 fgoldi_mul(&t, &u, &t); /* 457 | 2^446 - 2^222 - 1 */
873 SQRN(&t, &t, 2); /* 459 | 2^448 - 2^224 - 4 */
874 fgoldi_mul(z, &t, x); /* 460 | 2^448 - 2^224 - 3 */
875
876#undef SQRN
877}
878
1bc00e2a
MW
879/* --- @fgoldi_quosqrt@ --- *
880 *
881 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
882 * @const fgoldi *x, *y@ = two operands
883 *
884 * Returns: Zero if successful, @-1@ if %$x/y$% is not a square.
885 *
886 * Use: Stores in @z@ the one of the square roots %$\pm\sqrt{x/y}$%.
887 * If %$x = y = 0% then the result is zero; if %$y = 0$% but %$x
888 * \ne 0$% then the operation fails. If you wanted a specific
889 * square root then you'll have to pick it yourself.
890 */
891
892int fgoldi_quosqrt(fgoldi *z, const fgoldi *x, const fgoldi *y)
893{
894 fgoldi t, u, v;
895 octet xb[56], b0[56];
896 int32 rc = -1;
897 mask32 m;
898 unsigned i;
899
900#define SQRN(z, x, n) do { \
901 fgoldi_sqr((z), (x)); \
902 for (i = 1; i < (n); i++) fgoldi_sqr((z), (z)); \
903} while (0)
904
905 /* This is, fortunately, significantly easier than the equivalent problem
906 * in GF(2^255 - 19), since p == 3 (mod 4).
907 *
908 * If x/y is square, then so is v = y^2 x/y = x y, and therefore u has
909 * order r = (p - 1)/2. Let w = v^{(p-3)/4}. Then w^2 = v^{(p-3)/2} =
910 * u^{r-1} = 1/v = 1/x y. Clearly, then, (x w)^2 = x^2/x y = x/y, so x w
911 * is one of the square roots we seek.
912 *
913 * The addition chain, then, is a prefix of the previous one.
914 */
915 fgoldi_mul(&v, x, y);
916
917 fgoldi_sqr(&u, &v); /* 1 | 2 */
918 fgoldi_mul(&t, &u, &v); /* 2 | 3 */
919 SQRN(&u, &t, 2); /* 4 | 12 */
920 fgoldi_mul(&t, &u, &t); /* 5 | 15 */
921 SQRN(&u, &t, 4); /* 9 | 240 */
922 fgoldi_mul(&u, &u, &t); /* 10 | 255 = 2^8 - 1 */
923 SQRN(&u, &u, 4); /* 14 | 2^12 - 16 */
924 fgoldi_mul(&t, &u, &t); /* 15 | 2^12 - 1 */
925 SQRN(&u, &t, 12); /* 27 | 2^24 - 2^12 */
926 fgoldi_mul(&u, &u, &t); /* 28 | 2^24 - 1 */
927 SQRN(&u, &u, 12); /* 40 | 2^36 - 2^12 */
928 fgoldi_mul(&t, &u, &t); /* 41 | 2^36 - 1 */
929 fgoldi_sqr(&t, &t); /* 42 | 2^37 - 2 */
930 fgoldi_mul(&t, &t, &v); /* 43 | 2^37 - 1 */
931 SQRN(&u, &t, 37); /* 80 | 2^74 - 2^37 */
932 fgoldi_mul(&u, &u, &t); /* 81 | 2^74 - 1 */
933 SQRN(&u, &u, 37); /* 118 | 2^111 - 2^37 */
934 fgoldi_mul(&t, &u, &t); /* 119 | 2^111 - 1 */
935 SQRN(&u, &t, 111); /* 230 | 2^222 - 2^111 */
936 fgoldi_mul(&t, &u, &t); /* 231 | 2^222 - 1 */
937 fgoldi_sqr(&u, &t); /* 232 | 2^223 - 2 */
938 fgoldi_mul(&u, &u, &v); /* 233 | 2^223 - 1 */
939 SQRN(&u, &u, 223); /* 456 | 2^446 - 2^223 */
940 fgoldi_mul(&t, &u, &t); /* 457 | 2^446 - 2^222 - 1 */
941
942#undef SQRN
943
944 /* Now we must decide whether the answer was right. We should have z^2 =
945 * x/y, so y z^2 = x.
946 *
947 * The easiest way to compare is to encode. This isn't as wasteful as it
948 * sounds: the hard part is normalizing the representations, which we have
949 * to do anyway.
950 */
951 fgoldi_mul(z, x, &t);
952 fgoldi_sqr(&t, z);
953 fgoldi_mul(&t, &t, y);
954 fgoldi_store(xb, x);
955 fgoldi_store(b0, &t);
956 m = -ct_memeq(xb, b0, 56);
957 rc = PICK2(0, rc, m);
958 return (rc);
959}
960
266efb73
MW
961/*----- Test rig ----------------------------------------------------------*/
962
963#ifdef TEST_RIG
964
965#include <mLib/report.h>
966#include <mLib/str.h>
967#include <mLib/testrig.h>
968
969static void fixdstr(dstr *d)
970{
971 if (d->len > 56)
972 die(1, "invalid length for fgoldi");
973 else if (d->len < 56) {
974 dstr_ensure(d, 56);
975 memset(d->buf + d->len, 0, 56 - d->len);
976 d->len = 56;
977 }
978}
979
980static void cvt_fgoldi(const char *buf, dstr *d)
981{
982 dstr dd = DSTR_INIT;
983
984 type_hex.cvt(buf, &dd); fixdstr(&dd);
985 dstr_ensure(d, sizeof(fgoldi)); d->len = sizeof(fgoldi);
986 fgoldi_load((fgoldi *)d->buf, (const octet *)dd.buf);
987 dstr_destroy(&dd);
988}
989
990static void dump_fgoldi(dstr *d, FILE *fp)
991 { fdump(stderr, "???", (const piece *)d->buf); }
992
993static void cvt_fgoldi_ref(const char *buf, dstr *d)
994 { type_hex.cvt(buf, d); fixdstr(d); }
995
996static void dump_fgoldi_ref(dstr *d, FILE *fp)
997{
998 fgoldi x;
999
1000 fgoldi_load(&x, (const octet *)d->buf);
1001 fdump(stderr, "???", x.P);
1002}
1003
1004static int eq(const fgoldi *x, dstr *d)
1005 { octet b[56]; fgoldi_store(b, x); return (memcmp(b, d->buf, 56) == 0); }
1006
1007static const test_type
1008 type_fgoldi = { cvt_fgoldi, dump_fgoldi },
1009 type_fgoldi_ref = { cvt_fgoldi_ref, dump_fgoldi_ref };
1010
1011#define TEST_UNOP(op) \
1012 static int vrf_##op(dstr dv[]) \
1013 { \
1014 fgoldi *x = (fgoldi *)dv[0].buf; \
1015 fgoldi z, zz; \
1016 int ok = 1; \
1017 \
1018 fgoldi_##op(&z, x); \
1019 if (!eq(&z, &dv[1])) { \
1020 ok = 0; \
1021 fprintf(stderr, "failed!\n"); \
1022 fdump(stderr, "x", x->P); \
1023 fdump(stderr, "calc", z.P); \
1024 fgoldi_load(&zz, (const octet *)dv[1].buf); \
1025 fdump(stderr, "z", zz.P); \
1026 } \
1027 \
1028 return (ok); \
1029 }
1030
1031TEST_UNOP(sqr)
1032TEST_UNOP(inv)
1bc00e2a 1033TEST_UNOP(neg)
266efb73
MW
1034
1035#define TEST_BINOP(op) \
1036 static int vrf_##op(dstr dv[]) \
1037 { \
1038 fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf; \
1039 fgoldi z, zz; \
1040 int ok = 1; \
1041 \
1042 fgoldi_##op(&z, x, y); \
1043 if (!eq(&z, &dv[2])) { \
1044 ok = 0; \
1045 fprintf(stderr, "failed!\n"); \
1046 fdump(stderr, "x", x->P); \
1047 fdump(stderr, "y", y->P); \
1048 fdump(stderr, "calc", z.P); \
1049 fgoldi_load(&zz, (const octet *)dv[2].buf); \
1050 fdump(stderr, "z", zz.P); \
1051 } \
1052 \
1053 return (ok); \
1054 }
1055
1056TEST_BINOP(add)
1057TEST_BINOP(sub)
1058TEST_BINOP(mul)
1059
1060static int vrf_mulc(dstr dv[])
1061{
1062 fgoldi *x = (fgoldi *)dv[0].buf;
1063 long a = *(const long *)dv[1].buf;
1064 fgoldi z, zz;
1065 int ok = 1;
1066
1067 fgoldi_mulconst(&z, x, a);
1068 if (!eq(&z, &dv[2])) {
1069 ok = 0;
1070 fprintf(stderr, "failed!\n");
1071 fdump(stderr, "x", x->P);
1072 fprintf(stderr, "a = %ld\n", a);
1073 fdump(stderr, "calc", z.P);
1074 fgoldi_load(&zz, (const octet *)dv[2].buf);
1075 fdump(stderr, "z", zz.P);
1076 }
1077
1078 return (ok);
1079}
1080
1bc00e2a
MW
1081static int vrf_condneg(dstr dv[])
1082{
1083 fgoldi *x = (fgoldi *)dv[0].buf;
1084 uint32 m = *(uint32 *)dv[1].buf;
1085 fgoldi z;
1086 int ok = 1;
1087
1088 fgoldi_condneg(&z, x, m);
1089 if (!eq(&z, &dv[2])) {
1090 ok = 0;
1091 fprintf(stderr, "failed!\n");
1092 fdump(stderr, "x", x->P);
1093 fprintf(stderr, "m = 0x%08lx\n", (unsigned long)m);
1094 fdump(stderr, "calc z", z.P);
1095 fgoldi_load(&z, (const octet *)dv[1].buf);
1096 fdump(stderr, "want z", z.P);
1097 }
1098
1099 return (ok);
1100}
1101
1102static int vrf_pick2(dstr dv[])
1103{
1104 fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf;
1105 uint32 m = *(uint32 *)dv[2].buf;
1106 fgoldi z;
1107 int ok = 1;
1108
1109 fgoldi_pick2(&z, x, y, m);
1110 if (!eq(&z, &dv[3])) {
1111 ok = 0;
1112 fprintf(stderr, "failed!\n");
1113 fdump(stderr, "x", x->P);
1114 fdump(stderr, "y", y->P);
1115 fprintf(stderr, "m = 0x%08lx\n", (unsigned long)m);
1116 fdump(stderr, "calc z", z.P);
1117 fgoldi_load(&z, (const octet *)dv[3].buf);
1118 fdump(stderr, "want z", z.P);
1119 }
1120
1121 return (ok);
1122}
1123
1124static int vrf_pickn(dstr dv[])
1125{
1126 dstr d = DSTR_INIT;
1127 fgoldi v[32], z;
1128 size_t i = *(uint32 *)dv[1].buf, j, n;
1129 const char *p;
1130 char *q;
1131 int ok = 1;
1132
1133 for (q = dv[0].buf, n = 0; (p = str_qword(&q, 0)) != 0; n++)
1134 { cvt_fgoldi(p, &d); v[n] = *(fgoldi *)d.buf; }
1135
1136 fgoldi_pickn(&z, v, n, i);
1137 if (!eq(&z, &dv[2])) {
1138 ok = 0;
1139 fprintf(stderr, "failed!\n");
1140 for (j = 0; j < n; j++) {
1141 fprintf(stderr, "v[%2u]", (unsigned)j);
1142 fdump(stderr, "", v[j].P);
1143 }
1144 fprintf(stderr, "i = %u\n", (unsigned)i);
1145 fdump(stderr, "calc z", z.P);
1146 fgoldi_load(&z, (const octet *)dv[2].buf);
1147 fdump(stderr, "want z", z.P);
1148 }
1149
1150 dstr_destroy(&d);
1151 return (ok);
1152}
1153
266efb73
MW
1154static int vrf_condswap(dstr dv[])
1155{
1156 fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf;
1157 fgoldi xx = *x, yy = *y;
1158 uint32 m = *(uint32 *)dv[2].buf;
1159 int ok = 1;
1160
1161 fgoldi_condswap(&xx, &yy, m);
1162 if (!eq(&xx, &dv[3]) || !eq(&yy, &dv[4])) {
1163 ok = 0;
1164 fprintf(stderr, "failed!\n");
1165 fdump(stderr, "x", x->P);
1166 fdump(stderr, "y", y->P);
1167 fprintf(stderr, "m = 0x%08lx\n", (unsigned long)m);
1168 fdump(stderr, "calc xx", xx.P);
1169 fdump(stderr, "calc yy", yy.P);
1170 fgoldi_load(&xx, (const octet *)dv[3].buf);
1171 fgoldi_load(&yy, (const octet *)dv[4].buf);
1172 fdump(stderr, "want xx", xx.P);
1173 fdump(stderr, "want yy", yy.P);
1174 }
1175
1176 return (ok);
1177}
1178
1bc00e2a
MW
1179static int vrf_quosqrt(dstr dv[])
1180{
1181 fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf;
1182 fgoldi z, zz;
1183 int rc;
1184 int ok = 1;
1185
1186 if (dv[2].len) { fixdstr(&dv[2]); fixdstr(&dv[3]); }
1187 rc = fgoldi_quosqrt(&z, x, y);
1188 if (!dv[2].len ? !rc : (rc || (!eq(&z, &dv[2]) && !eq(&z, &dv[3])))) {
1189 ok = 0;
1190 fprintf(stderr, "failed!\n");
1191 fdump(stderr, "x", x->P);
1192 fdump(stderr, "y", y->P);
1193 if (rc) fprintf(stderr, "calc: FAIL\n");
1194 else fdump(stderr, "calc", z.P);
1195 if (!dv[2].len)
1196 fprintf(stderr, "exp: FAIL\n");
1197 else {
1198 fgoldi_load(&zz, (const octet *)dv[2].buf);
1199 fdump(stderr, "z", zz.P);
1200 fgoldi_load(&zz, (const octet *)dv[3].buf);
1201 fdump(stderr, "z'", zz.P);
1202 }
1203 }
1204
1205 return (ok);
1206}
1207
266efb73
MW
1208static int vrf_sub_mulc_add_sub_mul(dstr dv[])
1209{
1210 fgoldi *u = (fgoldi *)dv[0].buf, *v = (fgoldi *)dv[1].buf,
1211 *w = (fgoldi *)dv[3].buf, *x = (fgoldi *)dv[4].buf,
1212 *y = (fgoldi *)dv[5].buf;
1213 long a = *(const long *)dv[2].buf;
1214 fgoldi umv, aumv, wpaumv, xmy, z, zz;
1215 int ok = 1;
1216
1217 fgoldi_sub(&umv, u, v);
1218 fgoldi_mulconst(&aumv, &umv, a);
1219 fgoldi_add(&wpaumv, w, &aumv);
1220 fgoldi_sub(&xmy, x, y);
1221 fgoldi_mul(&z, &wpaumv, &xmy);
1222
1223 if (!eq(&z, &dv[6])) {
1224 ok = 0;
1225 fprintf(stderr, "failed!\n");
1226 fdump(stderr, "u", u->P);
1227 fdump(stderr, "v", v->P);
1228 fdump(stderr, "u - v", umv.P);
1229 fprintf(stderr, "a = %ld\n", a);
1230 fdump(stderr, "a (u - v)", aumv.P);
1231 fdump(stderr, "w + a (u - v)", wpaumv.P);
1232 fdump(stderr, "x", x->P);
1233 fdump(stderr, "y", y->P);
1234 fdump(stderr, "x - y", xmy.P);
1235 fdump(stderr, "(x - y) (w + a (u - v))", z.P);
1236 fgoldi_load(&zz, (const octet *)dv[6].buf); fdump(stderr, "z", zz.P);
1237 }
1238
1239 return (ok);
1240}
1241
1242static test_chunk tests[] = {
1243 { "add", vrf_add, { &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },
1244 { "sub", vrf_sub, { &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },
1bc00e2a
MW
1245 { "neg", vrf_neg, { &type_fgoldi, &type_fgoldi_ref } },
1246 { "condneg", vrf_condneg,
1247 { &type_fgoldi, &type_uint32, &type_fgoldi_ref } },
266efb73
MW
1248 { "mul", vrf_mul, { &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },
1249 { "mulconst", vrf_mulc, { &type_fgoldi, &type_long, &type_fgoldi_ref } },
1bc00e2a
MW
1250 { "pick2", vrf_pick2,
1251 { &type_fgoldi, &type_fgoldi, &type_uint32, &type_fgoldi_ref } },
1252 { "pickn", vrf_pickn,
1253 { &type_string, &type_uint32, &type_fgoldi_ref } },
266efb73
MW
1254 { "condswap", vrf_condswap,
1255 { &type_fgoldi, &type_fgoldi, &type_uint32,
1256 &type_fgoldi_ref, &type_fgoldi_ref } },
1257 { "sqr", vrf_sqr, { &type_fgoldi, &type_fgoldi_ref } },
1258 { "inv", vrf_inv, { &type_fgoldi, &type_fgoldi_ref } },
1bc00e2a
MW
1259 { "quosqrt", vrf_quosqrt,
1260 { &type_fgoldi, &type_fgoldi, &type_hex, &type_hex } },
266efb73
MW
1261 { "sub-mulc-add-sub-mul", vrf_sub_mulc_add_sub_mul,
1262 { &type_fgoldi, &type_fgoldi, &type_long, &type_fgoldi,
1263 &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },
1264 { 0, 0, { 0 } }
1265};
1266
1267int main(int argc, char *argv[])
1268{
1269 test_run(argc, argv, tests, SRCDIR "/t/fgoldi");
1270 return (0);
1271}
1272
1273#endif
1274
1275/*----- That's all, folks -------------------------------------------------*/