Commit | Line | Data |
---|---|---|
ceb3f0c0 | 1 | /* -*-c-*- |
ceb3f0c0 | 2 | * |
3 | * Efficient reduction modulo sparse binary polynomials | |
4 | * | |
5 | * (c) 2004 Straylight/Edgeware | |
6 | */ | |
7 | ||
45c0fd36 | 8 | /*----- Licensing notice --------------------------------------------------* |
ceb3f0c0 | 9 | * |
10 | * This file is part of Catacomb. | |
11 | * | |
12 | * Catacomb is free software; you can redistribute it and/or modify | |
13 | * it under the terms of the GNU Library General Public License as | |
14 | * published by the Free Software Foundation; either version 2 of the | |
15 | * License, or (at your option) any later version. | |
45c0fd36 | 16 | * |
ceb3f0c0 | 17 | * Catacomb is distributed in the hope that it will be useful, |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | * GNU Library General Public License for more details. | |
45c0fd36 | 21 | * |
ceb3f0c0 | 22 | * You should have received a copy of the GNU Library General Public |
23 | * License along with Catacomb; if not, write to the Free | |
24 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, | |
25 | * MA 02111-1307, USA. | |
26 | */ | |
27 | ||
ceb3f0c0 | 28 | /*----- Header files ------------------------------------------------------*/ |
29 | ||
30 | #include <mLib/alloc.h> | |
31 | #include <mLib/darray.h> | |
32 | #include <mLib/macros.h> | |
33 | ||
34 | #include "gf.h" | |
35 | #include "gfreduce.h" | |
36 | #include "gfreduce-exp.h" | |
37 | #include "fibrand.h" | |
38 | #include "mprand.h" | |
39 | ||
40 | /*----- Data structures ---------------------------------------------------*/ | |
41 | ||
42 | DA_DECL(instr_v, gfreduce_instr); | |
43 | ||
44 | /*----- Main code ---------------------------------------------------------*/ | |
45 | ||
46 | /* --- What's going on here? --- * | |
47 | * | |
48 | * Let's face it, @gfx_div@ sucks. It works (I hope), but it's not in any | |
49 | * sense fast. Here, we do efficient reduction modulo sparse polynomials. | |
8e663045 | 50 | * (It works for arbitrary polynomials, but isn't efficient for dense ones.) |
ceb3f0c0 | 51 | * |
ce7b6e18 MW |
52 | * Suppose that %$p = x^n + p'$% where %$p' = \sum_{0\le i<n} p_i x^i$%, |
53 | * hopefully with only a few %$p_i \ne 0$%. We're going to compile %$p$% | |
54 | * into a sequence of instructions which can be used to perform reduction | |
55 | * modulo %$p$%. The important observation is that | |
56 | * %$x^n \equiv p' \pmod p$%. | |
8e663045 MW |
57 | * |
58 | * Suppose we're working with %$w$%-bit words; let %$n = N w + n'$% with | |
59 | * %$0 \le n' < w$%. Let %$u(x)$% be some arbitrary polynomial. Write | |
ce7b6e18 | 60 | * %$u = z x^k + u'$% with %$\deg u' < k \ge n$%. Then a reduction step uses |
8e663045 MW |
61 | * that %$u \equiv u' + z p' x^{k-n} \pmod p$%: the right hand side has |
62 | * degree %$\max \{ \deg u', k + \deg p' - n + \deg z \} < \deg u$%, so this | |
63 | * makes progress towards a complete reduction. | |
64 | * | |
65 | * The compiled instruction sequence computes | |
66 | * %$u' + z p' x^{k-n} = u' + \sum_{0\le i<n} z x^{k-n+i}$%. | |
ceb3f0c0 | 67 | */ |
68 | ||
69 | /* --- @gfreduce_create@ --- * | |
70 | * | |
71 | * Arguments: @gfreduce *r@ = structure to fill in | |
72 | * @mp *x@ = a (hopefully sparse) polynomial | |
73 | * | |
74 | * Returns: --- | |
75 | * | |
76 | * Use: Initializes a context structure for reduction. | |
77 | */ | |
78 | ||
8e663045 MW |
79 | struct gen { |
80 | unsigned f; /* Flags */ | |
81 | #define f_lsr 1u /* Overflow from previous word */ | |
82 | #define f_load 2u /* Outstanding @LOAD@ */ | |
d153d02e | 83 | #define f_fip 4u /* Final-pass offset is set */ |
8e663045 | 84 | instr_v iv; /* Instruction vector */ |
d153d02e | 85 | size_t fip; /* Offset for final-pass reduction */ |
8e663045 MW |
86 | size_t w; /* Currently loaded target word */ |
87 | size_t wi; /* Left-shifts for current word */ | |
d153d02e | 88 | gfreduce *r; /* Reduction context pointer */ |
8e663045 MW |
89 | }; |
90 | ||
91 | #define INSTR(g_, op_, arg_) do { \ | |
92 | struct gen *_g = (g_); \ | |
93 | instr_v *_iv = &_g->iv; \ | |
94 | size_t _i = DA_LEN(_iv); \ | |
95 | \ | |
96 | DA_ENSURE(_iv, 1); \ | |
97 | DA(_iv)[_i].op = (op_); \ | |
98 | DA(_iv)[_i].arg = (arg_); \ | |
99 | DA_EXTEND(_iv, 1); \ | |
100 | } while (0) | |
101 | ||
102 | static void emit_load(struct gen *g, size_t w) | |
103 | { | |
d153d02e MW |
104 | /* --- If this is not the low-order word then note final-pass start --- * |
105 | * | |
106 | * Once we've eliminated the whole high-degree words, there will possibly | |
107 | * remain a few high-degree bits. We can further reduce the subject | |
108 | * polynomial by subtracting an appropriate multiple of %$p'$%, but if we | |
109 | * do this naively we'll end up addressing `low-order' words beyond the | |
110 | * bottom of our input. We solve this problem by storing an alternative | |
111 | * start position for this final pass (which works because we scan bits | |
112 | * right-to-left). | |
113 | */ | |
114 | ||
115 | if (!(g->f & f_fip) && w < g->r->lim) { | |
116 | g->fip = DA_LEN(&g->iv); | |
117 | g->f |= f_fip; | |
118 | } | |
119 | ||
120 | /* --- Actually emit the instruction --- */ | |
121 | ||
8e663045 MW |
122 | INSTR(g, GFRI_LOAD, w); |
123 | g->f |= f_load; | |
124 | g->w = w; | |
125 | } | |
126 | ||
127 | static void emit_right_shifts(struct gen *g) | |
128 | { | |
129 | gfreduce_instr *ip; | |
130 | size_t i, wl; | |
131 | ||
132 | /* --- Close off the current word --- * | |
133 | * | |
134 | * If we shifted into this current word with a nonzero bit offset, then | |
135 | * we'll also need to arrange to perform a sequence of right shifts into | |
136 | * the following word, which we might as well do by scanning the | |
137 | * instruction sequence (which starts at @wi@). | |
138 | * | |
139 | * Either way, we leave a @LOAD@ unmatched if there was one before, in the | |
140 | * hope that callers have an easier time; @g->w@ is updated to reflect the | |
141 | * currently open word. | |
142 | */ | |
143 | ||
144 | if (!(g->f & f_lsr)) | |
145 | return; | |
146 | ||
147 | wl = DA_LEN(&g->iv); | |
148 | INSTR(g, GFRI_STORE, g->w); | |
149 | emit_load(g, g->w - 1); | |
150 | for (i = g->wi; i < wl; i++) { | |
151 | ip = &DA(&g->iv)[i]; | |
152 | assert(ip->op == GFRI_LSL); | |
153 | if (ip->arg) | |
154 | INSTR(g, GFRI_LSR, MPW_BITS - ip->arg); | |
155 | } | |
156 | g->f &= ~f_lsr; | |
157 | } | |
158 | ||
159 | static void ensure_loaded(struct gen *g, size_t w) | |
160 | { | |
161 | if (!(g->f & f_load)) { | |
162 | emit_load(g, w); | |
163 | g->wi = DA_LEN(&g->iv); | |
164 | } else if (w != g->w) { | |
165 | emit_right_shifts(g); | |
166 | if (w != g->w) { | |
167 | INSTR(g, GFRI_STORE, g->w); | |
168 | emit_load(g, w); | |
169 | } | |
170 | g->wi = DA_LEN(&g->iv); | |
171 | } | |
172 | } | |
173 | ||
ceb3f0c0 | 174 | void gfreduce_create(gfreduce *r, mp *p) |
175 | { | |
8e663045 | 176 | struct gen g = { 0, DA_INIT }; |
f46efa79 | 177 | unsigned long d; |
178 | unsigned dw; | |
ceb3f0c0 | 179 | mpscan sc; |
180 | unsigned long i; | |
8e663045 | 181 | size_t w, bb; |
ceb3f0c0 | 182 | |
183 | /* --- Sort out the easy stuff --- */ | |
184 | ||
d153d02e | 185 | g.r = r; |
ceb3f0c0 | 186 | d = mp_bits(p); assert(d); d--; |
187 | r->lim = d/MPW_BITS; | |
188 | dw = d%MPW_BITS; | |
189 | if (!dw) | |
190 | r->mask = 0; | |
191 | else { | |
192 | r->mask = MPW(((mpw)-1) << dw); | |
193 | r->lim++; | |
194 | } | |
195 | r->p = mp_copy(p); | |
196 | ||
8e663045 MW |
197 | /* --- How this works --- * |
198 | * | |
199 | * The instruction sequence is run with two ambient parameters: a pointer | |
200 | * (usually) just past the most significant word of the polynomial to be | |
201 | * reduced; and a word %$z$% which is the multiple of %$p'$% we are meant | |
202 | * to add. | |
203 | * | |
204 | * The sequence visits each word of the polynomial at most once. Suppose | |
205 | * %$u = z x^{w N} + u'$%; our pointer points just past the end of %$u'$%. | |
206 | * Word %$I$% of %$u'$% will be affected by modulus bits %$p_i$% where | |
207 | * %$(N - I - 1) w + 1 \le i \le (N - I + 1) w - 1$%, so %$p_i$% affects | |
208 | * word %$I = \lceil (n - i + 1)/w \rceil$% and (if %$i$% is not a multiple | |
209 | * of %$w$%) also word %$I - 1$%. | |
210 | * | |
211 | * We have four instructions: @LOAD@ reads a specified word of %$u$% into an | |
212 | * accumulator, and @STORE@ stores it back (we'll always store back to the | |
213 | * same word we most recently read, but this isn't a requirement); and | |
214 | * @LSL@ and @LSR@, which XOR in appropriately shifted copies of %$z$% into | |
215 | * the accumulator. So a typical program will contain sequences of @LSR@ | |
216 | * and @LSL@ instructions sandwiched between @LOAD@/@STORE@ pairs. | |
217 | * | |
218 | * We do a single right-to-left pass across %$p$%. | |
219 | */ | |
ceb3f0c0 | 220 | |
c29970a7 | 221 | bb = MPW_BITS - dw; |
8e663045 | 222 | |
ceb3f0c0 | 223 | for (i = 0, mp_scan(&sc, p); mp_step(&sc) && i < d; i++) { |
224 | if (!mp_bit(&sc)) | |
225 | continue; | |
8e663045 MW |
226 | |
227 | /* --- We've found a set bit, so work out which word it affects --- * | |
228 | * | |
229 | * In general, a bit affects two words: it needs to be shifted left into | |
230 | * one, and shifted right into the next. We find the former here. | |
231 | */ | |
232 | ||
233 | w = (d - i + MPW_BITS - 1)/MPW_BITS; | |
234 | ||
235 | /* --- Concentrate on the appropriate word --- */ | |
236 | ||
237 | ensure_loaded(&g, w); | |
238 | ||
239 | /* --- Accumulate a new @LSL@ instruction --- * | |
240 | * | |
241 | * If this was a nonzero shift, then we'll need to arrange to do right | |
242 | * shifts into the following word. | |
243 | */ | |
244 | ||
245 | INSTR(&g, GFRI_LSL, (bb + i)%MPW_BITS); | |
c29970a7 | 246 | if ((bb + i)%MPW_BITS) |
8e663045 | 247 | g.f |= f_lsr; |
ceb3f0c0 | 248 | } |
249 | ||
8e663045 MW |
250 | /* --- Wrapping up --- * |
251 | * | |
252 | * We probably need a final @STORE@, and maybe a sequence of right shifts. | |
253 | */ | |
ceb3f0c0 | 254 | |
8e663045 MW |
255 | if (g.f & f_load) { |
256 | emit_right_shifts(&g); | |
257 | INSTR(&g, GFRI_STORE, g.w); | |
258 | } | |
259 | ||
d153d02e MW |
260 | /* --- Copy the instruction vector. |
261 | * | |
262 | * If we've not set a final-pass offset yet then now would be an excellent | |
263 | * time. Obviously it should be right at the end, because there's nothing | |
264 | * for a final pass to do. | |
265 | */ | |
266 | ||
8e663045 | 267 | r->in = DA_LEN(&g.iv); |
ceb3f0c0 | 268 | r->iv = xmalloc(r->in * sizeof(gfreduce_instr)); |
8e663045 | 269 | memcpy(r->iv, DA(&g.iv), r->in * sizeof(gfreduce_instr)); |
d153d02e MW |
270 | |
271 | if (!(g.f & f_fip)) g.fip = DA_LEN(&g.iv); | |
272 | r->fiv = r->iv + g.fip; | |
273 | ||
8e663045 | 274 | DA_DESTROY(&g.iv); |
ceb3f0c0 | 275 | } |
276 | ||
8e663045 MW |
277 | #undef INSTR |
278 | ||
279 | #undef f_lsr | |
280 | #undef f_load | |
d153d02e | 281 | #undef f_fip |
8e663045 | 282 | |
ceb3f0c0 | 283 | /* --- @gfreduce_destroy@ --- * |
284 | * | |
285 | * Arguments: @gfreduce *r@ = structure to free | |
286 | * | |
287 | * Returns: --- | |
288 | * | |
289 | * Use: Reclaims the resources from a reduction context. | |
290 | */ | |
291 | ||
292 | void gfreduce_destroy(gfreduce *r) | |
293 | { | |
294 | mp_drop(r->p); | |
295 | xfree(r->iv); | |
296 | } | |
297 | ||
298 | /* --- @gfreduce_dump@ --- * | |
299 | * | |
300 | * Arguments: @gfreduce *r@ = structure to dump | |
301 | * @FILE *fp@ = file to dump on | |
302 | * | |
303 | * Returns: --- | |
304 | * | |
305 | * Use: Dumps a reduction context. | |
306 | */ | |
307 | ||
308 | void gfreduce_dump(gfreduce *r, FILE *fp) | |
309 | { | |
310 | size_t i; | |
311 | ||
312 | fprintf(fp, "poly = "); mp_writefile(r->p, fp, 16); | |
313 | fprintf(fp, "\n lim = %lu; mask = %lx\n", | |
314 | (unsigned long)r->lim, (unsigned long)r->mask); | |
315 | for (i = 0; i < r->in; i++) { | |
316 | static const char *opname[] = { "load", "lsl", "lsr", "store" }; | |
d153d02e MW |
317 | if (&r->iv[i] == r->fiv) |
318 | fputs("final:\n", fp); | |
ceb3f0c0 | 319 | assert(r->iv[i].op < N(opname)); |
320 | fprintf(fp, " %s %lu\n", | |
321 | opname[r->iv[i].op], | |
322 | (unsigned long)r->iv[i].arg); | |
323 | } | |
d153d02e MW |
324 | if (&r->iv[i] == r->fiv) |
325 | fputs("final:\n", fp); | |
ceb3f0c0 | 326 | } |
327 | ||
328 | /* --- @gfreduce_do@ --- * | |
329 | * | |
330 | * Arguments: @gfreduce *r@ = reduction context | |
331 | * @mp *d@ = destination | |
332 | * @mp *x@ = source | |
333 | * | |
334 | * Returns: Destination, @x@ reduced modulo the reduction poly. | |
335 | */ | |
336 | ||
337 | static void run(const gfreduce_instr *i, const gfreduce_instr *il, | |
338 | mpw *v, mpw z) | |
339 | { | |
340 | mpw w = 0; | |
341 | ||
342 | for (; i < il; i++) { | |
343 | switch (i->op) { | |
344 | case GFRI_LOAD: w = *(v - i->arg); break; | |
345 | case GFRI_LSL: w ^= z << i->arg; break; | |
346 | case GFRI_LSR: w ^= z >> i->arg; break; | |
347 | case GFRI_STORE: *(v - i->arg) = MPW(w); break; | |
348 | default: abort(); | |
349 | } | |
350 | } | |
351 | } | |
352 | ||
353 | mp *gfreduce_do(gfreduce *r, mp *d, mp *x) | |
354 | { | |
355 | mpw *v, *vl; | |
356 | const gfreduce_instr *il; | |
357 | mpw z; | |
358 | ||
359 | /* --- Try to reuse the source's space --- */ | |
360 | ||
361 | MP_COPY(x); | |
362 | if (d) MP_DROP(d); | |
363 | MP_DEST(x, MP_LEN(x), x->f); | |
364 | ||
365 | /* --- Do the reduction --- */ | |
366 | ||
367 | il = r->iv + r->in; | |
368 | if (MP_LEN(x) >= r->lim) { | |
369 | v = x->v + r->lim; | |
370 | vl = x->vl; | |
371 | while (vl-- > v) { | |
372 | while (*vl) { | |
373 | z = *vl; | |
374 | *vl = 0; | |
375 | run(r->iv, il, vl, z); | |
376 | } | |
377 | } | |
378 | if (r->mask) { | |
379 | while (*vl & r->mask) { | |
380 | z = *vl & r->mask; | |
381 | *vl &= ~r->mask; | |
d153d02e | 382 | run(r->fiv, il, vl, z); |
ceb3f0c0 | 383 | } |
384 | } | |
385 | } | |
386 | ||
387 | /* --- Done --- */ | |
388 | ||
389 | MP_SHRINK(x); | |
390 | return (x); | |
391 | } | |
392 | ||
393 | /* --- @gfreduce_sqrt@ --- * | |
394 | * | |
395 | * Arguments: @gfreduce *r@ = pointer to reduction context | |
396 | * @mp *d@ = destination | |
397 | * @mp *x@ = some polynomial | |
398 | * | |
399 | * Returns: The square root of @x@ modulo @r->p@, or null. | |
400 | */ | |
401 | ||
402 | mp *gfreduce_sqrt(gfreduce *r, mp *d, mp *x) | |
403 | { | |
404 | mp *y = MP_COPY(x); | |
405 | mp *z, *spare = MP_NEW; | |
406 | unsigned long m = mp_bits(r->p) - 1; | |
407 | unsigned long i; | |
408 | ||
5a19b5df MW |
409 | /* --- This is pretty easy --- * |
410 | * | |
411 | * Note that %$x = x^{2^m}$%; therefore %$(x^{2^{m-1}})^2 = x^{2^m} = x$%, | |
412 | * so %$x^{2^{m-1}}$% is the square root we seek. | |
413 | */ | |
414 | ||
ceb3f0c0 | 415 | for (i = 0; i < m - 1; i++) { |
416 | mp *t = gf_sqr(spare, y); | |
417 | spare = y; | |
418 | y = gfreduce_do(r, t, t); | |
419 | } | |
420 | z = gf_sqr(spare, y); | |
421 | z = gfreduce_do(r, z, z); | |
422 | if (!MP_EQ(x, z)) { | |
423 | mp_drop(y); | |
424 | y = 0; | |
425 | } | |
426 | mp_drop(z); | |
427 | mp_drop(d); | |
428 | return (y); | |
429 | } | |
430 | ||
431 | /* --- @gfreduce_trace@ --- * | |
432 | * | |
433 | * Arguments: @gfreduce *r@ = pointer to reduction context | |
434 | * @mp *x@ = some polynomial | |
435 | * | |
436 | * Returns: The trace of @x@. (%$\Tr(x)=x + x^2 + \cdots + x^{2^{m-1}}$% | |
5a19b5df MW |
437 | * if %$x \in \gf{2^m}$%). Since the trace is invariant under |
438 | * the Frobenius automorphism (i.e., %$\Tr(x)^2 = \Tr(x)$%), it | |
439 | * must be an element of the base field, i.e., %$\gf{2}$%, and | |
440 | * we only need a single bit to represent it. | |
ceb3f0c0 | 441 | */ |
442 | ||
443 | int gfreduce_trace(gfreduce *r, mp *x) | |
444 | { | |
445 | mp *y = MP_COPY(x); | |
446 | mp *spare = MP_NEW; | |
447 | unsigned long m = mp_bits(r->p) - 1; | |
448 | unsigned long i; | |
449 | int rc; | |
450 | ||
451 | for (i = 0; i < m - 1; i++) { | |
452 | mp *t = gf_sqr(spare, y); | |
453 | spare = y; | |
454 | y = gfreduce_do(r, t, t); | |
455 | y = gf_add(y, y, x); | |
456 | } | |
a69a3efd | 457 | rc = !MP_ZEROP(y); |
ceb3f0c0 | 458 | mp_drop(spare); |
459 | mp_drop(y); | |
460 | return (rc); | |
461 | } | |
462 | ||
463 | /* --- @gfreduce_halftrace@ --- * | |
464 | * | |
465 | * Arguments: @gfreduce *r@ = pointer to reduction context | |
466 | * @mp *d@ = destination | |
467 | * @mp *x@ = some polynomial | |
468 | * | |
469 | * Returns: The half-trace of @x@. | |
470 | * (%$\HfTr(x)= x + x^{2^2} + \cdots + x^{2^{m-1}}$% | |
471 | * if %$x \in \gf{2^m}$% with %$m$% odd). | |
472 | */ | |
473 | ||
474 | mp *gfreduce_halftrace(gfreduce *r, mp *d, mp *x) | |
475 | { | |
476 | mp *y = MP_COPY(x); | |
477 | mp *spare = MP_NEW; | |
478 | unsigned long m = mp_bits(r->p) - 1; | |
479 | unsigned long i; | |
480 | ||
481 | mp_drop(d); | |
482 | for (i = 0; i < m - 1; i += 2) { | |
483 | mp *t = gf_sqr(spare, y); | |
484 | spare = y; | |
485 | y = gfreduce_do(r, t, t); | |
486 | t = gf_sqr(spare, y); | |
487 | spare = y; | |
488 | y = gfreduce_do(r, t, t); | |
489 | y = gf_add(y, y, x); | |
490 | } | |
491 | mp_drop(spare); | |
492 | return (y); | |
493 | } | |
494 | ||
495 | /* --- @gfreduce_quadsolve@ --- * | |
496 | * | |
497 | * Arguments: @gfreduce *r@ = pointer to reduction context | |
498 | * @mp *d@ = destination | |
499 | * @mp *x@ = some polynomial | |
500 | * | |
5a19b5df MW |
501 | * Returns: A polynomial @z@ such that %$z^2 + z = x$%, or null. |
502 | * | |
503 | * Use: Solves quadratic equations in a field with characteristic 2. | |
504 | * Suppose we have an equation %$y^2 + A y + B = 0$% where | |
505 | * %$A \ne 0$%. (If %$A = 0$% then %$y = \sqrt{B}$% and you | |
506 | * want @gfreduce_sqrt@ instead.) Use this function to solve | |
507 | * %$z^2 + z = B/A^2$%; then set %$y = A z$%, since | |
508 | * %$y^2 + y = A^2 z^2 + A^2 z = A^2 (z^2 + z) = B$% as | |
509 | * required. | |
510 | * | |
511 | * The two roots are %$z$% and %$z + 1$%; this function always | |
512 | * returns the one with zero scalar coefficient. | |
ceb3f0c0 | 513 | */ |
514 | ||
515 | mp *gfreduce_quadsolve(gfreduce *r, mp *d, mp *x) | |
516 | { | |
517 | unsigned long m = mp_bits(r->p) - 1; | |
518 | mp *t; | |
519 | ||
5a19b5df MW |
520 | /* --- About the solutions --- * |
521 | * | |
522 | * Factor %$z^2 + z = z (z + 1)$%. Therefore, if %$z^2 + z = x$% and | |
523 | * %$z' = z + 1$% then %$z'^2 + z' = z^2 + 1 + z + 1 = z^2 + z$%, so | |
524 | * %$z + 1$% is the other solution. | |
525 | * | |
526 | * A solution exists if and only if %$\Tr(x) = 0$%. To see the `only if' | |
527 | * implication, recall that the trace function is linear, and hence | |
528 | * $%\Tr(z^2 + z) = \Tr(z)^2 + \Tr(z) = \Tr(z) + \Tr(z) = 0$%. The `if' | |
529 | * direction will be proven using explicit constructions captured in the | |
530 | * code below. | |
531 | */ | |
532 | ||
ceb3f0c0 | 533 | MP_COPY(x); |
5a19b5df MW |
534 | if (m & 1) { |
535 | ||
536 | /* --- A short-cut for fields with odd degree --- | |
537 | * | |
538 | * The method below works in all binary fields, but there's a quicker way | |
539 | * which works whenever the degree is odd. The half-trace is | |
540 | * %$z = \sum_{0\le i\le (m-1)/2} x^{2^{2i}}$%. Then %$z^2 + z = {}$% | |
541 | * %$\sum_{0\le i\le (m-1)/2} (x^{2^{2i}} + x^{2^{2i+1}}) = {}$% | |
542 | * %$\Tr(x) + x^{2^m} = \Tr(x) + x$%. This therefore gives us the | |
543 | * solution we want whenever %$\Tr(x) = 0$%. | |
544 | */ | |
545 | ||
ceb3f0c0 | 546 | d = gfreduce_halftrace(r, d, x); |
5a19b5df | 547 | } else { |
ceb3f0c0 | 548 | mp *z, *w, *rho = MP_NEW; |
549 | mp *spare = MP_NEW; | |
550 | grand *fr = fibrand_create(0); | |
551 | unsigned long i; | |
552 | ||
5a19b5df MW |
553 | /* --- Unpicking the magic --- * |
554 | * | |
555 | * Choose %$\rho \inr \gf{2^m}$% with %$\Tr(\rho) = 1$%. Let | |
556 | * %$z = \sum_{0\le i<m} \rho^{2^i} \sum_{0\le j<i} x^{2^j} = {}$% | |
557 | * %$\rho^2 x + \rho^4 (x + x^2) + \rho^8 (x + x^2 + x^4) + \cdots + {}$% | |
558 | * %$\rho^{2^{m-1}} (x + x^2 + x^{2^{m-2}})$%. Then %$z^2 = {}$% | |
559 | * %$\sum_{0\le i<m} \rho^{2^{i+1}} \sum_{0\le j<i} x^{2^{j+1}} = {}$% | |
560 | * %$\sum_{1\le i\le m} \rho^{2^i} \sum_{1\le j\le i} x^{2^j}$% and, | |
561 | * somewhat miraculously, %$z^2 + z = \sum_{0\le i<m} \rho^{2^i} x + {}$% | |
562 | * %$\rho \sum_{1\le i<m} x^{2^i} = x \Tr(\rho) + \rho \Tr(x)$%. Again, | |
563 | * this gives us the root we want whenever %$\Tr(x) = 0$%. | |
564 | * | |
565 | * The loop below calculates %$w = \Tr(\rho)$% and %$z$% simultaneously, | |
566 | * since the same powers of %$\rho$% are wanted in both calculations. | |
567 | */ | |
568 | ||
ceb3f0c0 | 569 | for (;;) { |
570 | rho = mprand(rho, m, fr, 0); | |
571 | z = MP_ZERO; | |
572 | w = MP_COPY(rho); | |
573 | for (i = 0; i < m - 1; i++) { | |
574 | t = gf_sqr(spare, z); spare = z; z = gfreduce_do(r, t, t); | |
575 | t = gf_sqr(spare, w); spare = w; w = gfreduce_do(r, t, t); | |
576 | t = gf_mul(spare, w, x); t = gfreduce_do(r, t, t); spare = t; | |
577 | z = gf_add(z, z, t); | |
578 | w = gf_add(w, w, rho); | |
579 | } | |
a69a3efd | 580 | if (!MP_ZEROP(w)) |
ceb3f0c0 | 581 | break; |
582 | MP_DROP(z); | |
583 | MP_DROP(w); | |
584 | } | |
585 | if (d) MP_DROP(d); | |
586 | MP_DROP(w); | |
587 | MP_DROP(spare); | |
588 | MP_DROP(rho); | |
589 | fr->ops->destroy(fr); | |
590 | d = z; | |
591 | } | |
592 | ||
5a19b5df MW |
593 | /* --- Check that we calculated the right answer --- * |
594 | * | |
595 | * It should be correct; if it's not then maybe the ring we're working in | |
596 | * isn't really a field. | |
597 | */ | |
598 | ||
ceb3f0c0 | 599 | t = gf_sqr(MP_NEW, d); t = gfreduce_do(r, t, t); t = gf_add(t, t, d); |
600 | if (!MP_EQ(t, x)) { | |
601 | MP_DROP(d); | |
602 | d = 0; | |
603 | } | |
604 | MP_DROP(t); | |
605 | MP_DROP(x); | |
5a19b5df MW |
606 | |
607 | /* --- Pick a canonical root --- * | |
608 | * | |
609 | * The two roots are %$z$% and %$z + 1$%; pick the one with a zero | |
610 | * scalar coefficient just for consistency's sake. | |
611 | */ | |
612 | ||
bc985cef | 613 | if (d) d->v[0] &= ~(mpw)1; |
ceb3f0c0 | 614 | return (d); |
615 | } | |
616 | ||
617 | /* --- @gfreduce_exp@ --- * | |
618 | * | |
619 | * Arguments: @gfreduce *gr@ = pointer to reduction context | |
45c0fd36 MW |
620 | * @mp *d@ = fake destination |
621 | * @mp *a@ = base | |
622 | * @mp *e@ = exponent | |
ceb3f0c0 | 623 | * |
45c0fd36 | 624 | * Returns: Result, %$a^e \bmod m$%. |
ceb3f0c0 | 625 | */ |
626 | ||
627 | mp *gfreduce_exp(gfreduce *gr, mp *d, mp *a, mp *e) | |
628 | { | |
629 | mp *x = MP_ONE; | |
630 | mp *spare = (e->f & MP_BURN) ? MP_NEWSEC : MP_NEW; | |
631 | ||
632 | MP_SHRINK(e); | |
a69a3efd | 633 | MP_COPY(a); |
634 | if (MP_ZEROP(e)) | |
ceb3f0c0 | 635 | ; |
a69a3efd | 636 | else { |
637 | if (MP_NEGP(e)) | |
638 | a = gf_modinv(a, a, gr->p); | |
639 | if (MP_LEN(e) < EXP_THRESH) | |
640 | EXP_SIMPLE(x, a, e); | |
641 | else | |
642 | EXP_WINDOW(x, a, e); | |
643 | } | |
ceb3f0c0 | 644 | mp_drop(d); |
a69a3efd | 645 | mp_drop(a); |
ceb3f0c0 | 646 | mp_drop(spare); |
647 | return (x); | |
648 | } | |
649 | ||
650 | /*----- Test rig ----------------------------------------------------------*/ | |
651 | ||
652 | #ifdef TEST_RIG | |
653 | ||
ceb3f0c0 | 654 | static int vreduce(dstr *v) |
655 | { | |
656 | mp *d = *(mp **)v[0].buf; | |
657 | mp *n = *(mp **)v[1].buf; | |
658 | mp *r = *(mp **)v[2].buf; | |
659 | mp *c; | |
660 | int ok = 1; | |
661 | gfreduce rr; | |
662 | ||
663 | gfreduce_create(&rr, d); | |
664 | c = gfreduce_do(&rr, MP_NEW, n); | |
665 | if (!MP_EQ(c, r)) { | |
666 | fprintf(stderr, "\n*** reduction failed\n*** "); | |
667 | gfreduce_dump(&rr, stderr); | |
668 | fprintf(stderr, "\n*** n = "); mp_writefile(n, stderr, 16); | |
669 | fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); | |
670 | fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); | |
671 | fprintf(stderr, "\n"); | |
672 | ok = 0; | |
673 | } | |
674 | gfreduce_destroy(&rr); | |
675 | mp_drop(n); mp_drop(d); mp_drop(r); mp_drop(c); | |
676 | assert(mparena_count(MPARENA_GLOBAL) == 0); | |
677 | return (ok); | |
678 | } | |
679 | ||
680 | static int vmodexp(dstr *v) | |
681 | { | |
682 | mp *p = *(mp **)v[0].buf; | |
683 | mp *g = *(mp **)v[1].buf; | |
684 | mp *x = *(mp **)v[2].buf; | |
685 | mp *r = *(mp **)v[3].buf; | |
686 | mp *c; | |
687 | int ok = 1; | |
688 | gfreduce rr; | |
689 | ||
690 | gfreduce_create(&rr, p); | |
691 | c = gfreduce_exp(&rr, MP_NEW, g, x); | |
692 | if (!MP_EQ(c, r)) { | |
693 | fprintf(stderr, "\n*** modexp failed\n*** "); | |
694 | fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); | |
695 | fprintf(stderr, "\n*** g = "); mp_writefile(g, stderr, 16); | |
696 | fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); | |
697 | fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); | |
698 | fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); | |
699 | fprintf(stderr, "\n"); | |
700 | ok = 0; | |
701 | } | |
702 | gfreduce_destroy(&rr); | |
703 | mp_drop(p); mp_drop(g); mp_drop(r); mp_drop(x); mp_drop(c); | |
704 | assert(mparena_count(MPARENA_GLOBAL) == 0); | |
705 | return (ok); | |
706 | } | |
707 | ||
708 | static int vsqrt(dstr *v) | |
709 | { | |
710 | mp *p = *(mp **)v[0].buf; | |
711 | mp *x = *(mp **)v[1].buf; | |
712 | mp *r = *(mp **)v[2].buf; | |
713 | mp *c; | |
714 | int ok = 1; | |
715 | gfreduce rr; | |
716 | ||
717 | gfreduce_create(&rr, p); | |
718 | c = gfreduce_sqrt(&rr, MP_NEW, x); | |
719 | if (!MP_EQ(c, r)) { | |
720 | fprintf(stderr, "\n*** sqrt failed\n*** "); | |
721 | fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); | |
722 | fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); | |
723 | fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); | |
724 | fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); | |
725 | fprintf(stderr, "\n"); | |
726 | ok = 0; | |
727 | } | |
728 | gfreduce_destroy(&rr); | |
729 | mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c); | |
730 | assert(mparena_count(MPARENA_GLOBAL) == 0); | |
731 | return (ok); | |
732 | } | |
733 | ||
734 | static int vtr(dstr *v) | |
735 | { | |
736 | mp *p = *(mp **)v[0].buf; | |
737 | mp *x = *(mp **)v[1].buf; | |
738 | int r = *(int *)v[2].buf, c; | |
739 | int ok = 1; | |
740 | gfreduce rr; | |
741 | ||
742 | gfreduce_create(&rr, p); | |
743 | c = gfreduce_trace(&rr, x); | |
744 | if (c != r) { | |
745 | fprintf(stderr, "\n*** trace failed\n*** "); | |
746 | fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); | |
747 | fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); | |
748 | fprintf(stderr, "\n*** c = %d", c); | |
749 | fprintf(stderr, "\n*** r = %d", r); | |
750 | fprintf(stderr, "\n"); | |
751 | ok = 0; | |
752 | } | |
753 | gfreduce_destroy(&rr); | |
45c0fd36 | 754 | mp_drop(p); mp_drop(x); |
ceb3f0c0 | 755 | assert(mparena_count(MPARENA_GLOBAL) == 0); |
756 | return (ok); | |
757 | } | |
758 | ||
759 | static int vhftr(dstr *v) | |
760 | { | |
761 | mp *p = *(mp **)v[0].buf; | |
762 | mp *x = *(mp **)v[1].buf; | |
763 | mp *r = *(mp **)v[2].buf; | |
764 | mp *c; | |
765 | int ok = 1; | |
766 | gfreduce rr; | |
767 | ||
768 | gfreduce_create(&rr, p); | |
769 | c = gfreduce_halftrace(&rr, MP_NEW, x); | |
770 | if (!MP_EQ(c, r)) { | |
771 | fprintf(stderr, "\n*** halftrace failed\n*** "); | |
772 | fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); | |
773 | fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); | |
774 | fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); | |
775 | fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); | |
776 | fprintf(stderr, "\n"); | |
777 | ok = 0; | |
778 | } | |
779 | gfreduce_destroy(&rr); | |
780 | mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c); | |
781 | assert(mparena_count(MPARENA_GLOBAL) == 0); | |
782 | return (ok); | |
783 | } | |
784 | ||
785 | static int vquad(dstr *v) | |
786 | { | |
787 | mp *p = *(mp **)v[0].buf; | |
788 | mp *x = *(mp **)v[1].buf; | |
789 | mp *r = *(mp **)v[2].buf; | |
790 | mp *c; | |
791 | int ok = 1; | |
792 | gfreduce rr; | |
793 | ||
794 | gfreduce_create(&rr, p); | |
795 | c = gfreduce_quadsolve(&rr, MP_NEW, x); | |
796 | if (!MP_EQ(c, r)) { | |
797 | fprintf(stderr, "\n*** quadsolve failed\n*** "); | |
798 | fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); | |
799 | fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); | |
800 | fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); | |
801 | fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); | |
802 | fprintf(stderr, "\n"); | |
803 | ok = 0; | |
804 | } | |
805 | gfreduce_destroy(&rr); | |
806 | mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c); | |
807 | assert(mparena_count(MPARENA_GLOBAL) == 0); | |
808 | return (ok); | |
809 | } | |
810 | ||
811 | static test_chunk defs[] = { | |
812 | { "reduce", vreduce, { &type_mp, &type_mp, &type_mp, 0 } }, | |
813 | { "modexp", vmodexp, { &type_mp, &type_mp, &type_mp, &type_mp, 0 } }, | |
814 | { "sqrt", vsqrt, { &type_mp, &type_mp, &type_mp, 0 } }, | |
815 | { "trace", vtr, { &type_mp, &type_mp, &type_int, 0 } }, | |
816 | { "halftrace", vhftr, { &type_mp, &type_mp, &type_mp, 0 } }, | |
817 | { "quadsolve", vquad, { &type_mp, &type_mp, &type_mp, 0 } }, | |
818 | { 0, 0, { 0 } } | |
819 | }; | |
820 | ||
821 | int main(int argc, char *argv[]) | |
822 | { | |
0f00dc4c | 823 | test_run(argc, argv, defs, SRCDIR"/t/gfreduce"); |
ceb3f0c0 | 824 | return (0); |
825 | } | |
826 | ||
827 | #endif | |
828 | ||
829 | /*----- That's all, folks -------------------------------------------------*/ |