chiark - git - mdw - catacomb/blob - math/gfreduce.c

   1 /* -*-c-*-
   2  *
   3  * Efficient reduction modulo sparse binary polynomials
   4  *
   5  * (c) 2004 Straylight/Edgeware
   6  */
   7
   8 /*----- Licensing notice --------------------------------------------------*
   9  *
  10  * This file is part of Catacomb.
  11  *
  12  * Catacomb is free software; you can redistribute it and/or modify
  13  * it under the terms of the GNU Library General Public License as
  14  * published by the Free Software Foundation; either version 2 of the
  15  * License, or (at your option) any later version.
  16  *
  17  * Catacomb is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20  * GNU Library General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Library General Public
  23  * License along with Catacomb; if not, write to the Free
  24  * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  25  * MA 02111-1307, USA.
  26  */
  27
  28 /*----- Header files ------------------------------------------------------*/
  29
  30 #include <mLib/alloc.h>
  31 #include <mLib/darray.h>
  32 #include <mLib/macros.h>
  33
  34 #include "gf.h"
  35 #include "gfreduce.h"
  36 #include "gfreduce-exp.h"
  37 #include "fibrand.h"
  38 #include "mprand.h"
  39
  40 /*----- Data structures ---------------------------------------------------*/
  41
/* Dynamic-array type @instr_v@ whose elements are @gfreduce_instr@; the
 * code generator below accumulates its instruction sequence in one of these.
 */
DA_DECL(instr_v, gfreduce_instr);
  43
  44 /*----- Main code ---------------------------------------------------------*/
  45
/* --- What's going on here? --- *
 *
 * Let's face it, @gfx_div@ sucks.  It works (I hope), but it's not in any
 * sense fast.  Here, we do efficient reduction modulo sparse polynomials.
 * (It works for arbitrary polynomials, but isn't efficient for dense ones.)
 *
 * Suppose that %$p = x^n + p'$% where %$p' = \sum_{0\le i<n} p_i x^i$%,
 * hopefully with only a few %$p_i \ne 0$%.  We're going to compile %$p$%
 * into a sequence of instructions which can be used to perform reduction
 * modulo %$p$%.  The important observation is that
 * %$x^n \equiv p' \pmod p$%.
 *
 * Suppose we're working with %$w$%-bit words; let %$n = N w + n'$% with
 * %$0 \le n' < w$%.  Let %$u(x)$% be some arbitrary polynomial.  Write
 * %$u = z x^k + u'$% with %$\deg u' < k \ge n$%.  Then a reduction step uses
 * that %$u \equiv u' + z p' x^{k-n} \pmod p$%: the right hand side has
 * degree %$\max \{ \deg u', k + \deg p' - n + \deg z \} < \deg u$%, so this
 * makes progress towards a complete reduction.
 *
 * The compiled instruction sequence computes
 * %$u' + z p' x^{k-n} = u' + \sum_{0\le i<n} p_i z x^{k-n+i}$%.
 */
  68
/* --- @gfreduce_create@ --- *
 *
 * Arguments:   @gfreduce *r@ = structure to fill in
 *              @mp *p@ = a (hopefully sparse) polynomial
 *
 * Returns:     ---
 *
 * Use:         Initializes a context structure for reduction.
 */
  78
  79 struct gen {
  80   unsigned f;                           /* Flags */
  81 #define f_lsr 1u                        /*   Overflow from previous word */
  82 #define f_load 2u                       /*   Outstanding @LOAD@ */
  83 #define f_fip 4u                        /*   Final-pass offset is set */
  84   instr_v iv;                           /* Instruction vector */
  85   size_t fip;                           /* Offset for final-pass reduction */
  86   size_t w;                             /* Currently loaded target word */
  87   size_t wi;                            /* Left-shifts for current word */
  88   gfreduce *r;                          /* Reduction context pointer */
  89 };
  90
  91 #define INSTR(g_, op_, arg_) do {                                       \
  92   struct gen *_g = (g_);                                                \
  93   instr_v *_iv = &_g->iv;                                               \
  94   size_t _i = DA_LEN(_iv);                                              \
  95                                                                         \
  96   DA_ENSURE(_iv, 1);                                                    \
  97   DA(_iv)[_i].op = (op_);                                               \
  98   DA(_iv)[_i].arg = (arg_);                                             \
  99   DA_EXTEND(_iv, 1);                                                    \
 100 } while (0)
 101
 102 static void emit_load(struct gen *g, size_t w)
 103 {
 104   /* --- If this is not the low-order word then note final-pass start --- *
 105    *
 106    * Once we've eliminated the whole high-degree words, there will possibly
 107    * remain a few high-degree bits.  We can further reduce the subject
 108    * polynomial by subtracting an appropriate multiple of %$p'$%, but if we
 109    * do this naively we'll end up addressing `low-order' words beyond the
 110    * bottom of our input.  We solve this problem by storing an alternative
 111    * start position for this final pass (which works because we scan bits
 112    * right-to-left).
 113    */
 114
 115   if (!(g->f & f_fip) && w < g->r->lim) {
 116     g->fip = DA_LEN(&g->iv);
 117     g->f |= f_fip;
 118   }
 119
 120   /* --- Actually emit the instruction --- */
 121
 122   INSTR(g, GFRI_LOAD, w);
 123   g->f |= f_load;
 124   g->w = w;
 125 }
 126
 127 static void emit_right_shifts(struct gen *g)
 128 {
 129   gfreduce_instr *ip;
 130   size_t i, wl;
 131
 132   /* --- Close off the current word --- *
 133    *
 134    * If we shifted into this current word with a nonzero bit offset, then
 135    * we'll also need to arrange to perform a sequence of right shifts into
 136    * the following word, which we might as well do by scanning the
 137    * instruction sequence (which starts at @wi@).
 138    *
 139    * Either way, we leave a @LOAD@ unmatched if there was one before, in the
 140    * hope that callers have an easier time; @g->w@ is updated to reflect the
 141    * currently open word.
 142    */
 143
 144   if (!(g->f & f_lsr))
 145     return;
 146
 147   wl = DA_LEN(&g->iv);
 148   INSTR(g, GFRI_STORE, g->w);
 149   emit_load(g, g->w - 1);
 150   for (i = g->wi; i < wl; i++) {
 151     ip = &DA(&g->iv)[i];
 152     assert(ip->op == GFRI_LSL);
 153     if (ip->arg)
 154       INSTR(g, GFRI_LSR, MPW_BITS - ip->arg);
 155   }
 156   g->f &= ~f_lsr;
 157 }
 158
 159 static void ensure_loaded(struct gen *g, size_t w)
 160 {
 161   if (!(g->f & f_load)) {
 162     emit_load(g, w);
 163     g->wi = DA_LEN(&g->iv);
 164   } else if (w != g->w) {
 165     emit_right_shifts(g);
 166     if (w != g->w) {
 167       INSTR(g, GFRI_STORE, g->w);
 168       emit_load(g, w);
 169     }
 170     g->wi = DA_LEN(&g->iv);
 171   }
 172 }
 173
 174 void gfreduce_create(gfreduce *r, mp *p)
 175 {
 176   struct gen g = { 0, DA_INIT };
 177   unsigned long d;
 178   unsigned dw;
 179   mpscan sc;
 180   unsigned long i;
 181   size_t w, bb;
 182
 183   /* --- Sort out the easy stuff --- */
 184
 185   g.r = r;
 186   d = mp_bits(p); assert(d); d--;
 187   r->lim = d/MPW_BITS;
 188   dw = d%MPW_BITS;
 189   if (!dw)
 190     r->mask = 0;
 191   else {
 192     r->mask = MPW(((mpw)-1) << dw);
 193     r->lim++;
 194   }
 195   r->p = mp_copy(p);
 196
 197   /* --- How this works --- *
 198    *
 199    * The instruction sequence is run with two ambient parameters: a pointer
 200    * (usually) just past the most significant word of the polynomial to be
 201    * reduced; and a word %$z$% which is the multiple of %$p'$% we are meant
 202    * to add.
 203    *
 204    * The sequence visits each word of the polynomial at most once.  Suppose
 205    * %$u = z x^{w N} + u'$%; our pointer points just past the end of %$u'$%.
 206    * Word %$I$% of %$u'$% will be affected by modulus bits %$p_i$% where
 207    * %$(N - I - 1) w + 1 \le i \le (N - I + 1) w - 1$%, so %$p_i$% affects
 208    * word %$I = \lceil (n - i + 1)/w \rceil$% and (if %$i$% is not a multiple
 209    * of %$w$%) also word %$I - 1$%.
 210    *
 211    * We have four instructions: @LOAD@ reads a specified word of %$u$% into an
 212    * accumulator, and @STORE@ stores it back (we'll always store back to the
 213    * same word we most recently read, but this isn't a requirement); and
 214    * @LSL@ and @LSR@, which XOR in appropriately shifted copies of %$z$% into
 215    * the accumulator.  So a typical program will contain sequences of @LSR@
 216    * and @LSL@ instructions sandwiched between @LOAD@/@STORE@ pairs.
 217    *
 218    * We do a single right-to-left pass across %$p$%.
 219    */
 220
 221   bb = MPW_BITS - dw;
 222
 223   for (i = 0, mp_scan(&sc, p); mp_step(&sc) && i < d; i++) {
 224     if (!mp_bit(&sc))
 225       continue;
 226
 227     /* --- We've found a set bit, so work out which word it affects --- *
 228      *
 229      * In general, a bit affects two words: it needs to be shifted left into
 230      * one, and shifted right into the next.  We find the former here.
 231      */
 232
 233     w = (d - i + MPW_BITS - 1)/MPW_BITS;
 234
 235     /* --- Concentrate on the appropriate word --- */
 236
 237     ensure_loaded(&g, w);
 238
 239     /* --- Accumulate a new @LSL@ instruction --- *
 240      *
 241      * If this was a nonzero shift, then we'll need to arrange to do right
 242      * shifts into the following word.
 243      */
 244
 245     INSTR(&g, GFRI_LSL, (bb + i)%MPW_BITS);
 246     if ((bb + i)%MPW_BITS)
 247       g.f |= f_lsr;
 248   }
 249
 250   /* --- Wrapping up --- *
 251    *
 252    * We probably need a final @STORE@, and maybe a sequence of right shifts.
 253    */
 254
 255   if (g.f & f_load) {
 256     emit_right_shifts(&g);
 257     INSTR(&g, GFRI_STORE, g.w);
 258   }
 259
 260   /* --- Copy the instruction vector.
 261    *
 262    * If we've not set a final-pass offset yet then now would be an excellent
 263    * time.  Obviously it should be right at the end, because there's nothing
 264    * for a final pass to do.
 265    */
 266
 267   r->in = DA_LEN(&g.iv);
 268   r->iv = xmalloc(r->in * sizeof(gfreduce_instr));
 269   memcpy(r->iv, DA(&g.iv), r->in * sizeof(gfreduce_instr));
 270
 271   if (!(g.f & f_fip)) g.fip = DA_LEN(&g.iv);
 272   r->fiv = r->iv + g.fip;
 273
 274   DA_DESTROY(&g.iv);
 275 }
 276
 277 #undef INSTR
 278
 279 #undef f_lsr
 280 #undef f_load
 281 #undef f_fip
 282
 283 /* --- @gfreduce_destroy@ --- *
 284  *
 285  * Arguments:   @gfreduce *r@ = structure to free
 286  *
 287  * Returns:     ---
 288  *
 289  * Use:         Reclaims the resources from a reduction context.
 290  */
 291
 292 void gfreduce_destroy(gfreduce *r)
 293 {
 294   mp_drop(r->p);
 295   xfree(r->iv);
 296 }
 297
 298 /* --- @gfreduce_dump@ --- *
 299  *
 300  * Arguments:   @const gfreduce *r@ = structure to dump
 301  *              @FILE *fp@ = file to dump on
 302  *
 303  * Returns:     ---
 304  *
 305  * Use:         Dumps a reduction context.
 306  */
 307
 308 void gfreduce_dump(const gfreduce *r, FILE *fp)
 309 {
 310   size_t i;
 311
 312   fprintf(fp, "poly = "); mp_writefile(r->p, fp, 16);
 313   fprintf(fp, "\n  lim = %lu; mask = %lx\n",
 314           (unsigned long)r->lim, (unsigned long)r->mask);
 315   for (i = 0; i < r->in; i++) {
 316     static const char *opname[] = { "load", "lsl", "lsr", "store" };
 317     if (&r->iv[i] == r->fiv)
 318       fputs("final:\n", fp);
 319     assert(r->iv[i].op < N(opname));
 320     fprintf(fp, "  %s %lu\n",
 321             opname[r->iv[i].op],
 322             (unsigned long)r->iv[i].arg);
 323   }
 324   if (&r->iv[i] == r->fiv)
 325     fputs("final:\n", fp);
 326 }
 327
 328 /* --- @gfreduce_do@ --- *
 329  *
 330  * Arguments:   @const gfreduce *r@ = reduction context
 331  *              @mp *d@ = destination
 332  *              @mp *x@ = source
 333  *
 334  * Returns:     Destination, @x@ reduced modulo the reduction poly.
 335  */
 336
 337 static void run(const gfreduce_instr *i, const gfreduce_instr *il,
 338                 mpw *v, mpw z)
 339 {
 340   mpw w = 0;
 341
 342   for (; i < il; i++) {
 343     switch (i->op) {
 344       case GFRI_LOAD: w = *(v - i->arg); break;
 345       case GFRI_LSL: w ^= z << i->arg; break;
 346       case GFRI_LSR: w ^= z >> i->arg; break;
 347       case GFRI_STORE: *(v - i->arg) = MPW(w); break;
 348       default: abort();
 349     }
 350   }
 351 }
 352
 353 mp *gfreduce_do(const gfreduce *r, mp *d, mp *x)
 354 {
 355   mpw *v, *vl;
 356   const gfreduce_instr *il;
 357   mpw z;
 358
 359   /* --- Try to reuse the source's space --- */
 360
 361   MP_COPY(x);
 362   if (d) MP_DROP(d);
 363   MP_DEST(x, MP_LEN(x), x->f);
 364
 365   /* --- Do the reduction --- */
 366
 367   il = r->iv + r->in;
 368   if (MP_LEN(x) >= r->lim) {
 369     v = x->v + r->lim;
 370     vl = x->vl;
 371     while (vl-- > v) {
 372       while (*vl) {
 373         z = *vl;
 374         *vl = 0;
 375         run(r->iv, il, vl, z);
 376       }
 377     }
 378     if (r->mask) {
 379       while (*vl & r->mask) {
 380         z = *vl & r->mask;
 381         *vl &= ~r->mask;
 382         run(r->fiv, il, vl, z);
 383       }
 384     }
 385   }
 386
 387   /* --- Done --- */
 388
 389   MP_SHRINK(x);
 390   return (x);
 391 }
 392
 393 /* --- @gfreduce_sqrt@ --- *
 394  *
 395  * Arguments:   @const gfreduce *r@ = pointer to reduction context
 396  *              @mp *d@ = destination
 397  *              @mp *x@ = some polynomial
 398  *
 399  * Returns:     The square root of @x@ modulo @r->p@, or null.
 400  */
 401
 402 mp *gfreduce_sqrt(const gfreduce *r, mp *d, mp *x)
 403 {
 404   mp *y = MP_COPY(x);
 405   mp *z, *spare = MP_NEW;
 406   unsigned long m = mp_bits(r->p) - 1;
 407   unsigned long i;
 408
 409   /* --- This is pretty easy --- *
 410    *
 411    * Note that %$x = x^{2^m}$%; therefore %$(x^{2^{m-1}})^2 = x^{2^m} = x$%,
 412    * so %$x^{2^{m-1}}$% is the square root we seek.
 413    */
 414
 415   for (i = 0; i < m - 1; i++) {
 416     mp *t = gf_sqr(spare, y);
 417     spare = y;
 418     y = gfreduce_do(r, t, t);
 419   }
 420   z = gf_sqr(spare, y);
 421   z = gfreduce_do(r, z, z);
 422   if (!MP_EQ(x, z)) {
 423     mp_drop(y);
 424     y = 0;
 425   }
 426   mp_drop(z);
 427   mp_drop(d);
 428   return (y);
 429 }
 430
 431 /* --- @gfreduce_trace@ --- *
 432  *
 433  * Arguments:   @const gfreduce *r@ = pointer to reduction context
 434  *              @mp *x@ = some polynomial
 435  *
 436  * Returns:     The trace of @x@. (%$\Tr(x)=x + x^2 + \cdots + x^{2^{m-1}}$%
 437  *              if %$x \in \gf{2^m}$%).  Since the trace is invariant under
 438  *              the Frobenius automorphism (i.e., %$\Tr(x)^2 = \Tr(x)$%), it
 439  *              must be an element of the base field, i.e., %$\gf{2}$%, and
 440  *              we only need a single bit to represent it.
 441  */
 442
 443 int gfreduce_trace(const gfreduce *r, mp *x)
 444 {
 445   mp *y = MP_COPY(x);
 446   mp *spare = MP_NEW;
 447   unsigned long m = mp_bits(r->p) - 1;
 448   unsigned long i;
 449   int rc;
 450
 451   for (i = 0; i < m - 1; i++) {
 452     mp *t = gf_sqr(spare, y);
 453     spare = y;
 454     y = gfreduce_do(r, t, t);
 455     y = gf_add(y, y, x);
 456   }
 457   rc = !MP_ZEROP(y);
 458   mp_drop(spare);
 459   mp_drop(y);
 460   return (rc);
 461 }
 462
 463 /* --- @gfreduce_halftrace@ --- *
 464  *
 465  * Arguments:   @const gfreduce *r@ = pointer to reduction context
 466  *              @mp *d@ = destination
 467  *              @mp *x@ = some polynomial
 468  *
 469  * Returns:     The half-trace of @x@.
 470  *              (%$\HfTr(x)= x + x^{2^2} + \cdots + x^{2^{m-1}}$%
 471  *              if %$x \in \gf{2^m}$% with %$m$% odd).
 472  */
 473
 474 mp *gfreduce_halftrace(const gfreduce *r, mp *d, mp *x)
 475 {
 476   mp *y = MP_COPY(x);
 477   mp *spare = MP_NEW;
 478   unsigned long m = mp_bits(r->p) - 1;
 479   unsigned long i;
 480
 481   mp_drop(d);
 482   for (i = 0; i < m - 1; i += 2) {
 483     mp *t = gf_sqr(spare, y);
 484     spare = y;
 485     y = gfreduce_do(r, t, t);
 486     t = gf_sqr(spare, y);
 487     spare = y;
 488     y = gfreduce_do(r, t, t);
 489     y = gf_add(y, y, x);
 490   }
 491   mp_drop(spare);
 492   return (y);
 493 }
 494
/* --- @gfreduce_quadsolve@ --- *
 *
 * Arguments:   @const gfreduce *r@ = pointer to reduction context
 *              @mp *d@ = destination
 *              @mp *x@ = some polynomial
 *
 * Returns:     A polynomial @z@ such that %$z^2 + z = x$%, or null.
 *
 * Use:         Solves quadratic equations in a field with characteristic 2.
 *              Suppose we have an equation %$y^2 + A y + B = 0$% where
 *              %$A \ne 0$%.  (If %$A = 0$% then %$y = \sqrt{B}$% and you
 *              want @gfreduce_sqrt@ instead.)  Use this function to solve
 *              %$z^2 + z = B/A^2$%; then set %$y = A z$%, since
 *              %$y^2 + A y = A^2 z^2 + A^2 z = A^2 (z^2 + z) = B$% as
 *              required.
 *
 *              The two roots are %$z$% and %$z + 1$%; this function always
 *              returns the one with zero scalar coefficient.
 */
 514
 515 mp *gfreduce_quadsolve(const gfreduce *r, mp *d, mp *x)
 516 {
 517   unsigned long m = mp_bits(r->p) - 1;
 518   mp *t;
 519
 520   /* --- About the solutions --- *
 521    *
 522    * Factor %$z^2 + z = z (z + 1)$%.  Therefore, if %$z^2 + z = x$% and
 523    * %$z' = z + 1$% then %$z'^2 + z' = z^2 + 1 + z + 1 = z^2 + z$%, so
 524    * %$z + 1$% is the other solution.
 525    *
 526    * A solution exists if and only if %$\Tr(x) = 0$%.  To see the `only if'
 527    * implication, recall that the trace function is linear, and hence
 528    * $%\Tr(z^2 + z) = \Tr(z)^2 + \Tr(z) = \Tr(z) + \Tr(z) = 0$%.  The `if'
 529    * direction will be proven using explicit constructions captured in the
 530    * code below.
 531    */
 532
 533   MP_COPY(x);
 534   if (m & 1) {
 535
 536     /* --- A short-cut for fields with odd degree ---
 537      *
 538      * The method below works in all binary fields, but there's a quicker way
 539      * which works whenever the degree is odd.  The half-trace is
 540      * %$z = \sum_{0\le i\le (m-1)/2} x^{2^{2i}}$%.  Then %$z^2 + z = {}$%
 541      * %$\sum_{0\le i\le (m-1)/2} (x^{2^{2i}} + x^{2^{2i+1}}) = {}$%
 542      * %$\Tr(x) + x^{2^m} = \Tr(x) + x$%.  This therefore gives us the
 543      * solution we want whenever %$\Tr(x) = 0$%.
 544      */
 545
 546     d = gfreduce_halftrace(r, d, x);
 547   } else {
 548     mp *z, *w, *rho = MP_NEW;
 549     mp *spare = MP_NEW;
 550     grand *fr = fibrand_create(0);
 551     unsigned long i;
 552
 553     /* --- Unpicking the magic --- *
 554      *
 555      * Choose %$\rho \inr \gf{2^m}$% with %$\Tr(\rho) = 1$%.  Let
 556      * %$z = \sum_{0\le i<m} \rho^{2^i} \sum_{0\le j<i} x^{2^j} = {}$%
 557      * %$\sum_{1\le i<m} \rho^{2^i} (x + \sum_{1\le j<i} x^{2^j} = {}$%
 558      * %$\rho^2 x + \rho^4 (x + x^2) + \rho^8 (x + x^2 + x^4) + \cdots + {}$%
 559      * %$\rho^{2^{m-1}} (x + x^2 + x^{2^{m-2}})$%.  Then %$z^2 = {}$%
 560      * %$\sum_{0\le i<m} \rho^{2^{i+1}} \sum_{0\le j<i} x^{2^{j+1}} = {}$%
 561      * %$\sum_{1\le i\le m} \rho^{2^i} \sum_{1\le j<i} x^{2^j} = {}$%
 562      * %$\sum_{1\le i<m} \rho^{2^i} \sum_{1\le j<i} x^{2^j} + {}$%
 563      * %$\rho^{2^m} \sum_{1\le j<m} x^{2^j}$%; and, somewhat miraculously,
 564      * %$z^2 + z = \sum_{1\le i<m} \rho^{2^i} x + {}$%
 565      * %$\rho \sum_{1\le i<m} x^{2^i} = x (\Tr(\rho) + \rho) + {}$%
 566      * %$\rho (\Tr(x) + x) = x \Tr(\rho) + \rho \Tr(x)$%.  Again,
 567      * this gives us the root we want whenever %$\Tr(x) = 0$%.
 568      *
 569      * The loop below calculates %$w = \Tr(\rho)$% and %$z$% simultaneously,
 570      * since the same powers of %$\rho$% are wanted in both calculations.
 571      */
 572
 573     for (;;) {
 574       rho = mprand(rho, m, fr, 0);
 575       z = MP_ZERO;
 576       w = MP_COPY(rho);
 577       for (i = 0; i < m - 1; i++) {
 578         t = gf_sqr(spare, z); spare = z; z = gfreduce_do(r, t, t);
 579         t = gf_sqr(spare, w); spare = w; w = gfreduce_do(r, t, t);
 580         t = gf_mul(spare, w, x); t = gfreduce_do(r, t, t); spare = t;
 581         z = gf_add(z, z, t);
 582         w = gf_add(w, w, rho);
 583       }
 584       if (!MP_ZEROP(w))
 585         break;
 586       MP_DROP(z);
 587       MP_DROP(w);
 588     }
 589     if (d) MP_DROP(d);
 590     MP_DROP(w);
 591     MP_DROP(spare);
 592     MP_DROP(rho);
 593     fr->ops->destroy(fr);
 594     d = z;
 595   }
 596
 597   /* --- Check that we calculated the right answer --- *
 598    *
 599    * It should be correct; if it's not then maybe the ring we're working in
 600    * isn't really a field.
 601    */
 602
 603   t = gf_sqr(MP_NEW, d); t = gfreduce_do(r, t, t); t = gf_add(t, t, d);
 604   if (!MP_EQ(t, x)) {
 605     MP_DROP(d);
 606     d = 0;
 607   }
 608   MP_DROP(t);
 609   MP_DROP(x);
 610
 611   /* --- Pick a canonical root --- *
 612    *
 613    * The two roots are %$z$% and %$z + 1$%; pick the one with a zero
 614    * scalar coefficient just for consistency's sake.
 615    */
 616
 617   if (d) d->v[0] &= ~(mpw)1;
 618   return (d);
 619 }
 620
/* --- @gfreduce_exp@ --- *
 *
 * Arguments:   @const gfreduce *gr@ = pointer to reduction context
 *              @mp *d@ = fake destination
 *              @mp *a@ = base
 *              @mp *e@ = exponent
 *
 * Returns:     Result, %$a^e \bmod p$%, where %$p$% is the reduction
 *              polynomial @gr->p@.
 */
 630
 631 mp *gfreduce_exp(const gfreduce *gr, mp *d, mp *a, mp *e)
 632 {
 633   mp *x = MP_ONE;
 634   mp *spare = (e->f & MP_BURN) ? MP_NEWSEC : MP_NEW;
 635
 636   MP_SHRINK(e);
 637   MP_COPY(a);
 638   if (MP_ZEROP(e))
 639     ;
 640   else {
 641     if (MP_NEGP(e))
 642       a = gf_modinv(a, a, gr->p);
 643     if (MP_LEN(e) < EXP_THRESH)
 644       EXP_SIMPLE(x, a, e);
 645     else
 646       EXP_WINDOW(x, a, e);
 647   }
 648   mp_drop(d);
 649   mp_drop(a);
 650   mp_drop(spare);
 651   return (x);
 652 }
 653
 654 /*----- Test rig ----------------------------------------------------------*/
 655
 656 #ifdef TEST_RIG
 657
 658 static int vreduce(dstr *v)
 659 {
 660   mp *d = *(mp **)v[0].buf;
 661   mp *n = *(mp **)v[1].buf;
 662   mp *r = *(mp **)v[2].buf;
 663   mp *c;
 664   int ok = 1;
 665   gfreduce rr;
 666
 667   gfreduce_create(&rr, d);
 668   c = gfreduce_do(&rr, MP_NEW, n);
 669   if (!MP_EQ(c, r)) {
 670     fprintf(stderr, "\n*** reduction failed\n*** ");
 671     gfreduce_dump(&rr, stderr);
 672     fprintf(stderr, "\n*** n = "); mp_writefile(n, stderr, 16);
 673     fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
 674     fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
 675     fprintf(stderr, "\n");
 676     ok = 0;
 677   }
 678   gfreduce_destroy(&rr);
 679   mp_drop(n); mp_drop(d); mp_drop(r); mp_drop(c);
 680   assert(mparena_count(MPARENA_GLOBAL) == 0);
 681   return (ok);
 682 }
 683
 684 static int vmodexp(dstr *v)
 685 {
 686   mp *p = *(mp **)v[0].buf;
 687   mp *g = *(mp **)v[1].buf;
 688   mp *x = *(mp **)v[2].buf;
 689   mp *r = *(mp **)v[3].buf;
 690   mp *c;
 691   int ok = 1;
 692   gfreduce rr;
 693
 694   gfreduce_create(&rr, p);
 695   c = gfreduce_exp(&rr, MP_NEW, g, x);
 696   if (!MP_EQ(c, r)) {
 697     fprintf(stderr, "\n*** modexp failed\n*** ");
 698     fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
 699     fprintf(stderr, "\n*** g = "); mp_writefile(g, stderr, 16);
 700     fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
 701     fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
 702     fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
 703     fprintf(stderr, "\n");
 704     ok = 0;
 705   }
 706   gfreduce_destroy(&rr);
 707   mp_drop(p); mp_drop(g); mp_drop(r); mp_drop(x); mp_drop(c);
 708   assert(mparena_count(MPARENA_GLOBAL) == 0);
 709   return (ok);
 710 }
 711
 712 static int vsqrt(dstr *v)
 713 {
 714   mp *p = *(mp **)v[0].buf;
 715   mp *x = *(mp **)v[1].buf;
 716   mp *r = *(mp **)v[2].buf;
 717   mp *c;
 718   int ok = 1;
 719   gfreduce rr;
 720
 721   gfreduce_create(&rr, p);
 722   c = gfreduce_sqrt(&rr, MP_NEW, x);
 723   if (!MP_EQ(c, r)) {
 724     fprintf(stderr, "\n*** sqrt failed\n*** ");
 725     fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
 726     fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
 727     fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
 728     fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
 729     fprintf(stderr, "\n");
 730     ok = 0;
 731   }
 732   gfreduce_destroy(&rr);
 733   mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
 734   assert(mparena_count(MPARENA_GLOBAL) == 0);
 735   return (ok);
 736 }
 737
 738 static int vtr(dstr *v)
 739 {
 740   mp *p = *(mp **)v[0].buf;
 741   mp *x = *(mp **)v[1].buf;
 742   int r = *(int *)v[2].buf, c;
 743   int ok = 1;
 744   gfreduce rr;
 745
 746   gfreduce_create(&rr, p);
 747   c = gfreduce_trace(&rr, x);
 748   if (c != r) {
 749     fprintf(stderr, "\n*** trace failed\n*** ");
 750     fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
 751     fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
 752     fprintf(stderr, "\n*** c = %d", c);
 753     fprintf(stderr, "\n*** r = %d", r);
 754     fprintf(stderr, "\n");
 755     ok = 0;
 756   }
 757   gfreduce_destroy(&rr);
 758   mp_drop(p); mp_drop(x);
 759   assert(mparena_count(MPARENA_GLOBAL) == 0);
 760   return (ok);
 761 }
 762
 763 static int vhftr(dstr *v)
 764 {
 765   mp *p = *(mp **)v[0].buf;
 766   mp *x = *(mp **)v[1].buf;
 767   mp *r = *(mp **)v[2].buf;
 768   mp *c;
 769   int ok = 1;
 770   gfreduce rr;
 771
 772   gfreduce_create(&rr, p);
 773   c = gfreduce_halftrace(&rr, MP_NEW, x);
 774   if (!MP_EQ(c, r)) {
 775     fprintf(stderr, "\n*** halftrace failed\n*** ");
 776     fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
 777     fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
 778     fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
 779     fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
 780     fprintf(stderr, "\n");
 781     ok = 0;
 782   }
 783   gfreduce_destroy(&rr);
 784   mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
 785   assert(mparena_count(MPARENA_GLOBAL) == 0);
 786   return (ok);
 787 }
 788
 789 static int vquad(dstr *v)
 790 {
 791   mp *p = *(mp **)v[0].buf;
 792   mp *x = *(mp **)v[1].buf;
 793   mp *r = *(mp **)v[2].buf;
 794   mp *c;
 795   int ok = 1;
 796   gfreduce rr;
 797
 798   gfreduce_create(&rr, p);
 799   c = gfreduce_quadsolve(&rr, MP_NEW, x);
 800   if (!MP_EQ(c, r)) {
 801     fprintf(stderr, "\n*** quadsolve failed\n*** ");
 802     fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
 803     fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
 804     fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
 805     fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
 806     fprintf(stderr, "\n");
 807     ok = 0;
 808   }
 809   gfreduce_destroy(&rr);
 810   mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
 811   assert(mparena_count(MPARENA_GLOBAL) == 0);
 812   return (ok);
 813 }
 814
/* Table of test chunks handed to @test_run@ by @main@.  Each entry names a
 * chunk, the driver function above which handles it, and a null-terminated
 * list of the types of the fields that driver reads from its vector; an
 * all-zero entry ends the table.
 */
static test_chunk defs[] = {
  { "reduce", vreduce, { &type_mp, &type_mp, &type_mp, 0 } },
  { "modexp", vmodexp, { &type_mp, &type_mp, &type_mp, &type_mp, 0 } },
  { "sqrt", vsqrt, { &type_mp, &type_mp, &type_mp, 0 } },
  { "trace", vtr, { &type_mp, &type_mp, &type_int, 0 } },
  { "halftrace", vhftr, { &type_mp, &type_mp, &type_mp, 0 } },
  { "quadsolve", vquad, { &type_mp, &type_mp, &type_mp, 0 } },
  { 0, 0, { 0 } }
};
 824
 825 int main(int argc, char *argv[])
 826 {
 827   test_run(argc, argv, defs, SRCDIR"/t/gfreduce");
 828   return (0);
 829 }
 830
 831 #endif
 832
 833 /*----- That's all, folks -------------------------------------------------*/