chiark - git - mdw - catacomb/blob - math/gfreduce.c

   1 /* -*-c-*-
   2  *
   3  * Efficient reduction modulo sparse binary polynomials
   4  *
   5  * (c) 2004 Straylight/Edgeware
   6  */
   7
   8 /*----- Licensing notice --------------------------------------------------*
   9  *
  10  * This file is part of Catacomb.
  11  *
  12  * Catacomb is free software; you can redistribute it and/or modify
  13  * it under the terms of the GNU Library General Public License as
  14  * published by the Free Software Foundation; either version 2 of the
  15  * License, or (at your option) any later version.
  16  *
  17  * Catacomb is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20  * GNU Library General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Library General Public
  23  * License along with Catacomb; if not, write to the Free
  24  * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  25  * MA 02111-1307, USA.
  26  */
  27
  28 /*----- Header files ------------------------------------------------------*/
  29
  30 #include <mLib/alloc.h>
  31 #include <mLib/darray.h>
  32 #include <mLib/macros.h>
  33
  34 #include "gf.h"
  35 #include "gfreduce.h"
  36 #include "gfreduce-exp.h"
  37 #include "fibrand.h"
  38 #include "mprand.h"
  39
  40 /*----- Data structures ---------------------------------------------------*/
  41
  42 DA_DECL(instr_v, gfreduce_instr);
  43
  44 /*----- Main code ---------------------------------------------------------*/
  45
  46 /* --- What's going on here? --- *
  47  *
  48  * Let's face it, @gfx_div@ sucks.  It works (I hope), but it's not in any
  49  * sense fast.  Here, we do efficient reduction modulo sparse polynomials.
  50  * (It works for arbitrary polynomials, but isn't efficient for dense ones.)
  51  *
 * Suppose that %$p(x) = x^n + p'(x) = \sum_{0\le i\le n} p_i x^i$%,
 * hopefully with only a few other %$p_i \ne 0$%.  We're going to compile
 * %$p$% into a sequence of instructions which can be used to perform
 * reduction modulo %$p$%.  The important observation is that
 * %$x^n \equiv p' \pmod p$%.
  56  *
  57  * Suppose we're working with %$w$%-bit words; let %$n = N w + n'$% with
  58  * %$0 \le n' < w$%.  Let %$u(x)$% be some arbitrary polynomial.  Write
  59  * %$u = z x^k + u'$% with %$\deg u' < k \ge n$%; then a reduction step uses
  60  * that %$u \equiv u' + z p' x^{k-n} \pmod p$%: the right hand side has
  61  * degree %$\max \{ \deg u', k + \deg p' - n + \deg z \} < \deg u$%, so this
  62  * makes progress towards a complete reduction.
  63  *
 * The compiled instruction sequence computes
 * %$u' + z p' x^{k-n} = u' + \sum_{0\le i<n} p_i z x^{k-n+i}$%.
  66  */
  67
  68 /* --- @gfreduce_create@ --- *
  69  *
  70  * Arguments:   @gfreduce *r@ = structure to fill in
 *              @mp *p@ = a (hopefully sparse) polynomial
  72  *
  73  * Returns:     ---
  74  *
  75  * Use:         Initializes a context structure for reduction.
  76  */
  77
/* --- @struct gen@ --- *
 *
 * Working state for the instruction generator used by @gfreduce_create@.
 * The flag bits which may appear in @f@ are defined alongside it.  Note
 * that the first two members are initialized positionally by
 * @gfreduce_create@, so their order matters.
 */

struct gen {
  unsigned f;                           /* Flags */
#define f_lsr 1u                        /*   Overflow from previous word */
#define f_load 2u                       /*   Outstanding @LOAD@ */
#define f_fip 4u                        /*   Final-pass offset is set */
  instr_v iv;                           /* Instruction vector */
  size_t fip;                           /* Offset for final-pass reduction */
  size_t w;                             /* Currently loaded target word */
  size_t wi;                            /* Left-shifts for current word */
  gfreduce *r;                          /* Reduction context pointer */
};
  89
  90 #define INSTR(g_, op_, arg_) do {                                       \
  91   struct gen *_g = (g_);                                                \
  92   instr_v *_iv = &_g->iv;                                               \
  93   size_t _i = DA_LEN(_iv);                                              \
  94                                                                         \
  95   DA_ENSURE(_iv, 1);                                                    \
  96   DA(_iv)[_i].op = (op_);                                               \
  97   DA(_iv)[_i].arg = (arg_);                                             \
  98   DA_EXTEND(_iv, 1);                                                    \
  99 } while (0)
 100
 101 static void emit_load(struct gen *g, size_t w)
 102 {
 103   /* --- If this is not the low-order word then note final-pass start --- *
 104    *
 105    * Once we've eliminated the whole high-degree words, there will possibly
 106    * remain a few high-degree bits.  We can further reduce the subject
 107    * polynomial by subtracting an appropriate multiple of %$p'$%, but if we
 108    * do this naively we'll end up addressing `low-order' words beyond the
 109    * bottom of our input.  We solve this problem by storing an alternative
 110    * start position for this final pass (which works because we scan bits
 111    * right-to-left).
 112    */
 113
 114   if (!(g->f & f_fip) && w < g->r->lim) {
 115     g->fip = DA_LEN(&g->iv);
 116     g->f |= f_fip;
 117   }
 118
 119   /* --- Actually emit the instruction --- */
 120
 121   INSTR(g, GFRI_LOAD, w);
 122   g->f |= f_load;
 123   g->w = w;
 124 }
 125
 126 static void emit_right_shifts(struct gen *g)
 127 {
 128   gfreduce_instr *ip;
 129   size_t i, wl;
 130
 131   /* --- Close off the current word --- *
 132    *
 133    * If we shifted into this current word with a nonzero bit offset, then
 134    * we'll also need to arrange to perform a sequence of right shifts into
 135    * the following word, which we might as well do by scanning the
 136    * instruction sequence (which starts at @wi@).
 137    *
 138    * Either way, we leave a @LOAD@ unmatched if there was one before, in the
 139    * hope that callers have an easier time; @g->w@ is updated to reflect the
 140    * currently open word.
 141    */
 142
 143   if (!(g->f & f_lsr))
 144     return;
 145
 146   wl = DA_LEN(&g->iv);
 147   INSTR(g, GFRI_STORE, g->w);
 148   emit_load(g, g->w - 1);
 149   for (i = g->wi; i < wl; i++) {
 150     ip = &DA(&g->iv)[i];
 151     assert(ip->op == GFRI_LSL);
 152     if (ip->arg)
 153       INSTR(g, GFRI_LSR, MPW_BITS - ip->arg);
 154   }
 155   g->f &= ~f_lsr;
 156 }
 157
 158 static void ensure_loaded(struct gen *g, size_t w)
 159 {
 160   if (!(g->f & f_load)) {
 161     emit_load(g, w);
 162     g->wi = DA_LEN(&g->iv);
 163   } else if (w != g->w) {
 164     emit_right_shifts(g);
 165     if (w != g->w) {
 166       INSTR(g, GFRI_STORE, g->w);
 167       emit_load(g, w);
 168     }
 169     g->wi = DA_LEN(&g->iv);
 170   }
 171 }
 172
/* Compile the polynomial @p@ into a reduction program and fill in @r@ with
 * the program, the word limit and top-word mask, and a copy of @p@.
 */

void gfreduce_create(gfreduce *r, mp *p)
{
  struct gen g = { 0, DA_INIT };
  unsigned long d;
  unsigned dw;
  mpscan sc;
  unsigned long i;
  size_t w, bb;

  /* --- Sort out the easy stuff --- *
   *
   * Here, @d@ becomes one less than the bit length of @p@, i.e., its
   * degree; @r->lim@ is @d@ divided by the word size, rounded up; and
   * @r->mask@ covers bit @dw@ and upwards of a word, or is zero if @d@ is
   * an exact multiple of the word size.
   */

  g.r = r;
  d = mp_bits(p); assert(d); d--;
  r->lim = d/MPW_BITS;
  dw = d%MPW_BITS;
  if (!dw)
    r->mask = 0;
  else {
    r->mask = MPW(((mpw)-1) << dw);
    r->lim++;
  }
  r->p = mp_copy(p);

  /* --- How this works --- *
   *
   * The instruction sequence is run with two ambient parameters: a pointer
   * (usually) just past the most significant word of the polynomial to be
   * reduced; and a word %$z$% which is the multiple of %$p'$% we are meant
   * to add.
   *
   * The sequence visits each word of the polynomial at most once.  Suppose
   * %$u = z x^{w N} + u'$%; our pointer points just past the end of %$u'$%.
   * Word %$I$% of %$u'$% will be affected by modulus bits %$p_i$% where
   * %$(N - I - 1) w + 1 \le i \le (N - I + 1) w - 1$%, so %$p_i$% affects
   * word %$I = \lceil (n - i + 1)/w \rceil$% and (if %$i$% is not a multiple
   * of %$w$%) also word %$I - 1$%.
   *
   * We have four instructions: @LOAD@ reads a specified word of %$u$% into an
   * accumulator, and @STORE@ stores it back (we'll always store back to the
   * same word we most recently read, but this isn't a requirement); and
   * @LSL@ and @LSR@, which XOR in appropriately shifted copies of %$z$% into
   * the accumulator.  So a typical program will contain sequences of @LSR@
   * and @LSL@ instructions sandwiched between @LOAD@/@STORE@ pairs.
   *
   * We do a single right-to-left pass across %$p$%.
   */

  bb = MPW_BITS - dw;

  /* --- Scan the bits below the leading one (stop at @i == d@) --- */

  for (i = 0, mp_scan(&sc, p); mp_step(&sc) && i < d; i++) {
    if (!mp_bit(&sc))
      continue;

    /* --- We've found a set bit, so work out which word it affects --- *
     *
     * In general, a bit affects two words: it needs to be shifted left into
     * one, and shifted right into the next.  We find the former here.  The
     * result is an offset counted downwards from the ambient pointer.
     */

    w = (d - i + MPW_BITS - 1)/MPW_BITS;

    /* --- Concentrate on the appropriate word --- */

    ensure_loaded(&g, w);

    /* --- Accumulate a new @LSL@ instruction --- *
     *
     * If this was a nonzero shift, then we'll need to arrange to do right
     * shifts into the following word.
     */

    INSTR(&g, GFRI_LSL, (bb + i)%MPW_BITS);
    if ((bb + i)%MPW_BITS)
      g.f |= f_lsr;
  }

  /* --- Wrapping up --- *
   *
   * We probably need a final @STORE@, and maybe a sequence of right shifts.
   */

  if (g.f & f_load) {
    emit_right_shifts(&g);
    INSTR(&g, GFRI_STORE, g.w);
  }

  /* --- Copy the instruction vector --- *
   *
   * If we've not set a final-pass offset yet then now would be an excellent
   * time.  Obviously it should be right at the end, because there's nothing
   * for a final pass to do.
   */

  r->in = DA_LEN(&g.iv);
  r->iv = xmalloc(r->in * sizeof(gfreduce_instr));
  memcpy(r->iv, DA(&g.iv), r->in * sizeof(gfreduce_instr));

  if (!(g.f & f_fip)) g.fip = DA_LEN(&g.iv);
  r->fiv = r->iv + g.fip;

  DA_DESTROY(&g.iv);
}
 275
 276 #undef INSTR
 277
 278 #undef f_lsr
 279 #undef f_load
 280 #undef f_fip
 281
 282 /* --- @gfreduce_destroy@ --- *
 283  *
 284  * Arguments:   @gfreduce *r@ = structure to free
 285  *
 286  * Returns:     ---
 287  *
 288  * Use:         Reclaims the resources from a reduction context.
 289  */
 290
 291 void gfreduce_destroy(gfreduce *r)
 292 {
 293   mp_drop(r->p);
 294   xfree(r->iv);
 295 }
 296
 297 /* --- @gfreduce_dump@ --- *
 298  *
 299  * Arguments:   @gfreduce *r@ = structure to dump
 300  *              @FILE *fp@ = file to dump on
 301  *
 302  * Returns:     ---
 303  *
 304  * Use:         Dumps a reduction context.
 305  */
 306
 307 void gfreduce_dump(gfreduce *r, FILE *fp)
 308 {
 309   size_t i;
 310
 311   fprintf(fp, "poly = "); mp_writefile(r->p, fp, 16);
 312   fprintf(fp, "\n  lim = %lu; mask = %lx\n",
 313           (unsigned long)r->lim, (unsigned long)r->mask);
 314   for (i = 0; i < r->in; i++) {
 315     static const char *opname[] = { "load", "lsl", "lsr", "store" };
 316     if (&r->iv[i] == r->fiv)
 317       fputs("final:\n", fp);
 318     assert(r->iv[i].op < N(opname));
 319     fprintf(fp, "  %s %lu\n",
 320             opname[r->iv[i].op],
 321             (unsigned long)r->iv[i].arg);
 322   }
 323   if (&r->iv[i] == r->fiv)
 324     fputs("final:\n", fp);
 325 }
 326
 327 /* --- @gfreduce_do@ --- *
 328  *
 329  * Arguments:   @gfreduce *r@ = reduction context
 330  *              @mp *d@ = destination
 331  *              @mp *x@ = source
 332  *
 333  * Returns:     Destination, @x@ reduced modulo the reduction poly.
 334  */
 335
 336 static void run(const gfreduce_instr *i, const gfreduce_instr *il,
 337                 mpw *v, mpw z)
 338 {
 339   mpw w = 0;
 340
 341   for (; i < il; i++) {
 342     switch (i->op) {
 343       case GFRI_LOAD: w = *(v - i->arg); break;
 344       case GFRI_LSL: w ^= z << i->arg; break;
 345       case GFRI_LSR: w ^= z >> i->arg; break;
 346       case GFRI_STORE: *(v - i->arg) = MPW(w); break;
 347       default: abort();
 348     }
 349   }
 350 }
 351
/* Reduce @x@ modulo the polynomial compiled into @r@: the reduction is done
 * in place on (a writable version of) @x@, which is then returned; the
 * destination @d@, if any, is simply dropped.
 */

mp *gfreduce_do(gfreduce *r, mp *d, mp *x)
{
  mpw *v, *vl;
  const gfreduce_instr *il;
  mpw z;

  /* --- Try to reuse the source's space --- *
   *
   * Take a reference to @x@ before dropping @d@, since callers in this file
   * pass the same object as both.
   * NOTE(review): @MP_DEST@ presumably arranges for @x@ to be writable and
   * unshared -- confirm against its definition.
   */

  MP_COPY(x);
  if (d) MP_DROP(d);
  MP_DEST(x, MP_LEN(x), x->f);

  /* --- Do the reduction --- *
   *
   * There's nothing to do unless @x@ has at least @r->lim@ words.  First,
   * work downwards through the words at index @r->lim@ and above: lift
   * each one's contents out as %$z$%, clear it, and run the whole program
   * based at that word; repeat while the word is nonzero.  When this loop
   * finishes, @vl@ has been stepped down to the word just below @v@.
   * Then, if there's a mask, deal similarly with the masked high bits of
   * that word, entering the program at the final-pass position.
   */

  il = r->iv + r->in;
  if (MP_LEN(x) >= r->lim) {
    v = x->v + r->lim;
    vl = x->vl;
    while (vl-- > v) {
      while (*vl) {
        z = *vl;
        *vl = 0;
        run(r->iv, il, vl, z);
      }
    }
    if (r->mask) {
      while (*vl & r->mask) {
        z = *vl & r->mask;
        *vl &= ~r->mask;
        run(r->fiv, il, vl, z);
      }
    }
  }

  /* --- Done --- */

  MP_SHRINK(x);
  return (x);
}
 391
 392 /* --- @gfreduce_sqrt@ --- *
 393  *
 394  * Arguments:   @gfreduce *r@ = pointer to reduction context
 395  *              @mp *d@ = destination
 396  *              @mp *x@ = some polynomial
 397  *
 398  * Returns:     The square root of @x@ modulo @r->p@, or null.
 399  */
 400
 401 mp *gfreduce_sqrt(gfreduce *r, mp *d, mp *x)
 402 {
 403   mp *y = MP_COPY(x);
 404   mp *z, *spare = MP_NEW;
 405   unsigned long m = mp_bits(r->p) - 1;
 406   unsigned long i;
 407
 408   for (i = 0; i < m - 1; i++) {
 409     mp *t = gf_sqr(spare, y);
 410     spare = y;
 411     y = gfreduce_do(r, t, t);
 412   }
 413   z = gf_sqr(spare, y);
 414   z = gfreduce_do(r, z, z);
 415   if (!MP_EQ(x, z)) {
 416     mp_drop(y);
 417     y = 0;
 418   }
 419   mp_drop(z);
 420   mp_drop(d);
 421   return (y);
 422 }
 423
 424 /* --- @gfreduce_trace@ --- *
 425  *
 426  * Arguments:   @gfreduce *r@ = pointer to reduction context
 427  *              @mp *x@ = some polynomial
 428  *
 429  * Returns:     The trace of @x@. (%$\Tr(x)=x + x^2 + \cdots + x^{2^{m-1}}$%
 430  *              if %$x \in \gf{2^m}$%).
 431  */
 432
 433 int gfreduce_trace(gfreduce *r, mp *x)
 434 {
 435   mp *y = MP_COPY(x);
 436   mp *spare = MP_NEW;
 437   unsigned long m = mp_bits(r->p) - 1;
 438   unsigned long i;
 439   int rc;
 440
 441   for (i = 0; i < m - 1; i++) {
 442     mp *t = gf_sqr(spare, y);
 443     spare = y;
 444     y = gfreduce_do(r, t, t);
 445     y = gf_add(y, y, x);
 446   }
 447   rc = !MP_ZEROP(y);
 448   mp_drop(spare);
 449   mp_drop(y);
 450   return (rc);
 451 }
 452
 453 /* --- @gfreduce_halftrace@ --- *
 454  *
 455  * Arguments:   @gfreduce *r@ = pointer to reduction context
 456  *              @mp *d@ = destination
 457  *              @mp *x@ = some polynomial
 458  *
 459  * Returns:     The half-trace of @x@.
 460  *              (%$\HfTr(x)= x + x^{2^2} + \cdots + x^{2^{m-1}}$%
 461  *              if %$x \in \gf{2^m}$% with %$m$% odd).
 462  */
 463
 464 mp *gfreduce_halftrace(gfreduce *r, mp *d, mp *x)
 465 {
 466   mp *y = MP_COPY(x);
 467   mp *spare = MP_NEW;
 468   unsigned long m = mp_bits(r->p) - 1;
 469   unsigned long i;
 470
 471   mp_drop(d);
 472   for (i = 0; i < m - 1; i += 2) {
 473     mp *t = gf_sqr(spare, y);
 474     spare = y;
 475     y = gfreduce_do(r, t, t);
 476     t = gf_sqr(spare, y);
 477     spare = y;
 478     y = gfreduce_do(r, t, t);
 479     y = gf_add(y, y, x);
 480   }
 481   mp_drop(spare);
 482   return (y);
 483 }
 484
 485 /* --- @gfreduce_quadsolve@ --- *
 486  *
 487  * Arguments:   @gfreduce *r@ = pointer to reduction context
 488  *              @mp *d@ = destination
 489  *              @mp *x@ = some polynomial
 490  *
 491  * Returns:     A polynomial @y@ such that %$y^2 + y = x$%, or null.
 492  */
 493
 494 mp *gfreduce_quadsolve(gfreduce *r, mp *d, mp *x)
 495 {
 496   unsigned long m = mp_bits(r->p) - 1;
 497   mp *t;
 498
 499   MP_COPY(x);
 500   if (m & 1)
 501     d = gfreduce_halftrace(r, d, x);
 502   else {
 503     mp *z, *w, *rho = MP_NEW;
 504     mp *spare = MP_NEW;
 505     grand *fr = fibrand_create(0);
 506     unsigned long i;
 507
 508     for (;;) {
 509       rho = mprand(rho, m, fr, 0);
 510       z = MP_ZERO;
 511       w = MP_COPY(rho);
 512       for (i = 0; i < m - 1; i++) {
 513         t = gf_sqr(spare, z); spare = z; z = gfreduce_do(r, t, t);
 514         t = gf_sqr(spare, w); spare = w; w = gfreduce_do(r, t, t);
 515         t = gf_mul(spare, w, x); t = gfreduce_do(r, t, t); spare = t;
 516         z = gf_add(z, z, t);
 517         w = gf_add(w, w, rho);
 518       }
 519       if (!MP_ZEROP(w))
 520         break;
 521       MP_DROP(z);
 522       MP_DROP(w);
 523     }
 524     if (d) MP_DROP(d);
 525     MP_DROP(w);
 526     MP_DROP(spare);
 527     MP_DROP(rho);
 528     fr->ops->destroy(fr);
 529     d = z;
 530   }
 531
 532   t = gf_sqr(MP_NEW, d); t = gfreduce_do(r, t, t); t = gf_add(t, t, d);
 533   if (!MP_EQ(t, x)) {
 534     MP_DROP(d);
 535     d = 0;
 536   }
 537   MP_DROP(t);
 538   MP_DROP(x);
 539   if (d) d->v[0] &= ~(mpw)1;
 540   return (d);
 541 }
 542
/* --- @gfreduce_exp@ --- *
 *
 * Arguments:   @gfreduce *gr@ = pointer to reduction context
 *              @mp *d@ = fake destination
 *              @mp *a@ = base
 *              @mp *e@ = exponent
 *
 * Returns:     Result, %$a^e \bmod m$%.
 *
 * Use:         The result starts out as one, and stays that way if @e@ is
 *              zero.  If @e@ is negative then the base is first replaced
 *              by its inverse modulo @gr->p@.  The exponentiation proper
 *              is done by @EXP_SIMPLE@ if @e@ has fewer than @EXP_THRESH@
 *              words, and by @EXP_WINDOW@ otherwise.  The destination @d@
 *              is simply dropped.
 */

mp *gfreduce_exp(gfreduce *gr, mp *d, mp *a, mp *e)
{
  mp *x = MP_ONE;

  /* NOTE(review): @spare@ (and @gr@) are not otherwise used much here;
   * presumably the @EXP_...@ macros from `gfreduce-exp.h' refer to them by
   * name, so don't rename them -- confirm against that header.  The spare
   * is requested as secure memory if the exponent is marked @MP_BURN@.
   */
  mp *spare = (e->f & MP_BURN) ? MP_NEWSEC : MP_NEW;

  MP_SHRINK(e);
  MP_COPY(a);                           /* Our own reference to the base */
  if (MP_ZEROP(e))
    ;                                   /* %$a^0 = 1$%: nothing to do */
  else {
    if (MP_NEGP(e))
      a = gf_modinv(a, a, gr->p);
    if (MP_LEN(e) < EXP_THRESH)
      EXP_SIMPLE(x, a, e);
    else
      EXP_WINDOW(x, a, e);
  }
  mp_drop(d);
  mp_drop(a);
  mp_drop(spare);
  return (x);
}
 575
 576 /*----- Test rig ----------------------------------------------------------*/
 577
 578 #ifdef TEST_RIG
 579
 580 static int vreduce(dstr *v)
 581 {
 582   mp *d = *(mp **)v[0].buf;
 583   mp *n = *(mp **)v[1].buf;
 584   mp *r = *(mp **)v[2].buf;
 585   mp *c;
 586   int ok = 1;
 587   gfreduce rr;
 588
 589   gfreduce_create(&rr, d);
 590   c = gfreduce_do(&rr, MP_NEW, n);
 591   if (!MP_EQ(c, r)) {
 592     fprintf(stderr, "\n*** reduction failed\n*** ");
 593     gfreduce_dump(&rr, stderr);
 594     fprintf(stderr, "\n*** n = "); mp_writefile(n, stderr, 16);
 595     fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
 596     fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
 597     fprintf(stderr, "\n");
 598     ok = 0;
 599   }
 600   gfreduce_destroy(&rr);
 601   mp_drop(n); mp_drop(d); mp_drop(r); mp_drop(c);
 602   assert(mparena_count(MPARENA_GLOBAL) == 0);
 603   return (ok);
 604 }
 605
 606 static int vmodexp(dstr *v)
 607 {
 608   mp *p = *(mp **)v[0].buf;
 609   mp *g = *(mp **)v[1].buf;
 610   mp *x = *(mp **)v[2].buf;
 611   mp *r = *(mp **)v[3].buf;
 612   mp *c;
 613   int ok = 1;
 614   gfreduce rr;
 615
 616   gfreduce_create(&rr, p);
 617   c = gfreduce_exp(&rr, MP_NEW, g, x);
 618   if (!MP_EQ(c, r)) {
 619     fprintf(stderr, "\n*** modexp failed\n*** ");
 620     fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
 621     fprintf(stderr, "\n*** g = "); mp_writefile(g, stderr, 16);
 622     fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
 623     fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
 624     fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
 625     fprintf(stderr, "\n");
 626     ok = 0;
 627   }
 628   gfreduce_destroy(&rr);
 629   mp_drop(p); mp_drop(g); mp_drop(r); mp_drop(x); mp_drop(c);
 630   assert(mparena_count(MPARENA_GLOBAL) == 0);
 631   return (ok);
 632 }
 633
 634 static int vsqrt(dstr *v)
 635 {
 636   mp *p = *(mp **)v[0].buf;
 637   mp *x = *(mp **)v[1].buf;
 638   mp *r = *(mp **)v[2].buf;
 639   mp *c;
 640   int ok = 1;
 641   gfreduce rr;
 642
 643   gfreduce_create(&rr, p);
 644   c = gfreduce_sqrt(&rr, MP_NEW, x);
 645   if (!MP_EQ(c, r)) {
 646     fprintf(stderr, "\n*** sqrt failed\n*** ");
 647     fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
 648     fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
 649     fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
 650     fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
 651     fprintf(stderr, "\n");
 652     ok = 0;
 653   }
 654   gfreduce_destroy(&rr);
 655   mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
 656   assert(mparena_count(MPARENA_GLOBAL) == 0);
 657   return (ok);
 658 }
 659
 660 static int vtr(dstr *v)
 661 {
 662   mp *p = *(mp **)v[0].buf;
 663   mp *x = *(mp **)v[1].buf;
 664   int r = *(int *)v[2].buf, c;
 665   int ok = 1;
 666   gfreduce rr;
 667
 668   gfreduce_create(&rr, p);
 669   c = gfreduce_trace(&rr, x);
 670   if (c != r) {
 671     fprintf(stderr, "\n*** trace failed\n*** ");
 672     fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
 673     fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
 674     fprintf(stderr, "\n*** c = %d", c);
 675     fprintf(stderr, "\n*** r = %d", r);
 676     fprintf(stderr, "\n");
 677     ok = 0;
 678   }
 679   gfreduce_destroy(&rr);
 680   mp_drop(p); mp_drop(x);
 681   assert(mparena_count(MPARENA_GLOBAL) == 0);
 682   return (ok);
 683 }
 684
 685 static int vhftr(dstr *v)
 686 {
 687   mp *p = *(mp **)v[0].buf;
 688   mp *x = *(mp **)v[1].buf;
 689   mp *r = *(mp **)v[2].buf;
 690   mp *c;
 691   int ok = 1;
 692   gfreduce rr;
 693
 694   gfreduce_create(&rr, p);
 695   c = gfreduce_halftrace(&rr, MP_NEW, x);
 696   if (!MP_EQ(c, r)) {
 697     fprintf(stderr, "\n*** halftrace failed\n*** ");
 698     fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
 699     fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
 700     fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
 701     fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
 702     fprintf(stderr, "\n");
 703     ok = 0;
 704   }
 705   gfreduce_destroy(&rr);
 706   mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
 707   assert(mparena_count(MPARENA_GLOBAL) == 0);
 708   return (ok);
 709 }
 710
 711 static int vquad(dstr *v)
 712 {
 713   mp *p = *(mp **)v[0].buf;
 714   mp *x = *(mp **)v[1].buf;
 715   mp *r = *(mp **)v[2].buf;
 716   mp *c;
 717   int ok = 1;
 718   gfreduce rr;
 719
 720   gfreduce_create(&rr, p);
 721   c = gfreduce_quadsolve(&rr, MP_NEW, x);
 722   if (!MP_EQ(c, r)) {
 723     fprintf(stderr, "\n*** quadsolve failed\n*** ");
 724     fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
 725     fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
 726     fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
 727     fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
 728     fprintf(stderr, "\n");
 729     ok = 0;
 730   }
 731   gfreduce_destroy(&rr);
 732   mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
 733   assert(mparena_count(MPARENA_GLOBAL) == 0);
 734   return (ok);
 735 }
 736
/* Table of test chunks handed to @test_run@: each entry gives the chunk
 * name, the function which checks one vector, and a zero-terminated list
 * of field types.  The first field of every vector is the modulus.  An
 * all-zero entry ends the table.
 */

static test_chunk defs[] = {
  { "reduce", vreduce, { &type_mp, &type_mp, &type_mp, 0 } },
  { "modexp", vmodexp, { &type_mp, &type_mp, &type_mp, &type_mp, 0 } },
  { "sqrt", vsqrt, { &type_mp, &type_mp, &type_mp, 0 } },
  { "trace", vtr, { &type_mp, &type_mp, &type_int, 0 } },
  { "halftrace", vhftr, { &type_mp, &type_mp, &type_mp, 0 } },
  { "quadsolve", vquad, { &type_mp, &type_mp, &type_mp, 0 } },
  { 0, 0, { 0 } }
};
 746
/* Test-rig entry point: hands the chunk table and the path of the vector
 * file under @SRCDIR@ to @test_run@.  This function itself always returns
 * zero.
 */

int main(int argc, char *argv[])
{
  test_run(argc, argv, defs, SRCDIR"/t/gfreduce");
  return (0);
}
 752
 753 #endif
 754
 755 /*----- That's all, folks -------------------------------------------------*/