chiark - git - mdw - catacomb/blob - math/gfreduce.c

   1 /* -*-c-*-
   2  *
   3  * Efficient reduction modulo sparse binary polynomials
   4  *
   5  * (c) 2004 Straylight/Edgeware
   6  */
   7
   8 /*----- Licensing notice --------------------------------------------------*
   9  *
  10  * This file is part of Catacomb.
  11  *
  12  * Catacomb is free software; you can redistribute it and/or modify
  13  * it under the terms of the GNU Library General Public License as
  14  * published by the Free Software Foundation; either version 2 of the
  15  * License, or (at your option) any later version.
  16  *
  17  * Catacomb is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20  * GNU Library General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Library General Public
  23  * License along with Catacomb; if not, write to the Free
  24  * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  25  * MA 02111-1307, USA.
  26  */
  27
  28 /*----- Header files ------------------------------------------------------*/
  29
  30 #include <mLib/alloc.h>
  31 #include <mLib/darray.h>
  32 #include <mLib/macros.h>
  33
  34 #include "gf.h"
  35 #include "gfreduce.h"
  36 #include "gfreduce-exp.h"
  37 #include "fibrand.h"
  38 #include "mprand.h"
  39
  40 /*----- Data structures ---------------------------------------------------*/
  41
/* --- Growable vector of @gfreduce_instr@, used while compiling --- */

DA_DECL(instr_v, gfreduce_instr);
  43
  44 /*----- Main code ---------------------------------------------------------*/
  45
  46 /* --- What's going on here? --- *
  47  *
  48  * Let's face it, @gfx_div@ sucks.  It works (I hope), but it's not in any
  49  * sense fast.  Here, we do efficient reduction modulo sparse polynomials.
  50  * (It works for arbitrary polynomials, but isn't efficient for dense ones.)
  51  *
  52  * Suppose that %$p = x^n + p'$% where %$p' = \sum_{0\le i<n} p_i x^i$%,
  53  * hopefully with only a few %$p_i \ne 0$%.  We're going to compile %$p$%
  54  * into a sequence of instructions which can be used to perform reduction
  55  * modulo %$p$%.  The important observation is that
  56  * %$x^n \equiv p' \pmod p$%.
  57  *
  58  * Suppose we're working with %$w$%-bit words; let %$n = N w + n'$% with
  59  * %$0 \le n' < w$%.  Let %$u(x)$% be some arbitrary polynomial.  Write
  60  * %$u = z x^k + u'$% with %$\deg u' < k \ge n$%.  Then a reduction step uses
  61  * that %$u \equiv u' + z p' x^{k-n} \pmod p$%: the right hand side has
  62  * degree %$\max \{ \deg u', k + \deg p' - n + \deg z \} < \deg u$%, so this
  63  * makes progress towards a complete reduction.
  64  *
  65  * The compiled instruction sequence computes
  66  * %$u' + z p' x^{k-n} = u' + \sum_{0\le i<n} p_i z x^{k-n+i}$%.
  67  */
  68
  69 /* --- @gfreduce_create@ --- *
  70  *
  71  * Arguments:   @gfreduce *r@ = structure to fill in
  72  *              @mp *p@ = a (hopefully sparse) polynomial
  73  *
  74  * Returns:     ---
  75  *
  76  * Use:         Initializes a context structure for reduction.
  77  */
  78
/* --- Code generator state --- *
 *
 * Working state used while @gfreduce_create@ compiles a modulus into an
 * instruction sequence.  Note that @gfreduce_create@ initializes the first
 * two members positionally, so @f@ and @iv@ must stay at the front.
 *
 * The flag names are defined here, next to the member they apply to, and
 * are undefined again once the generator code is finished.
 */

struct gen {
  unsigned f;                           /* Flags */
#define f_lsr 1u                        /*   Overflow from previous word */
#define f_load 2u                       /*   Outstanding @LOAD@ */
#define f_fip 4u                        /*   Final-pass offset is set */
  instr_v iv;                           /* Instruction vector */
  size_t fip;                           /* Offset for final-pass reduction */
  size_t w;                             /* Currently loaded target word */
  size_t wi;                            /* Index in @iv@ where the current
                                         * word's left-shifts start */
  gfreduce *r;                          /* Reduction context pointer */
};
  90
  91 #define INSTR(g_, op_, arg_) do {                                       \
  92   struct gen *_g = (g_);                                                \
  93   instr_v *_iv = &_g->iv;                                               \
  94   size_t _i = DA_LEN(_iv);                                              \
  95                                                                         \
  96   DA_ENSURE(_iv, 1);                                                    \
  97   DA(_iv)[_i].op = (op_);                                               \
  98   DA(_iv)[_i].arg = (arg_);                                             \
  99   DA_EXTEND(_iv, 1);                                                    \
 100 } while (0)
 101
 102 static void emit_load(struct gen *g, size_t w)
 103 {
 104   /* --- If this is not the low-order word then note final-pass start --- *
 105    *
 106    * Once we've eliminated the whole high-degree words, there will possibly
 107    * remain a few high-degree bits.  We can further reduce the subject
 108    * polynomial by subtracting an appropriate multiple of %$p'$%, but if we
 109    * do this naively we'll end up addressing `low-order' words beyond the
 110    * bottom of our input.  We solve this problem by storing an alternative
 111    * start position for this final pass (which works because we scan bits
 112    * right-to-left).
 113    */
 114
 115   if (!(g->f & f_fip) && w < g->r->lim) {
 116     g->fip = DA_LEN(&g->iv);
 117     g->f |= f_fip;
 118   }
 119
 120   /* --- Actually emit the instruction --- */
 121
 122   INSTR(g, GFRI_LOAD, w);
 123   g->f |= f_load;
 124   g->w = w;
 125 }
 126
 127 static void emit_right_shifts(struct gen *g)
 128 {
 129   gfreduce_instr *ip;
 130   size_t i, wl;
 131
 132   /* --- Close off the current word --- *
 133    *
 134    * If we shifted into this current word with a nonzero bit offset, then
 135    * we'll also need to arrange to perform a sequence of right shifts into
 136    * the following word, which we might as well do by scanning the
 137    * instruction sequence (which starts at @wi@).
 138    *
 139    * Either way, we leave a @LOAD@ unmatched if there was one before, in the
 140    * hope that callers have an easier time; @g->w@ is updated to reflect the
 141    * currently open word.
 142    */
 143
 144   if (!(g->f & f_lsr))
 145     return;
 146
 147   wl = DA_LEN(&g->iv);
 148   INSTR(g, GFRI_STORE, g->w);
 149   emit_load(g, g->w - 1);
 150   for (i = g->wi; i < wl; i++) {
 151     ip = &DA(&g->iv)[i];
 152     assert(ip->op == GFRI_LSL);
 153     if (ip->arg)
 154       INSTR(g, GFRI_LSR, MPW_BITS - ip->arg);
 155   }
 156   g->f &= ~f_lsr;
 157 }
 158
 159 static void ensure_loaded(struct gen *g, size_t w)
 160 {
 161   if (!(g->f & f_load)) {
 162     emit_load(g, w);
 163     g->wi = DA_LEN(&g->iv);
 164   } else if (w != g->w) {
 165     emit_right_shifts(g);
 166     if (w != g->w) {
 167       INSTR(g, GFRI_STORE, g->w);
 168       emit_load(g, w);
 169     }
 170     g->wi = DA_LEN(&g->iv);
 171   }
 172 }
 173
 174 void gfreduce_create(gfreduce *r, mp *p)
 175 {
 176   struct gen g = { 0, DA_INIT };
 177   unsigned long d;
 178   unsigned dw;
 179   mpscan sc;
 180   unsigned long i;
 181   size_t w, bb;
 182
 183   /* --- Sort out the easy stuff --- */
 184
 185   g.r = r;
 186   d = mp_bits(p); assert(d); d--;
 187   r->lim = d/MPW_BITS;
 188   dw = d%MPW_BITS;
 189   if (!dw)
 190     r->mask = 0;
 191   else {
 192     r->mask = MPW(((mpw)-1) << dw);
 193     r->lim++;
 194   }
 195   r->p = mp_copy(p);
 196
 197   /* --- How this works --- *
 198    *
 199    * The instruction sequence is run with two ambient parameters: a pointer
 200    * (usually) just past the most significant word of the polynomial to be
 201    * reduced; and a word %$z$% which is the multiple of %$p'$% we are meant
 202    * to add.
 203    *
 204    * The sequence visits each word of the polynomial at most once.  Suppose
 205    * %$u = z x^{w N} + u'$%; our pointer points just past the end of %$u'$%.
 206    * Word %$I$% of %$u'$% will be affected by modulus bits %$p_i$% where
 207    * %$(N - I - 1) w + 1 \le i \le (N - I + 1) w - 1$%, so %$p_i$% affects
 208    * word %$I = \lceil (n - i + 1)/w \rceil$% and (if %$i$% is not a multiple
 209    * of %$w$%) also word %$I - 1$%.
 210    *
 211    * We have four instructions: @LOAD@ reads a specified word of %$u$% into an
 212    * accumulator, and @STORE@ stores it back (we'll always store back to the
 213    * same word we most recently read, but this isn't a requirement); and
 214    * @LSL@ and @LSR@, which XOR in appropriately shifted copies of %$z$% into
 215    * the accumulator.  So a typical program will contain sequences of @LSR@
 216    * and @LSL@ instructions sandwiched between @LOAD@/@STORE@ pairs.
 217    *
 218    * We do a single right-to-left pass across %$p$%.
 219    */
 220
 221   bb = MPW_BITS - dw;
 222
 223   for (i = 0, mp_scan(&sc, p); mp_step(&sc) && i < d; i++) {
 224     if (!mp_bit(&sc))
 225       continue;
 226
 227     /* --- We've found a set bit, so work out which word it affects --- *
 228      *
 229      * In general, a bit affects two words: it needs to be shifted left into
 230      * one, and shifted right into the next.  We find the former here.
 231      */
 232
 233     w = (d - i + MPW_BITS - 1)/MPW_BITS;
 234
 235     /* --- Concentrate on the appropriate word --- */
 236
 237     ensure_loaded(&g, w);
 238
 239     /* --- Accumulate a new @LSL@ instruction --- *
 240      *
 241      * If this was a nonzero shift, then we'll need to arrange to do right
 242      * shifts into the following word.
 243      */
 244
 245     INSTR(&g, GFRI_LSL, (bb + i)%MPW_BITS);
 246     if ((bb + i)%MPW_BITS)
 247       g.f |= f_lsr;
 248   }
 249
 250   /* --- Wrapping up --- *
 251    *
 252    * We probably need a final @STORE@, and maybe a sequence of right shifts.
 253    */
 254
 255   if (g.f & f_load) {
 256     emit_right_shifts(&g);
 257     INSTR(&g, GFRI_STORE, g.w);
 258   }
 259
 260   /* --- Copy the instruction vector.
 261    *
 262    * If we've not set a final-pass offset yet then now would be an excellent
 263    * time.  Obviously it should be right at the end, because there's nothing
 264    * for a final pass to do.
 265    */
 266
 267   r->in = DA_LEN(&g.iv);
 268   r->iv = xmalloc(r->in * sizeof(gfreduce_instr));
 269   memcpy(r->iv, DA(&g.iv), r->in * sizeof(gfreduce_instr));
 270
 271   if (!(g.f & f_fip)) g.fip = DA_LEN(&g.iv);
 272   r->fiv = r->iv + g.fip;
 273
 274   DA_DESTROY(&g.iv);
 275 }
 276
/* --- Retire the code generator's helper macro and flag names --- */

#undef INSTR

#undef f_lsr
#undef f_load
#undef f_fip
 282
 283 /* --- @gfreduce_destroy@ --- *
 284  *
 285  * Arguments:   @gfreduce *r@ = structure to free
 286  *
 287  * Returns:     ---
 288  *
 289  * Use:         Reclaims the resources from a reduction context.
 290  */
 291
 292 void gfreduce_destroy(gfreduce *r)
 293 {
 294   mp_drop(r->p);
 295   xfree(r->iv);
 296 }
 297
 298 /* --- @gfreduce_dump@ --- *
 299  *
 300  * Arguments:   @gfreduce *r@ = structure to dump
 301  *              @FILE *fp@ = file to dump on
 302  *
 303  * Returns:     ---
 304  *
 305  * Use:         Dumps a reduction context.
 306  */
 307
 308 void gfreduce_dump(gfreduce *r, FILE *fp)
 309 {
 310   size_t i;
 311
 312   fprintf(fp, "poly = "); mp_writefile(r->p, fp, 16);
 313   fprintf(fp, "\n  lim = %lu; mask = %lx\n",
 314           (unsigned long)r->lim, (unsigned long)r->mask);
 315   for (i = 0; i < r->in; i++) {
 316     static const char *opname[] = { "load", "lsl", "lsr", "store" };
 317     if (&r->iv[i] == r->fiv)
 318       fputs("final:\n", fp);
 319     assert(r->iv[i].op < N(opname));
 320     fprintf(fp, "  %s %lu\n",
 321             opname[r->iv[i].op],
 322             (unsigned long)r->iv[i].arg);
 323   }
 324   if (&r->iv[i] == r->fiv)
 325     fputs("final:\n", fp);
 326 }
 327
 328 /* --- @gfreduce_do@ --- *
 329  *
 330  * Arguments:   @gfreduce *r@ = reduction context
 331  *              @mp *d@ = destination
 332  *              @mp *x@ = source
 333  *
 334  * Returns:     Destination, @x@ reduced modulo the reduction poly.
 335  */
 336
 337 static void run(const gfreduce_instr *i, const gfreduce_instr *il,
 338                 mpw *v, mpw z)
 339 {
 340   mpw w = 0;
 341
 342   for (; i < il; i++) {
 343     switch (i->op) {
 344       case GFRI_LOAD: w = *(v - i->arg); break;
 345       case GFRI_LSL: w ^= z << i->arg; break;
 346       case GFRI_LSR: w ^= z >> i->arg; break;
 347       case GFRI_STORE: *(v - i->arg) = MPW(w); break;
 348       default: abort();
 349     }
 350   }
 351 }
 352
 353 mp *gfreduce_do(gfreduce *r, mp *d, mp *x)
 354 {
 355   mpw *v, *vl;
 356   const gfreduce_instr *il;
 357   mpw z;
 358
 359   /* --- Try to reuse the source's space --- */
 360
 361   MP_COPY(x);
 362   if (d) MP_DROP(d);
 363   MP_DEST(x, MP_LEN(x), x->f);
 364
 365   /* --- Do the reduction --- */
 366
 367   il = r->iv + r->in;
 368   if (MP_LEN(x) >= r->lim) {
 369     v = x->v + r->lim;
 370     vl = x->vl;
 371     while (vl-- > v) {
 372       while (*vl) {
 373         z = *vl;
 374         *vl = 0;
 375         run(r->iv, il, vl, z);
 376       }
 377     }
 378     if (r->mask) {
 379       while (*vl & r->mask) {
 380         z = *vl & r->mask;
 381         *vl &= ~r->mask;
 382         run(r->fiv, il, vl, z);
 383       }
 384     }
 385   }
 386
 387   /* --- Done --- */
 388
 389   MP_SHRINK(x);
 390   return (x);
 391 }
 392
 393 /* --- @gfreduce_sqrt@ --- *
 394  *
 395  * Arguments:   @gfreduce *r@ = pointer to reduction context
 396  *              @mp *d@ = destination
 397  *              @mp *x@ = some polynomial
 398  *
 399  * Returns:     The square root of @x@ modulo @r->p@, or null.
 400  */
 401
 402 mp *gfreduce_sqrt(gfreduce *r, mp *d, mp *x)
 403 {
 404   mp *y = MP_COPY(x);
 405   mp *z, *spare = MP_NEW;
 406   unsigned long m = mp_bits(r->p) - 1;
 407   unsigned long i;
 408
 409   for (i = 0; i < m - 1; i++) {
 410     mp *t = gf_sqr(spare, y);
 411     spare = y;
 412     y = gfreduce_do(r, t, t);
 413   }
 414   z = gf_sqr(spare, y);
 415   z = gfreduce_do(r, z, z);
 416   if (!MP_EQ(x, z)) {
 417     mp_drop(y);
 418     y = 0;
 419   }
 420   mp_drop(z);
 421   mp_drop(d);
 422   return (y);
 423 }
 424
 425 /* --- @gfreduce_trace@ --- *
 426  *
 427  * Arguments:   @gfreduce *r@ = pointer to reduction context
 428  *              @mp *x@ = some polynomial
 429  *
 430  * Returns:     The trace of @x@. (%$\Tr(x)=x + x^2 + \cdots + x^{2^{m-1}}$%
 431  *              if %$x \in \gf{2^m}$%).
 432  */
 433
 434 int gfreduce_trace(gfreduce *r, mp *x)
 435 {
 436   mp *y = MP_COPY(x);
 437   mp *spare = MP_NEW;
 438   unsigned long m = mp_bits(r->p) - 1;
 439   unsigned long i;
 440   int rc;
 441
 442   for (i = 0; i < m - 1; i++) {
 443     mp *t = gf_sqr(spare, y);
 444     spare = y;
 445     y = gfreduce_do(r, t, t);
 446     y = gf_add(y, y, x);
 447   }
 448   rc = !MP_ZEROP(y);
 449   mp_drop(spare);
 450   mp_drop(y);
 451   return (rc);
 452 }
 453
 454 /* --- @gfreduce_halftrace@ --- *
 455  *
 456  * Arguments:   @gfreduce *r@ = pointer to reduction context
 457  *              @mp *d@ = destination
 458  *              @mp *x@ = some polynomial
 459  *
 460  * Returns:     The half-trace of @x@.
 461  *              (%$\HfTr(x)= x + x^{2^2} + \cdots + x^{2^{m-1}}$%
 462  *              if %$x \in \gf{2^m}$% with %$m$% odd).
 463  */
 464
 465 mp *gfreduce_halftrace(gfreduce *r, mp *d, mp *x)
 466 {
 467   mp *y = MP_COPY(x);
 468   mp *spare = MP_NEW;
 469   unsigned long m = mp_bits(r->p) - 1;
 470   unsigned long i;
 471
 472   mp_drop(d);
 473   for (i = 0; i < m - 1; i += 2) {
 474     mp *t = gf_sqr(spare, y);
 475     spare = y;
 476     y = gfreduce_do(r, t, t);
 477     t = gf_sqr(spare, y);
 478     spare = y;
 479     y = gfreduce_do(r, t, t);
 480     y = gf_add(y, y, x);
 481   }
 482   mp_drop(spare);
 483   return (y);
 484 }
 485
 486 /* --- @gfreduce_quadsolve@ --- *
 487  *
 488  * Arguments:   @gfreduce *r@ = pointer to reduction context
 489  *              @mp *d@ = destination
 490  *              @mp *x@ = some polynomial
 491  *
 492  * Returns:     A polynomial @y@ such that %$y^2 + y = x$%, or null.
 493  */
 494
 495 mp *gfreduce_quadsolve(gfreduce *r, mp *d, mp *x)
 496 {
 497   unsigned long m = mp_bits(r->p) - 1;
 498   mp *t;
 499
 500   MP_COPY(x);
 501   if (m & 1)
 502     d = gfreduce_halftrace(r, d, x);
 503   else {
 504     mp *z, *w, *rho = MP_NEW;
 505     mp *spare = MP_NEW;
 506     grand *fr = fibrand_create(0);
 507     unsigned long i;
 508
 509     for (;;) {
 510       rho = mprand(rho, m, fr, 0);
 511       z = MP_ZERO;
 512       w = MP_COPY(rho);
 513       for (i = 0; i < m - 1; i++) {
 514         t = gf_sqr(spare, z); spare = z; z = gfreduce_do(r, t, t);
 515         t = gf_sqr(spare, w); spare = w; w = gfreduce_do(r, t, t);
 516         t = gf_mul(spare, w, x); t = gfreduce_do(r, t, t); spare = t;
 517         z = gf_add(z, z, t);
 518         w = gf_add(w, w, rho);
 519       }
 520       if (!MP_ZEROP(w))
 521         break;
 522       MP_DROP(z);
 523       MP_DROP(w);
 524     }
 525     if (d) MP_DROP(d);
 526     MP_DROP(w);
 527     MP_DROP(spare);
 528     MP_DROP(rho);
 529     fr->ops->destroy(fr);
 530     d = z;
 531   }
 532
 533   t = gf_sqr(MP_NEW, d); t = gfreduce_do(r, t, t); t = gf_add(t, t, d);
 534   if (!MP_EQ(t, x)) {
 535     MP_DROP(d);
 536     d = 0;
 537   }
 538   MP_DROP(t);
 539   MP_DROP(x);
 540   if (d) d->v[0] &= ~(mpw)1;
 541   return (d);
 542 }
 543
 544 /* --- @gfreduce_exp@ --- *
 545  *
 546  * Arguments:   @gfreduce *gr@ = pointer to reduction context
 547  *              @mp *d@ = fake destination
 548  *              @mp *a@ = base
 549  *              @mp *e@ = exponent
 550  *
 551  * Returns:     Result, %$a^e \bmod m$%.
 552  */
 553
 554 mp *gfreduce_exp(gfreduce *gr, mp *d, mp *a, mp *e)
 555 {
 556   mp *x = MP_ONE;
 557   mp *spare = (e->f & MP_BURN) ? MP_NEWSEC : MP_NEW;
 558
 559   MP_SHRINK(e);
 560   MP_COPY(a);
 561   if (MP_ZEROP(e))
 562     ;
 563   else {
 564     if (MP_NEGP(e))
 565       a = gf_modinv(a, a, gr->p);
 566     if (MP_LEN(e) < EXP_THRESH)
 567       EXP_SIMPLE(x, a, e);
 568     else
 569       EXP_WINDOW(x, a, e);
 570   }
 571   mp_drop(d);
 572   mp_drop(a);
 573   mp_drop(spare);
 574   return (x);
 575 }
 576
 577 /*----- Test rig ----------------------------------------------------------*/
 578
 579 #ifdef TEST_RIG
 580
 581 static int vreduce(dstr *v)
 582 {
 583   mp *d = *(mp **)v[0].buf;
 584   mp *n = *(mp **)v[1].buf;
 585   mp *r = *(mp **)v[2].buf;
 586   mp *c;
 587   int ok = 1;
 588   gfreduce rr;
 589
 590   gfreduce_create(&rr, d);
 591   c = gfreduce_do(&rr, MP_NEW, n);
 592   if (!MP_EQ(c, r)) {
 593     fprintf(stderr, "\n*** reduction failed\n*** ");
 594     gfreduce_dump(&rr, stderr);
 595     fprintf(stderr, "\n*** n = "); mp_writefile(n, stderr, 16);
 596     fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
 597     fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
 598     fprintf(stderr, "\n");
 599     ok = 0;
 600   }
 601   gfreduce_destroy(&rr);
 602   mp_drop(n); mp_drop(d); mp_drop(r); mp_drop(c);
 603   assert(mparena_count(MPARENA_GLOBAL) == 0);
 604   return (ok);
 605 }
 606
 607 static int vmodexp(dstr *v)
 608 {
 609   mp *p = *(mp **)v[0].buf;
 610   mp *g = *(mp **)v[1].buf;
 611   mp *x = *(mp **)v[2].buf;
 612   mp *r = *(mp **)v[3].buf;
 613   mp *c;
 614   int ok = 1;
 615   gfreduce rr;
 616
 617   gfreduce_create(&rr, p);
 618   c = gfreduce_exp(&rr, MP_NEW, g, x);
 619   if (!MP_EQ(c, r)) {
 620     fprintf(stderr, "\n*** modexp failed\n*** ");
 621     fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
 622     fprintf(stderr, "\n*** g = "); mp_writefile(g, stderr, 16);
 623     fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
 624     fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
 625     fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
 626     fprintf(stderr, "\n");
 627     ok = 0;
 628   }
 629   gfreduce_destroy(&rr);
 630   mp_drop(p); mp_drop(g); mp_drop(r); mp_drop(x); mp_drop(c);
 631   assert(mparena_count(MPARENA_GLOBAL) == 0);
 632   return (ok);
 633 }
 634
 635 static int vsqrt(dstr *v)
 636 {
 637   mp *p = *(mp **)v[0].buf;
 638   mp *x = *(mp **)v[1].buf;
 639   mp *r = *(mp **)v[2].buf;
 640   mp *c;
 641   int ok = 1;
 642   gfreduce rr;
 643
 644   gfreduce_create(&rr, p);
 645   c = gfreduce_sqrt(&rr, MP_NEW, x);
 646   if (!MP_EQ(c, r)) {
 647     fprintf(stderr, "\n*** sqrt failed\n*** ");
 648     fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
 649     fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
 650     fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
 651     fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
 652     fprintf(stderr, "\n");
 653     ok = 0;
 654   }
 655   gfreduce_destroy(&rr);
 656   mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
 657   assert(mparena_count(MPARENA_GLOBAL) == 0);
 658   return (ok);
 659 }
 660
 661 static int vtr(dstr *v)
 662 {
 663   mp *p = *(mp **)v[0].buf;
 664   mp *x = *(mp **)v[1].buf;
 665   int r = *(int *)v[2].buf, c;
 666   int ok = 1;
 667   gfreduce rr;
 668
 669   gfreduce_create(&rr, p);
 670   c = gfreduce_trace(&rr, x);
 671   if (c != r) {
 672     fprintf(stderr, "\n*** trace failed\n*** ");
 673     fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
 674     fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
 675     fprintf(stderr, "\n*** c = %d", c);
 676     fprintf(stderr, "\n*** r = %d", r);
 677     fprintf(stderr, "\n");
 678     ok = 0;
 679   }
 680   gfreduce_destroy(&rr);
 681   mp_drop(p); mp_drop(x);
 682   assert(mparena_count(MPARENA_GLOBAL) == 0);
 683   return (ok);
 684 }
 685
 686 static int vhftr(dstr *v)
 687 {
 688   mp *p = *(mp **)v[0].buf;
 689   mp *x = *(mp **)v[1].buf;
 690   mp *r = *(mp **)v[2].buf;
 691   mp *c;
 692   int ok = 1;
 693   gfreduce rr;
 694
 695   gfreduce_create(&rr, p);
 696   c = gfreduce_halftrace(&rr, MP_NEW, x);
 697   if (!MP_EQ(c, r)) {
 698     fprintf(stderr, "\n*** halftrace failed\n*** ");
 699     fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
 700     fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
 701     fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
 702     fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
 703     fprintf(stderr, "\n");
 704     ok = 0;
 705   }
 706   gfreduce_destroy(&rr);
 707   mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
 708   assert(mparena_count(MPARENA_GLOBAL) == 0);
 709   return (ok);
 710 }
 711
 712 static int vquad(dstr *v)
 713 {
 714   mp *p = *(mp **)v[0].buf;
 715   mp *x = *(mp **)v[1].buf;
 716   mp *r = *(mp **)v[2].buf;
 717   mp *c;
 718   int ok = 1;
 719   gfreduce rr;
 720
 721   gfreduce_create(&rr, p);
 722   c = gfreduce_quadsolve(&rr, MP_NEW, x);
 723   if (!MP_EQ(c, r)) {
 724     fprintf(stderr, "\n*** quadsolve failed\n*** ");
 725     fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
 726     fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
 727     fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
 728     fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
 729     fprintf(stderr, "\n");
 730     ok = 0;
 731   }
 732   gfreduce_destroy(&rr);
 733   mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
 734   assert(mparena_count(MPARENA_GLOBAL) == 0);
 735   return (ok);
 736 }
 737
/* --- Test table --- *
 *
 * Each entry gives a test name, the function which runs it, and the types
 * of the fields that function reads from its vector, in order.  The list
 * of types and the table itself are both terminated by zeros.
 */

static test_chunk defs[] = {
  { "reduce", vreduce, { &type_mp, &type_mp, &type_mp, 0 } },
  { "modexp", vmodexp, { &type_mp, &type_mp, &type_mp, &type_mp, 0 } },
  { "sqrt", vsqrt, { &type_mp, &type_mp, &type_mp, 0 } },
  { "trace", vtr, { &type_mp, &type_mp, &type_int, 0 } },
  { "halftrace", vhftr, { &type_mp, &type_mp, &type_mp, 0 } },
  { "quadsolve", vquad, { &type_mp, &type_mp, &type_mp, 0 } },
  { 0, 0, { 0 } }
};
 747
/* --- Test rig entry point: hand the table and vector file to @test_run@ --- */

int main(int argc, char *argv[])
{
  test_run(argc, argv, defs, SRCDIR"/t/gfreduce");
  return (0);
}
 753
 754 #endif
 755
 756 /*----- That's all, folks -------------------------------------------------*/