chiark - git - mdw - catacomb/blob - mpx.c

   1 /* -*-c-*-
   2  *
   3  * $Id: mpx.c,v 1.10 2000/10/08 12:06:12 mdw Exp $
   4  *
   5  * Low-level multiprecision arithmetic
   6  *
   7  * (c) 1999 Straylight/Edgeware
   8  */
   9
  10 /*----- Licensing notice --------------------------------------------------*
  11  *
  12  * This file is part of Catacomb.
  13  *
  14  * Catacomb is free software; you can redistribute it and/or modify
  15  * it under the terms of the GNU Library General Public License as
  16  * published by the Free Software Foundation; either version 2 of the
  17  * License, or (at your option) any later version.
  18  *
  19  * Catacomb is distributed in the hope that it will be useful,
  20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22  * GNU Library General Public License for more details.
  23  *
  24  * You should have received a copy of the GNU Library General Public
  25  * License along with Catacomb; if not, write to the Free
  26  * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  27  * MA 02111-1307, USA.
  28  */
  29
  30 /*----- Revision history --------------------------------------------------*
  31  *
  32  * $Log: mpx.c,v $
  33  * Revision 1.10  2000/10/08 12:06:12  mdw
  34  * Provide @mpx_ueq@ for rapidly testing equality of two integers.
  35  *
  36  * Revision 1.9  2000/06/26 07:52:50  mdw
  37  * Portability fix for the bug fix.
  38  *
  39  * Revision 1.8  2000/06/25 12:59:02  mdw
  40  * (mpx_udiv): Fix bug in quotient digit estimation.
  41  *
  42  * Revision 1.7  1999/12/22 15:49:07  mdw
  43  * New function for division by a small integer.
  44  *
  45  * Revision 1.6  1999/11/20 22:43:44  mdw
  46  * Integrate testing for MPX routines.
  47  *
  48  * Revision 1.5  1999/11/20 22:23:27  mdw
  49  * Add function versions of some low-level macros with wider use.
  50  *
  51  * Revision 1.4  1999/11/17 18:04:09  mdw
  52  * Add two's-complement functionality.  Improve mpx_udiv a little by
  53  * performing the multiplication of the divisor by q with the subtraction
  54  * from r.
  55  *
  56  * Revision 1.3  1999/11/13 01:57:31  mdw
  57  * Remove stray debugging code.
  58  *
  59  * Revision 1.2  1999/11/13 01:50:59  mdw
  60  * Multiprecision routines finished and tested.
  61  *
  62  * Revision 1.1  1999/09/03 08:41:12  mdw
  63  * Initial import.
  64  *
  65  */
  66
  67 /*----- Header files ------------------------------------------------------*/
  68
  69 #include <assert.h>
  70 #include <stdio.h>
  71 #include <stdlib.h>
  72 #include <string.h>
  73
  74 #include <mLib/bits.h>
  75
  76 #include "mptypes.h"
  77 #include "mpx.h"
  78
  79 /*----- Loading and storing -----------------------------------------------*/
  80
  81 /* --- @mpx_storel@ --- *
  82  *
  83  * Arguments:   @const mpw *v, *vl@ = base and limit of source vector
  84  *              @void *pp@ = pointer to octet array
  85  *              @size_t sz@ = size of octet array
  86  *
  87  * Returns:     ---
  88  *
  89  * Use:         Stores an MP in an octet array, least significant octet
  90  *              first.  High-end octets are silently discarded if there
  91  *              isn't enough space for them.
  92  */
  93
  94 void mpx_storel(const mpw *v, const mpw *vl, void *pp, size_t sz)
  95 {
  96   mpw n, w = 0;
  97   octet *p = pp, *q = p + sz;
  98   unsigned bits = 0;
  99
 100   while (p < q) {
 101     if (bits < 8) {
 102       if (v >= vl) {
 103         *p++ = U8(w);
 104         break;
 105       }
 106       n = *v++;
 107       *p++ = U8(w | n << bits);
 108       w = n >> (8 - bits);
 109       bits += MPW_BITS - 8;
 110     } else {
 111       *p++ = U8(w);
 112       w >>= 8;
 113       bits -= 8;
 114     }
 115   }
 116   memset(p, 0, q - p);
 117 }
 118
 119 /* --- @mpx_loadl@ --- *
 120  *
 121  * Arguments:   @mpw *v, *vl@ = base and limit of destination vector
 122  *              @const void *pp@ = pointer to octet array
 123  *              @size_t sz@ = size of octet array
 124  *
 125  * Returns:     ---
 126  *
 127  * Use:         Loads an MP in an octet array, least significant octet
 128  *              first.  High-end octets are ignored if there isn't enough
 129  *              space for them.
 130  */
 131
 132 void mpx_loadl(mpw *v, mpw *vl, const void *pp, size_t sz)
 133 {
 134   unsigned n;
 135   mpw w = 0;
 136   const octet *p = pp, *q = p + sz;
 137   unsigned bits = 0;
 138
 139   if (v >= vl)
 140     return;
 141   while (p < q) {
 142     n = U8(*p++);
 143     w |= n << bits;
 144     bits += 8;
 145     if (bits >= MPW_BITS) {
 146       *v++ = MPW(w);
 147       w = n >> (MPW_BITS - bits + 8);
 148       bits -= MPW_BITS;
 149       if (v >= vl)
 150         return;
 151     }
 152   }
 153   *v++ = w;
 154   MPX_ZERO(v, vl);
 155 }
 156
 157 /* --- @mpx_storeb@ --- *
 158  *
 159  * Arguments:   @const mpw *v, *vl@ = base and limit of source vector
 160  *              @void *pp@ = pointer to octet array
 161  *              @size_t sz@ = size of octet array
 162  *
 163  * Returns:     ---
 164  *
 165  * Use:         Stores an MP in an octet array, most significant octet
 166  *              first.  High-end octets are silently discarded if there
 167  *              isn't enough space for them.
 168  */
 169
 170 void mpx_storeb(const mpw *v, const mpw *vl, void *pp, size_t sz)
 171 {
 172   mpw n, w = 0;
 173   octet *p = pp, *q = p + sz;
 174   unsigned bits = 0;
 175
 176   while (q > p) {
 177     if (bits < 8) {
 178       if (v >= vl) {
 179         *--q = U8(w);
 180         break;
 181       }
 182       n = *v++;
 183       *--q = U8(w | n << bits);
 184       w = n >> (8 - bits);
 185       bits += MPW_BITS - 8;
 186     } else {
 187       *--q = U8(w);
 188       w >>= 8;
 189       bits -= 8;
 190     }
 191   }
 192   memset(p, 0, q - p);
 193 }
 194
 195 /* --- @mpx_loadb@ --- *
 196  *
 197  * Arguments:   @mpw *v, *vl@ = base and limit of destination vector
 198  *              @const void *pp@ = pointer to octet array
 199  *              @size_t sz@ = size of octet array
 200  *
 201  * Returns:     ---
 202  *
 203  * Use:         Loads an MP in an octet array, most significant octet
 204  *              first.  High-end octets are ignored if there isn't enough
 205  *              space for them.
 206  */
 207
 208 void mpx_loadb(mpw *v, mpw *vl, const void *pp, size_t sz)
 209 {
 210   unsigned n;
 211   mpw w = 0;
 212   const octet *p = pp, *q = p + sz;
 213   unsigned bits = 0;
 214
 215   if (v >= vl)
 216     return;
 217   while (q > p) {
 218     n = U8(*--q);
 219     w |= n << bits;
 220     bits += 8;
 221     if (bits >= MPW_BITS) {
 222       *v++ = MPW(w);
 223       w = n >> (MPW_BITS - bits + 8);
 224       bits -= MPW_BITS;
 225       if (v >= vl)
 226         return;
 227     }
 228   }
 229   *v++ = w;
 230   MPX_ZERO(v, vl);
 231 }
 232
 233 /*----- Logical shifting --------------------------------------------------*/
 234
 235 /* --- @mpx_lsl@ --- *
 236  *
 237  * Arguments:   @mpw *dv, *dvl@ = destination vector base and limit
 238  *              @const mpw *av, *avl@ = source vector base and limit
 239  *              @size_t n@ = number of bit positions to shift by
 240  *
 241  * Returns:     ---
 242  *
 243  * Use:         Performs a logical shift left operation on an integer.
 244  */
 245
 246 void mpx_lsl(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n)
 247 {
 248   size_t nw;
 249   unsigned nb;
 250
 251   /* --- Trivial special case --- */
 252
 253   if (n == 0)
 254     MPX_COPY(dv, dvl, av, avl);
 255
 256   /* --- Single bit shifting --- */
 257
 258   else if (n == 1) {
 259     mpw w = 0;
 260     while (av < avl) {
 261       mpw t;
 262       if (dv >= dvl)
 263         goto done;
 264       t = *av++;
 265       *dv++ = MPW((t << 1) | w);
 266       w = t >> (MPW_BITS - 1);
 267     }
 268     if (dv >= dvl)
 269       goto done;
 270     *dv++ = MPW(w);
 271     MPX_ZERO(dv, dvl);
 272     goto done;
 273   }
 274
 275   /* --- Break out word and bit shifts for more sophisticated work --- */
 276
 277   nw = n / MPW_BITS;
 278   nb = n % MPW_BITS;
 279
 280   /* --- Handle a shift by a multiple of the word size --- */
 281
 282   if (nb == 0) {
 283     MPX_COPY(dv + nw, dvl, av, avl);
 284     memset(dv, 0, MPWS(nw));
 285   }
 286
 287   /* --- And finally the difficult case --- *
 288    *
 289    * This is a little convoluted, because I have to start from the end and
 290    * work backwards to avoid overwriting the source, if they're both the same
 291    * block of memory.
 292    */
 293
 294   else {
 295     mpw w;
 296     size_t nr = MPW_BITS - nb;
 297     size_t dvn = dvl - dv;
 298     size_t avn = avl - av;
 299
 300     if (dvn <= nw) {
 301       MPX_ZERO(dv, dvl);
 302       goto done;
 303     }
 304
 305     if (dvn > avn + nw) {
 306       size_t off = avn + nw + 1;
 307       MPX_ZERO(dv + off, dvl);
 308       dvl = dv + off;
 309       w = 0;
 310     } else {
 311       avl = av + dvn - nw;
 312       w = *--avl << nb;
 313     }
 314
 315     while (avl > av) {
 316       mpw t = *--avl;
 317       *--dvl = (t >> nr) | w;
 318       w = t << nb;
 319     }
 320
 321     *--dvl = w;
 322     MPX_ZERO(dv, dvl);
 323   }
 324
 325 done:;
 326 }
 327
 328 /* --- @mpx_lsr@ --- *
 329  *
 330  * Arguments:   @mpw *dv, *dvl@ = destination vector base and limit
 331  *              @const mpw *av, *avl@ = source vector base and limit
 332  *              @size_t n@ = number of bit positions to shift by
 333  *
 334  * Returns:     ---
 335  *
 336  * Use:         Performs a logical shift right operation on an integer.
 337  */
 338
 339 void mpx_lsr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n)
 340 {
 341   size_t nw;
 342   unsigned nb;
 343
 344   /* --- Trivial special case --- */
 345
 346   if (n == 0)
 347     MPX_COPY(dv, dvl, av, avl);
 348
 349   /* --- Single bit shifting --- */
 350
 351   else if (n == 1) {
 352     mpw w = *av++ >> 1;
 353     while (av < avl) {
 354       mpw t;
 355       if (dv >= dvl)
 356         goto done;
 357       t = *av++;
 358       *dv++ = MPW((t << (MPW_BITS - 1)) | w);
 359       w = t >> 1;
 360     }
 361     if (dv >= dvl)
 362       goto done;
 363     *dv++ = MPW(w);
 364     MPX_ZERO(dv, dvl);
 365     goto done;
 366   }
 367
 368   /* --- Break out word and bit shifts for more sophisticated work --- */
 369
 370   nw = n / MPW_BITS;
 371   nb = n % MPW_BITS;
 372
 373   /* --- Handle a shift by a multiple of the word size --- */
 374
 375   if (nb == 0)
 376     MPX_COPY(dv, dvl, av + nw, avl);
 377
 378   /* --- And finally the difficult case --- */
 379
 380   else {
 381     mpw w;
 382     size_t nr = MPW_BITS - nb;
 383
 384     av += nw;
 385     w = *av++;
 386     while (av < avl) {
 387       mpw t;
 388       if (dv >= dvl)
 389         goto done;
 390       t = *av++;
 391       *dv++ = MPW((w >> nb) | (t << nr));
 392       w = t;
 393     }
 394     if (dv < dvl) {
 395       *dv++ = MPW(w >> nb);
 396       MPX_ZERO(dv, dvl);
 397     }
 398   }
 399
 400 done:;
 401 }
 402
 403 /*----- Unsigned arithmetic -----------------------------------------------*/
 404
 405 /* --- @mpx_2c@ --- *
 406  *
 407  * Arguments:   @mpw *dv, *dvl@ = destination vector
 408  *              @const mpw *v, *vl@ = source vector
 409  *
 410  * Returns:     ---
 411  *
 412  * Use:         Calculates the two's complement of @v@.
 413  */
 414
 415 void mpx_2c(mpw *dv, mpw *dvl, const mpw *v, const mpw *vl)
 416 {
 417   mpw c = 0;
 418   while (dv < dvl && v < vl)
 419     *dv++ = c = MPW(~*v++);
 420   if (dv < dvl) {
 421     if (c > MPW_MAX / 2)
 422       c = MPW(~0);
 423     while (dv < dvl)
 424       *dv++ = c;
 425   }
 426   MPX_UADDN(dv, dvl, 1);
 427 }
 428
 429 /* --- @mpx_ueq@ --- *
 430  *
 431  * Arguments:   @const mpw *av, *avl@ = first argument vector base and limit
 432  *              @const mpw *bv, *bvl@ = second argument vector base and limit
 433  *
 434  * Returns:     Nonzero if the two vectors are equal.
 435  *
 436  * Use:         Performs an unsigned integer test for equality.
 437  */
 438
 439 int mpx_ueq(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl)
 440 {
 441   MPX_SHRINK(av, avl);
 442   MPX_SHRINK(bv, bvl);
 443   if (avl - av != bvl - bv)
 444     return (0);
 445   while (av < avl) {
 446     if (*av++ != *bv++)
 447       return (0);
 448   }
 449   return (1);
 450 }
 451
 452 /* --- @mpx_ucmp@ --- *
 453  *
 454  * Arguments:   @const mpw *av, *avl@ = first argument vector base and limit
 455  *              @const mpw *bv, *bvl@ = second argument vector base and limit
 456  *
 457  * Returns:     Less than, equal to, or greater than zero depending on
 458  *              whether @a@ is less than, equal to or greater than @b@,
 459  *              respectively.
 460  *
 461  * Use:         Performs an unsigned integer comparison.
 462  */
 463
 464 int mpx_ucmp(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl)
 465 {
 466   MPX_SHRINK(av, avl);
 467   MPX_SHRINK(bv, bvl);
 468
 469   if (avl - av > bvl - bv)
 470     return (+1);
 471   else if (avl - av < bvl - bv)
 472     return (-1);
 473   else while (avl > av) {
 474     mpw a = *--avl, b = *--bvl;
 475     if (a > b)
 476       return (+1);
 477     else if (a < b)
 478       return (-1);
 479   }
 480   return (0);
 481 }
 482
 483 /* --- @mpx_uadd@ --- *
 484  *
 485  * Arguments:   @mpw *dv, *dvl@ = destination vector base and limit
 486  *              @const mpw *av, *avl@ = first addend vector base and limit
 487  *              @const mpw *bv, *bvl@ = second addend vector base and limit
 488  *
 489  * Returns:     ---
 490  *
 491  * Use:         Performs unsigned integer addition.  If the result overflows
 492  *              the destination vector, high-order bits are discarded.  This
 493  *              means that two's complement addition happens more or less for
 494  *              free, although that's more a side-effect than anything else.
 495  *              The result vector may be equal to either or both source
 496  *              vectors, but may not otherwise overlap them.
 497  */
 498
 499 void mpx_uadd(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
 500               const mpw *bv, const mpw *bvl)
 501 {
 502   mpw c = 0;
 503
 504   while (av < avl || bv < bvl) {
 505     mpw a, b;
 506     mpd x;
 507     if (dv >= dvl)
 508       return;
 509     a = (av < avl) ? *av++ : 0;
 510     b = (bv < bvl) ? *bv++ : 0;
 511     x = (mpd)a + (mpd)b + c;
 512     *dv++ = MPW(x);
 513     c = x >> MPW_BITS;
 514   }
 515   if (dv < dvl) {
 516     *dv++ = c;
 517     MPX_ZERO(dv, dvl);
 518   }
 519 }
 520
 521 /* --- @mpx_uaddn@ --- *
 522  *
 523  * Arguments:   @mpw *dv, *dvl@ = source and destination base and limit
 524  *              @mpw n@ = other addend
 525  *
 526  * Returns:     ---
 527  *
 528  * Use:         Adds a small integer to a multiprecision number.
 529  */
 530
 531 void mpx_uaddn(mpw *dv, mpw *dvl, mpw n) { MPX_UADDN(dv, dvl, n); }
 532
 533 /* --- @mpx_usub@ --- *
 534  *
 535  * Arguments:   @mpw *dv, *dvl@ = destination vector base and limit
 536  *              @const mpw *av, *avl@ = first argument vector base and limit
 537  *              @const mpw *bv, *bvl@ = second argument vector base and limit
 538  *
 539  * Returns:     ---
 540  *
 541  * Use:         Performs unsigned integer subtraction.  If the result
 542  *              overflows the destination vector, high-order bits are
 543  *              discarded.  This means that two's complement subtraction
 544  *              happens more or less for free, althuogh that's more a side-
 545  *              effect than anything else.  The result vector may be equal to
 546  *              either or both source vectors, but may not otherwise overlap
 547  *              them.
 548  */
 549
 550 void mpx_usub(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
 551               const mpw *bv, const mpw *bvl)
 552 {
 553   mpw c = 0;
 554
 555   while (av < avl || bv < bvl) {
 556     mpw a, b;
 557     mpd x;
 558     if (dv >= dvl)
 559       return;
 560     a = (av < avl) ? *av++ : 0;
 561     b = (bv < bvl) ? *bv++ : 0;
 562     x = (mpd)a - (mpd)b - c;
 563     *dv++ = MPW(x);
 564     if (x >> MPW_BITS)
 565       c = 1;
 566     else
 567       c = 0;
 568   }
 569   if (c)
 570     c = MPW_MAX;
 571   while (dv < dvl)
 572     *dv++ = c;
 573 }
 574
 575 /* --- @mpx_usubn@ --- *
 576  *
 577  * Arguments:   @mpw *dv, *dvl@ = source and destination base and limit
 578  *              @n@ = subtrahend
 579  *
 580  * Returns:     ---
 581  *
 582  * Use:         Subtracts a small integer from a multiprecision number.
 583  */
 584
 585 void mpx_usubn(mpw *dv, mpw *dvl, mpw n) { MPX_USUBN(dv, dvl, n); }
 586
 587 /* --- @mpx_umul@ --- *
 588  *
 589  * Arguments:   @mpw *dv, *dvl@ = destination vector base and limit
 590  *              @const mpw *av, *avl@ = multiplicand vector base and limit
 591  *              @const mpw *bv, *bvl@ = multiplier vector base and limit
 592  *
 593  * Returns:     ---
 594  *
 595  * Use:         Performs unsigned integer multiplication.  If the result
 596  *              overflows the desination vector, high-order bits are
 597  *              discarded.  The result vector may not overlap the argument
 598  *              vectors in any way.
 599  */
 600
 601 void mpx_umul(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
 602               const mpw *bv, const mpw *bvl)
 603 {
 604   /* --- This is probably worthwhile on a multiply --- */
 605
 606   MPX_SHRINK(av, avl);
 607   MPX_SHRINK(bv, bvl);
 608
 609   /* --- Deal with a multiply by zero --- */
 610
 611   if (bv == bvl) {
 612     MPX_ZERO(dv, dvl);
 613     return;
 614   }
 615
 616   /* --- Do the initial multiply and initialize the accumulator --- */
 617
 618   MPX_UMULN(dv, dvl, av, avl, *bv++);
 619
 620   /* --- Do the remaining multiply/accumulates --- */
 621
 622   while (dv < dvl && bv < bvl) {
 623     mpw m = *bv++;
 624     mpw c = 0;
 625     const mpw *avv = av;
 626     mpw *dvv = ++dv;
 627
 628     while (avv < avl) {
 629       mpd x;
 630       if (dvv >= dvl)
 631         goto next;
 632       x = (mpd)*dvv + (mpd)m * (mpd)*avv++ + c;
 633       *dvv++ = MPW(x);
 634       c = x >> MPW_BITS;
 635     }
 636     MPX_UADDN(dvv, dvl, c);
 637   next:;
 638   }
 639 }
 640
 641 /* --- @mpx_umuln@ --- *
 642  *
 643  * Arguments:   @mpw *dv, *dvl@ = destination vector base and limit
 644  *              @const mpw *av, *avl@ = multiplicand vector base and limit
 645  *              @mpw m@ = multiplier
 646  *
 647  * Returns:     ---
 648  *
 649  * Use:         Multiplies a multiprecision integer by a single-word value.
 650  *              The destination and source may be equal.  The destination
 651  *              is completely cleared after use.
 652  */
 653
 654 void mpx_umuln(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m)
 655 {
 656   MPX_UMULN(dv, dvl, av, avl, m);
 657 }
 658
 659 /* --- @mpx_umlan@ --- *
 660  *
 661  * Arguments:   @mpw *dv, *dvl@ = destination/accumulator base and limit
 662  *              @const mpw *av, *avl@ = multiplicand vector base and limit
 663  *              @mpw m@ = multiplier
 664  *
 665  * Returns:     ---
 666  *
 667  * Use:         Multiplies a multiprecision integer by a single-word value
 668  *              and adds the result to an accumulator.
 669  */
 670
 671 void mpx_umlan(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m)
 672 {
 673   MPX_UMLAN(dv, dvl, av, avl, m);
 674 }
 675
 676 /* --- @mpx_usqr@ --- *
 677  *
 678  * Arguments:   @mpw *dv, *dvl@ = destination vector base and limit
 679  *              @const mpw *av, *av@ = source vector base and limit
 680  *
 681  * Returns:     ---
 682  *
 683  * Use:         Performs unsigned integer squaring.  The result vector must
 684  *              not overlap the source vector in any way.
 685  */
 686
 687 void mpx_usqr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl)
 688 {
 689   MPX_ZERO(dv, dvl);
 690
 691   /* --- Main loop --- */
 692
 693   while (av < avl) {
 694     const mpw *avv = av;
 695     mpw *dvv = dv;
 696     mpw a = *av;
 697     mpd c;
 698
 699     /* --- Stop if I've run out of destination --- */
 700
 701     if (dvv >= dvl)
 702       break;
 703
 704     /* --- Work out the square at this point in the proceedings --- */
 705
 706     {
 707       mpd x = (mpd)a * (mpd)a + *dvv;
 708       *dvv++ = MPW(x);
 709       c = MPW(x >> MPW_BITS);
 710     }
 711
 712     /* --- Now fix up the rest of the vector upwards --- */
 713
 714     avv++;
 715     while (dvv < dvl && avv < avl) {
 716       mpd x = (mpd)a * (mpd)*avv++;
 717       mpd y = ((x << 1) & MPW_MAX) + c + *dvv;
 718       c = (x >> (MPW_BITS - 1)) + (y >> MPW_BITS);
 719       *dvv++ = MPW(y);
 720     }
 721     while (dvv < dvl && c) {
 722       mpd x = c + *dvv;
 723       *dvv++ = MPW(x);
 724       c = x >> MPW_BITS;
 725     }
 726
 727     /* --- Get ready for the next round --- */
 728
 729     av++;
 730     dv += 2;
 731   }
 732 }
 733
 734 /* --- @mpx_udiv@ --- *
 735  *
 736  * Arguments:   @mpw *qv, *qvl@ = quotient vector base and limit
 737  *              @mpw *rv, *rvl@ = dividend/remainder vector base and limit
 738  *              @const mpw *dv, *dvl@ = divisor vector base and limit
 739  *              @mpw *sv, *svl@ = scratch workspace
 740  *
 741  * Returns:     ---
 742  *
 743  * Use:         Performs unsigned integer division.  If the result overflows
 744  *              the quotient vector, high-order bits are discarded.  (Clearly
 745  *              the remainder vector can't overflow.)  The various vectors
 746  *              may not overlap in any way.  Yes, I know it's a bit odd
 747  *              requiring the dividend to be in the result position but it
 748  *              does make some sense really.  The remainder must have
 749  *              headroom for at least two extra words.  The scratch space
 750  *              must be at least one word larger than the divisor.
 751  */
 752
 753 void mpx_udiv(mpw *qv, mpw *qvl, mpw *rv, mpw *rvl,
 754               const mpw *dv, const mpw *dvl,
 755               mpw *sv, mpw *svl)
 756 {
 757   unsigned norm = 0;
 758   size_t scale;
 759   mpw d, dd;
 760
 761   /* --- Initialize the quotient --- */
 762
 763   MPX_ZERO(qv, qvl);
 764
 765   /* --- Perform some sanity checks --- */
 766
 767   MPX_SHRINK(dv, dvl);
 768   assert(((void)"division by zero in mpx_udiv", dv < dvl));
 769
 770   /* --- Normalize the divisor --- *
 771    *
 772    * The algorithm requires that the divisor be at least two digits long.
 773    * This is easy to fix.
 774    */
 775
 776   {
 777     unsigned b;
 778
 779     d = dvl[-1];
 780     for (b = MPW_BITS / 2; b; b >>= 1) {
 781       if (d < (MPW_MAX >> b)) {
 782         d <<= b;
 783         norm += b;
 784       }
 785     }
 786     if (dv + 1 == dvl)
 787       norm += MPW_BITS;
 788   }
 789
 790   /* --- Normalize the dividend/remainder to match --- */
 791
 792   if (norm) {
 793     mpx_lsl(rv, rvl, rv, rvl, norm);
 794     mpx_lsl(sv, svl, dv, dvl, norm);
 795     dv = sv;
 796     dvl = svl;
 797     MPX_SHRINK(dv, dvl);
 798   }
 799
 800   MPX_SHRINK(rv, rvl);
 801   d = dvl[-1];
 802   dd = dvl[-2];
 803
 804   /* --- Work out the relative scales --- */
 805
 806   {
 807     size_t rvn = rvl - rv;
 808     size_t dvn = dvl - dv;
 809
 810     /* --- If the divisor is clearly larger, notice this --- */
 811
 812     if (dvn > rvn) {
 813       mpx_lsr(rv, rvl, rv, rvl, norm);
 814       return;
 815     }
 816
 817     scale = rvn - dvn;
 818   }
 819
 820   /* --- Calculate the most significant quotient digit --- *
 821    *
 822    * Because the divisor has its top bit set, this can only happen once.  The
 823    * pointer arithmetic is a little contorted, to make sure that the
 824    * behaviour is defined.
 825    */
 826
 827   if (MPX_UCMP(rv + scale, rvl, >=, dv, dvl)) {
 828     mpx_usub(rv + scale, rvl, rv + scale, rvl, dv, dvl);
 829     if (qvl - qv > scale)
 830       qv[scale] = 1;
 831   }
 832
 833   /* --- Now for the main loop --- */
 834
 835   {
 836     mpw *rvv = rvl - 2;
 837
 838     while (scale) {
 839       mpw q;
 840       mpd rh;
 841
 842       /* --- Get an estimate for the next quotient digit --- */
 843
 844       mpw r = rvv[1];
 845       mpw rr = rvv[0];
 846       mpw rrr = *--rvv;
 847
 848       scale--;
 849       rh = ((mpd)r << MPW_BITS) | rr;
 850       if (r == d)
 851         q = MPW_MAX;
 852       else
 853         q = MPW(rh / d);
 854
 855       /* --- Refine the estimate --- */
 856
 857       {
 858         mpd yh = (mpd)d * q;
 859         mpd yy = (mpd)dd * q;
 860         mpw yl;
 861
 862         if (yy > MPW_MAX)
 863           yh += yy >> MPW_BITS;
 864         yl = MPW(yy);
 865
 866         while (yh > rh || (yh == rh && yl > rrr)) {
 867           q--;
 868           yh -= d;
 869           if (yl < dd)
 870             yh--;
 871           yl = MPW(yl - dd);
 872         }
 873       }
 874
 875       /* --- Remove a chunk from the dividend --- */
 876
 877       {
 878         mpw *svv;
 879         const mpw *dvv;
 880         mpw mc = 0, sc = 0;
 881
 882         /* --- Calculate the size of the chunk --- *
 883          *
 884          * This does the whole job of calculating @r >> scale - qd@.
 885          */
 886
 887         for (svv = rv + scale, dvv = dv;
 888              dvv < dvl && svv < rvl;
 889              svv++, dvv++) {
 890           mpd x = (mpd)*dvv * (mpd)q + mc;
 891           mc = x >> MPW_BITS;
 892           x = (mpd)*svv - MPW(x) - sc;
 893           *svv = MPW(x);
 894           if (x >> MPW_BITS)
 895             sc = 1;
 896           else
 897             sc = 0;
 898         }
 899
 900         if (svv < rvl) {
 901           mpd x = (mpd)*svv - mc - sc;
 902           *svv++ = MPW(x);
 903           if (x >> MPW_BITS)
 904             sc = MPW_MAX;
 905           else
 906             sc = 0;
 907           while (svv < rvl)
 908             *svv++ = sc;
 909         }
 910
 911         /* --- Fix if the quotient was too large --- *
 912          *
 913          * This doesn't seem to happen very often.
 914          */
 915
 916         if (rvl[-1] > MPW_MAX / 2) {
 917           mpx_uadd(rv + scale, rvl, rv + scale, rvl, dv, dvl);
 918           q--;
 919         }
 920       }
 921
 922       /* --- Done for another iteration --- */
 923
 924       if (qvl - qv > scale)
 925         qv[scale] = q;
 926       r = rr;
 927       rr = rrr;
 928     }
 929   }
 930
 931   /* --- Now fiddle with unnormalizing and things --- */
 932
 933   mpx_lsr(rv, rvl, rv, rvl, norm);
 934 }
 935
 936 /* --- @mpx_udivn@ --- *
 937  *
 938  * Arguments:   @mpw *qv, *qvl@ = storage for the quotient (may overlap
 939  *                      dividend)
 940  *              @const mpw *rv, *rvl@ = dividend
 941  *              @mpw d@ = single-precision divisor
 942  *
 943  * Returns:     Remainder after divison.
 944  *
 945  * Use:         Performs a single-precision division operation.
 946  */
 947
 948 mpw mpx_udivn(mpw *qv, mpw *qvl, const mpw *rv, const mpw *rvl, mpw d)
 949 {
 950   size_t i;
 951   size_t ql = qvl - qv;
 952   mpd r = 0;
 953
 954   i = rvl - rv;
 955   while (i > 0) {
 956     i--;
 957     r = (r << MPW_BITS) | rv[i];
 958     if (i < ql)
 959       qv[i] = r / d;
 960     r %= d;
 961   }
 962   return (MPW(r));
 963 }
 964
 965 /*----- Test rig ----------------------------------------------------------*/
 966
 967 #ifdef TEST_RIG
 968
 969 #include <mLib/alloc.h>
 970 #include <mLib/dstr.h>
 971 #include <mLib/quis.h>
 972 #include <mLib/testrig.h>
 973
 974 #include "mpscan.h"
 975
 976 #define ALLOC(v, vl, sz) do {                                           \
 977   size_t _sz = (sz);                                                    \
 978   mpw *_vv = xmalloc(MPWS(_sz));                                        \
 979   mpw *_vvl = _vv + _sz;                                                \
 980   (v) = _vv;                                                            \
 981   (vl) = _vvl;                                                          \
 982 } while (0)
 983
 984 #define LOAD(v, vl, d) do {                                             \
 985   const dstr *_d = (d);                                                 \
 986   mpw *_v, *_vl;                                                        \
 987   ALLOC(_v, _vl, MPW_RQ(_d->len));                                      \
 988   mpx_loadb(_v, _vl, _d->buf, _d->len);                                 \
 989   (v) = _v;                                                             \
 990   (vl) = _vl;                                                           \
 991 } while (0)
 992
 993 #define MAX(x, y) ((x) > (y) ? (x) : (y))
 994
 995 static void dumpbits(const char *msg, const void *pp, size_t sz)
 996 {
 997   const octet *p = pp;
 998   fputs(msg, stderr);
 999   for (; sz; sz--)
1000     fprintf(stderr, " %02x", *p++);
1001   fputc('\n', stderr);
1002 }
1003
1004 static void dumpmp(const char *msg, const mpw *v, const mpw *vl)
1005 {
1006   fputs(msg, stderr);
1007   MPX_SHRINK(v, vl);
1008   while (v < vl)
1009     fprintf(stderr, " %08lx", (unsigned long)*--vl);
1010   fputc('\n', stderr);
1011 }
1012
1013 static int chkscan(const mpw *v, const mpw *vl,
1014                    const void *pp, size_t sz, int step)
1015 {
1016   mpscan mps;
1017   const octet *p = pp;
1018   unsigned bit = 0;
1019   int ok = 1;
1020
1021   mpscan_initx(&mps, v, vl);
1022   while (sz) {
1023     unsigned x = *p;
1024     int i;
1025     p += step;
1026     for (i = 0; i < 8 && MPSCAN_STEP(&mps); i++) {
1027       if (MPSCAN_BIT(&mps) != (x & 1)) {
1028         fprintf(stderr,
1029                 "\n*** error, step %i, bit %u, expected %u, found %u\n",
1030                 step, bit, x & 1, MPSCAN_BIT(&mps));
1031         ok = 0;
1032       }
1033       x >>= 1;
1034       bit++;
1035     }
1036     sz--;
1037   }
1038
1039   return (ok);
1040 }
1041
1042 static int loadstore(dstr *v)
1043 {
1044   dstr d = DSTR_INIT;
1045   size_t sz = MPW_RQ(v->len) * 2, diff;
1046   mpw *m, *ml;
1047   int ok = 1;
1048
1049   dstr_ensure(&d, v->len);
1050   m = xmalloc(MPWS(sz));
1051
1052   for (diff = 0; diff < sz; diff += 5) {
1053     size_t oct;
1054
1055     ml = m + sz - diff;
1056
1057     mpx_loadl(m, ml, v->buf, v->len);
1058     if (!chkscan(m, ml, v->buf, v->len, +1))
1059       ok = 0;
1060     MPX_OCTETS(oct, m, ml);
1061     mpx_storel(m, ml, d.buf, d.sz);
1062     if (memcmp(d.buf, v->buf, oct) != 0) {
1063       dumpbits("\n*** storel failed", d.buf, d.sz);
1064       ok = 0;
1065     }
1066
1067     mpx_loadb(m, ml, v->buf, v->len);
1068     if (!chkscan(m, ml, v->buf + v->len - 1, v->len, -1))
1069       ok = 0;
1070     MPX_OCTETS(oct, m, ml);
1071     mpx_storeb(m, ml, d.buf, d.sz);
1072     if (memcmp(d.buf + d.sz - oct, v->buf + v->len - oct, oct) != 0) {
1073       dumpbits("\n*** storeb failed", d.buf, d.sz);
1074       ok = 0;
1075     }
1076   }
1077
1078   if (!ok)
1079     dumpbits("input data", v->buf, v->len);
1080
1081   free(m);
1082   dstr_destroy(&d);
1083   return (ok);
1084 }
1085
1086 static int lsl(dstr *v)
1087 {
1088   mpw *a, *al;
1089   int n = *(int *)v[1].buf;
1090   mpw *c, *cl;
1091   mpw *d, *dl;
1092   int ok = 1;
1093
1094   LOAD(a, al, &v[0]);
1095   LOAD(c, cl, &v[2]);
1096   ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS);
1097
1098   mpx_lsl(d, dl, a, al, n);
1099   if (!mpx_ueq(d, dl, c, cl)) {
1100     fprintf(stderr, "\n*** lsl(%i) failed\n", n);
1101     dumpmp("       a", a, al);
1102     dumpmp("expected", c, cl);
1103     dumpmp("  result", d, dl);
1104     ok = 0;
1105   }
1106
1107   free(a); free(c); free(d);
1108   return (ok);
1109 }
1110
1111 static int lsr(dstr *v)
1112 {
1113   mpw *a, *al;
1114   int n = *(int *)v[1].buf;
1115   mpw *c, *cl;
1116   mpw *d, *dl;
1117   int ok = 1;
1118
1119   LOAD(a, al, &v[0]);
1120   LOAD(c, cl, &v[2]);
1121   ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS + 1);
1122
1123   mpx_lsr(d, dl, a, al, n);
1124   if (!mpx_ueq(d, dl, c, cl)) {
1125     fprintf(stderr, "\n*** lsr(%i) failed\n", n);
1126     dumpmp("       a", a, al);
1127     dumpmp("expected", c, cl);
1128     dumpmp("  result", d, dl);
1129     ok = 0;
1130   }
1131
1132   free(a); free(c); free(d);
1133   return (ok);
1134 }
1135
1136 static int uadd(dstr *v)
1137 {
1138   mpw *a, *al;
1139   mpw *b, *bl;
1140   mpw *c, *cl;
1141   mpw *d, *dl;
1142   int ok = 1;
1143
1144   LOAD(a, al, &v[0]);
1145   LOAD(b, bl, &v[1]);
1146   LOAD(c, cl, &v[2]);
1147   ALLOC(d, dl, MAX(al - a, bl - b) + 1);
1148
1149   mpx_uadd(d, dl, a, al, b, bl);
1150   if (!mpx_ueq(d, dl, c, cl)) {
1151     fprintf(stderr, "\n*** uadd failed\n");
1152     dumpmp("       a", a, al);
1153     dumpmp("       b", b, bl);
1154     dumpmp("expected", c, cl);
1155     dumpmp("  result", d, dl);
1156     ok = 0;
1157   }
1158
1159   free(a); free(b); free(c); free(d);
1160   return (ok);
1161 }
1162
1163 static int usub(dstr *v)
1164 {
1165   mpw *a, *al;
1166   mpw *b, *bl;
1167   mpw *c, *cl;
1168   mpw *d, *dl;
1169   int ok = 1;
1170
1171   LOAD(a, al, &v[0]);
1172   LOAD(b, bl, &v[1]);
1173   LOAD(c, cl, &v[2]);
1174   ALLOC(d, dl, al - a);
1175
1176   mpx_usub(d, dl, a, al, b, bl);
1177   if (!mpx_ueq(d, dl, c, cl)) {
1178     fprintf(stderr, "\n*** usub failed\n");
1179     dumpmp("       a", a, al);
1180     dumpmp("       b", b, bl);
1181     dumpmp("expected", c, cl);
1182     dumpmp("  result", d, dl);
1183     ok = 0;
1184   }
1185
1186   free(a); free(b); free(c); free(d);
1187   return (ok);
1188 }
1189
1190 static int umul(dstr *v)
1191 {
1192   mpw *a, *al;
1193   mpw *b, *bl;
1194   mpw *c, *cl;
1195   mpw *d, *dl;
1196   int ok = 1;
1197
1198   LOAD(a, al, &v[0]);
1199   LOAD(b, bl, &v[1]);
1200   LOAD(c, cl, &v[2]);
1201   ALLOC(d, dl, (al - a) + (bl - b));
1202
1203   mpx_umul(d, dl, a, al, b, bl);
1204   if (!mpx_ueq(d, dl, c, cl)) {
1205     fprintf(stderr, "\n*** umul failed\n");
1206     dumpmp("       a", a, al);
1207     dumpmp("       b", b, bl);
1208     dumpmp("expected", c, cl);
1209     dumpmp("  result", d, dl);
1210     ok = 0;
1211   }
1212
1213   free(a); free(b); free(c); free(d);
1214   return (ok);
1215 }
1216
1217 static int usqr(dstr *v)
1218 {
1219   mpw *a, *al;
1220   mpw *c, *cl;
1221   mpw *d, *dl;
1222   int ok = 1;
1223
1224   LOAD(a, al, &v[0]);
1225   LOAD(c, cl, &v[1]);
1226   ALLOC(d, dl, 2 * (al - a));
1227
1228   mpx_usqr(d, dl, a, al);
1229   if (!mpx_ueq(d, dl, c, cl)) {
1230     fprintf(stderr, "\n*** usqr failed\n");
1231     dumpmp("       a", a, al);
1232     dumpmp("expected", c, cl);
1233     dumpmp("  result", d, dl);
1234     ok = 0;
1235   }
1236
1237   free(a); free(c); free(d);
1238   return (ok);
1239 }
1240
1241 static int udiv(dstr *v)
1242 {
1243   mpw *a, *al;
1244   mpw *b, *bl;
1245   mpw *q, *ql;
1246   mpw *r, *rl;
1247   mpw *qq, *qql;
1248   mpw *s, *sl;
1249   int ok = 1;
1250
1251   ALLOC(a, al, MPW_RQ(v[0].len) + 2); mpx_loadb(a, al, v[0].buf, v[0].len);
1252   LOAD(b, bl, &v[1]);
1253   LOAD(q, ql, &v[2]);
1254   LOAD(r, rl, &v[3]);
1255   ALLOC(qq, qql, al - a);
1256   ALLOC(s, sl, (bl - b) + 1);
1257
1258   mpx_udiv(qq, qql, a, al, b, bl, s, sl);
1259   if (!mpx_ueq(qq, qql, q, ql) ||
1260       !mpx_ueq(a, al, r, rl)) {
1261     fprintf(stderr, "\n*** udiv failed\n");
1262     dumpmp(" divisor", b, bl);
1263     dumpmp("expect r", r, rl);
1264     dumpmp("result r", a, al);
1265     dumpmp("expect q", q, ql);
1266     dumpmp("result q", qq, qql);
1267     ok = 0;
1268   }
1269
1270   free(a); free(b); free(r); free(q); free(s); free(qq);
1271   return (ok);
1272 }
1273
1274 static test_chunk defs[] = {
1275   { "load-store", loadstore, { &type_hex, 0 } },
1276   { "lsl", lsl, { &type_hex, &type_int, &type_hex, 0 } },
1277   { "lsr", lsr, { &type_hex, &type_int, &type_hex, 0 } },
1278   { "uadd", uadd, { &type_hex, &type_hex, &type_hex, 0 } },
1279   { "usub", usub, { &type_hex, &type_hex, &type_hex, 0 } },
1280   { "umul", umul, { &type_hex, &type_hex, &type_hex, 0 } },
1281   { "usqr", usqr, { &type_hex, &type_hex, 0 } },
1282   { "udiv", udiv, { &type_hex, &type_hex, &type_hex, &type_hex, 0 } },
1283   { 0, 0, { 0 } }
1284 };
1285
1286 int main(int argc, char *argv[])
1287 {
1288   test_run(argc, argv, defs, SRCDIR"/tests/mpx");
1289   return (0);
1290 }
1291
1292 #endif
1293
1294 /*----- That's all, folks -------------------------------------------------*/