chiark - git - mdw - catacomb/blob - mpx.c

   1 /* -*-c-*-
   2  *
   3  * $Id: mpx.c,v 1.4 1999/11/17 18:04:09 mdw Exp $
   4  *
   5  * Low-level multiprecision arithmetic
   6  *
   7  * (c) 1999 Straylight/Edgeware
   8  */
   9
  10 /*----- Licensing notice --------------------------------------------------*
  11  *
  12  * This file is part of Catacomb.
  13  *
  14  * Catacomb is free software; you can redistribute it and/or modify
  15  * it under the terms of the GNU Library General Public License as
  16  * published by the Free Software Foundation; either version 2 of the
  17  * License, or (at your option) any later version.
  18  *
  19  * Catacomb is distributed in the hope that it will be useful,
  20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22  * GNU Library General Public License for more details.
  23  *
  24  * You should have received a copy of the GNU Library General Public
  25  * License along with Catacomb; if not, write to the Free
  26  * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  27  * MA 02111-1307, USA.
  28  */
  29
  30 /*----- Revision history --------------------------------------------------*
  31  *
  32  * $Log: mpx.c,v $
  33  * Revision 1.4  1999/11/17 18:04:09  mdw
  34  * Add two's-complement functionality.  Improve mpx_udiv a little by
  35  * performing the multiplication of the divisor by q with the subtraction
  36  * from r.
  37  *
  38  * Revision 1.3  1999/11/13 01:57:31  mdw
  39  * Remove stray debugging code.
  40  *
  41  * Revision 1.2  1999/11/13 01:50:59  mdw
  42  * Multiprecision routines finished and tested.
  43  *
  44  * Revision 1.1  1999/09/03 08:41:12  mdw
  45  * Initial import.
  46  *
  47  */
  48
  49 /*----- Header files ------------------------------------------------------*/
  50
  51 #include <assert.h>
  52 #include <stdio.h>
  53 #include <stdlib.h>
  54 #include <string.h>
  55
  56 #include <mLib/bits.h>
  57
  58 #include "mptypes.h"
  59 #include "mpx.h"
  60
  61 /*----- Loading and storing -----------------------------------------------*/
  62
  63 /* --- @mpx_storel@ --- *
  64  *
  65  * Arguments:   @const mpw *v, *vl@ = base and limit of source vector
  66  *              @void *pp@ = pointer to octet array
  67  *              @size_t sz@ = size of octet array
  68  *
  69  * Returns:     ---
  70  *
  71  * Use:         Stores an MP in an octet array, least significant octet
  72  *              first.  High-end octets are silently discarded if there
  73  *              isn't enough space for them.
  74  */
  75
  76 void mpx_storel(const mpw *v, const mpw *vl, void *pp, size_t sz)
  77 {
  78   mpw n, w = 0;
  79   octet *p = pp, *q = p + sz;
  80   unsigned bits = 0;
  81
  82   while (p < q) {
  83     if (bits < 8) {
  84       if (v >= vl) {
  85         *p++ = U8(w);
  86         break;
  87       }
  88       n = *v++;
  89       *p++ = U8(w | n << bits);
  90       w = n >> (8 - bits);
  91       bits += MPW_BITS - 8;
  92     } else {
  93       *p++ = U8(w);
  94       w >>= 8;
  95       bits -= 8;
  96     }
  97   }
  98   memset(p, 0, q - p);
  99 }
 100
 101 /* --- @mpx_loadl@ --- *
 102  *
 103  * Arguments:   @mpw *v, *vl@ = base and limit of destination vector
 104  *              @const void *pp@ = pointer to octet array
 105  *              @size_t sz@ = size of octet array
 106  *
 107  * Returns:     ---
 108  *
 109  * Use:         Loads an MP in an octet array, least significant octet
 110  *              first.  High-end octets are ignored if there isn't enough
 111  *              space for them.
 112  */
 113
 114 void mpx_loadl(mpw *v, mpw *vl, const void *pp, size_t sz)
 115 {
 116   unsigned n;
 117   mpw w = 0;
 118   const octet *p = pp, *q = p + sz;
 119   unsigned bits = 0;
 120
 121   if (v >= vl)
 122     return;
 123   while (p < q) {
 124     n = U8(*p++);
 125     w |= n << bits;
 126     bits += 8;
 127     if (bits >= MPW_BITS) {
 128       *v++ = MPW(w);
 129       w = n >> (MPW_BITS - bits + 8);
 130       bits -= MPW_BITS;
 131       if (v >= vl)
 132         return;
 133     }
 134   }
 135   *v++ = w;
 136   MPX_ZERO(v, vl);
 137 }
 138
 139 /* --- @mpx_storeb@ --- *
 140  *
 141  * Arguments:   @const mpw *v, *vl@ = base and limit of source vector
 142  *              @void *pp@ = pointer to octet array
 143  *              @size_t sz@ = size of octet array
 144  *
 145  * Returns:     ---
 146  *
 147  * Use:         Stores an MP in an octet array, most significant octet
 148  *              first.  High-end octets are silently discarded if there
 149  *              isn't enough space for them.
 150  */
 151
 152 void mpx_storeb(const mpw *v, const mpw *vl, void *pp, size_t sz)
 153 {
 154   mpw n, w = 0;
 155   octet *p = pp, *q = p + sz;
 156   unsigned bits = 0;
 157
 158   while (q > p) {
 159     if (bits < 8) {
 160       if (v >= vl) {
 161         *--q = U8(w);
 162         break;
 163       }
 164       n = *v++;
 165       *--q = U8(w | n << bits);
 166       w = n >> (8 - bits);
 167       bits += MPW_BITS - 8;
 168     } else {
 169       *--q = U8(w);
 170       w >>= 8;
 171       bits -= 8;
 172     }
 173   }
 174   memset(p, 0, q - p);
 175 }
 176
 177 /* --- @mpx_loadb@ --- *
 178  *
 179  * Arguments:   @mpw *v, *vl@ = base and limit of destination vector
 180  *              @const void *pp@ = pointer to octet array
 181  *              @size_t sz@ = size of octet array
 182  *
 183  * Returns:     ---
 184  *
 185  * Use:         Loads an MP in an octet array, most significant octet
 186  *              first.  High-end octets are ignored if there isn't enough
 187  *              space for them.
 188  */
 189
 190 void mpx_loadb(mpw *v, mpw *vl, const void *pp, size_t sz)
 191 {
 192   unsigned n;
 193   mpw w = 0;
 194   const octet *p = pp, *q = p + sz;
 195   unsigned bits = 0;
 196
 197   if (v >= vl)
 198     return;
 199   while (q > p) {
 200     n = U8(*--q);
 201     w |= n << bits;
 202     bits += 8;
 203     if (bits >= MPW_BITS) {
 204       *v++ = MPW(w);
 205       w = n >> (MPW_BITS - bits + 8);
 206       bits -= MPW_BITS;
 207       if (v >= vl)
 208         return;
 209     }
 210   }
 211   *v++ = w;
 212   MPX_ZERO(v, vl);
 213 }
 214
 215 /*----- Logical shifting --------------------------------------------------*/
 216
 217 /* --- @mpx_lsl@ --- *
 218  *
 219  * Arguments:   @mpw *dv, *dvl@ = destination vector base and limit
 220  *              @const mpw *av, *avl@ = source vector base and limit
 221  *              @size_t n@ = number of bit positions to shift by
 222  *
 223  * Returns:     ---
 224  *
 225  * Use:         Performs a logical shift left operation on an integer.
 226  */
 227
 228 void mpx_lsl(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n)
 229 {
 230   size_t nw;
 231   unsigned nb;
 232
 233   /* --- Trivial special case --- */
 234
 235   if (n == 0)
 236     MPX_COPY(dv, dvl, av, avl);
 237
 238   /* --- Single bit shifting --- */
 239
 240   else if (n == 1) {
 241     mpw w = 0;
 242     while (av < avl) {
 243       mpw t;
 244       if (dv >= dvl)
 245         goto done;
 246       t = *av++;
 247       *dv++ = MPW((t << 1) | w);
 248       w = t >> (MPW_BITS - 1);
 249     }
 250     if (dv >= dvl)
 251       goto done;
 252     *dv++ = MPW(w);
 253     MPX_ZERO(dv, dvl);
 254     goto done;
 255   }
 256
 257   /* --- Break out word and bit shifts for more sophisticated work --- */
 258
 259   nw = n / MPW_BITS;
 260   nb = n % MPW_BITS;
 261
 262   /* --- Handle a shift by a multiple of the word size --- */
 263
 264   if (nb == 0) {
 265     MPX_COPY(dv + nw, dvl, av, avl);
 266     memset(dv, 0, MPWS(nw));
 267   }
 268
 269   /* --- And finally the difficult case --- *
 270    *
 271    * This is a little convoluted, because I have to start from the end and
 272    * work backwards to avoid overwriting the source, if they're both the same
 273    * block of memory.
 274    */
 275
 276   else {
 277     mpw w;
 278     size_t nr = MPW_BITS - nb;
 279     size_t dvn = dvl - dv;
 280     size_t avn = avl - av;
 281
 282     if (dvn <= nw) {
 283       MPX_ZERO(dv, dvl);
 284       goto done;
 285     }
 286
 287     if (dvn > avn + nw) {
 288       size_t off = avn + nw + 1;
 289       MPX_ZERO(dv + off, dvl);
 290       dvl = dv + off;
 291       w = 0;
 292     } else {
 293       avl = av + dvn - nw;
 294       w = *--avl << nb;
 295     }
 296
 297     while (avl > av) {
 298       mpw t = *--avl;
 299       *--dvl = (t >> nr) | w;
 300       w = t << nb;
 301     }
 302
 303     *--dvl = w;
 304     MPX_ZERO(dv, dvl);
 305   }
 306
 307 done:;
 308 }
 309
 310 /* --- @mpx_lsr@ --- *
 311  *
 312  * Arguments:   @mpw *dv, *dvl@ = destination vector base and limit
 313  *              @const mpw *av, *avl@ = source vector base and limit
 314  *              @size_t n@ = number of bit positions to shift by
 315  *
 316  * Returns:     ---
 317  *
 318  * Use:         Performs a logical shift right operation on an integer.
 319  */
 320
 321 void mpx_lsr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n)
 322 {
 323   size_t nw;
 324   unsigned nb;
 325
 326   /* --- Trivial special case --- */
 327
 328   if (n == 0)
 329     MPX_COPY(dv, dvl, av, avl);
 330
 331   /* --- Single bit shifting --- */
 332
 333   else if (n == 1) {
 334     mpw w = *av++ >> 1;
 335     while (av < avl) {
 336       mpw t;
 337       if (dv >= dvl)
 338         goto done;
 339       t = *av++;
 340       *dv++ = MPW((t << (MPW_BITS - 1)) | w);
 341       w = t >> 1;
 342     }
 343     if (dv >= dvl)
 344       goto done;
 345     *dv++ = MPW(w);
 346     MPX_ZERO(dv, dvl);
 347     goto done;
 348   }
 349
 350   /* --- Break out word and bit shifts for more sophisticated work --- */
 351
 352   nw = n / MPW_BITS;
 353   nb = n % MPW_BITS;
 354
 355   /* --- Handle a shift by a multiple of the word size --- */
 356
 357   if (nb == 0)
 358     MPX_COPY(dv, dvl, av + nw, avl);
 359
 360   /* --- And finally the difficult case --- */
 361
 362   else {
 363     mpw w;
 364     size_t nr = MPW_BITS - nb;
 365
 366     av += nw;
 367     w = *av++;
 368     while (av < avl) {
 369       mpw t;
 370       if (dv >= dvl)
 371         goto done;
 372       t = *av++;
 373       *dv++ = MPW((w >> nb) | (t << nr));
 374       w = t;
 375     }
 376     if (dv < dvl) {
 377       *dv++ = MPW(w >> nb);
 378       MPX_ZERO(dv, dvl);
 379     }
 380   }
 381
 382 done:;
 383 }
 384
 385 /*----- Unsigned arithmetic -----------------------------------------------*/
 386
 387 /* --- @mpx_2c@ --- *
 388  *
 389  * Arguments:   @mpw *dv, *dvl@ = destination vector
 390  *              @const mpw *v, *vl@ = source vector
 391  *
 392  * Returns:     ---
 393  *
 394  * Use:         Calculates the two's complement of @v@.
 395  */
 396
 397 void mpx_2c(mpw *dv, mpw *dvl, const mpw *v, const mpw *vl)
 398 {
 399   mpw c = 0;
 400   while (dv < dvl && v < vl)
 401     *dv++ = c = MPW(~*v++);
 402   if (dv < dvl) {
 403     if (c > MPW_MAX / 2)
 404       c = MPW(~0);
 405     while (dv < dvl)
 406       *dv++ = c;
 407   }
 408   MPX_UADDN(dv, dvl, 1);
 409 }
 410
 411 /* --- @mpx_ucmp@ --- *
 412  *
 413  * Arguments:   @const mpw *av, *avl@ = first argument vector base and limit
 414  *              @const mpw *bv, *bvl@ = second argument vector base and limit
 415  *
 416  * Returns:     Less than, equal to, or greater than zero depending on
 417  *              whether @a@ is less than, equal to or greater than @b@,
 418  *              respectively.
 419  *
 420  * Use:         Performs an unsigned integer comparison.
 421  */
 422
 423 int mpx_ucmp(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl)
 424 {
 425   MPX_SHRINK(av, avl);
 426   MPX_SHRINK(bv, bvl);
 427
 428   if (avl - av > bvl - bv)
 429     return (+1);
 430   else if (avl - av < bvl - bv)
 431     return (-1);
 432   else while (avl > av) {
 433     mpw a = *--avl, b = *--bvl;
 434     if (a > b)
 435       return (+1);
 436     else if (a < b)
 437       return (-1);
 438   }
 439   return (0);
 440 }
 441
 442 /* --- @mpx_uadd@ --- *
 443  *
 444  * Arguments:   @mpw *dv, *dvl@ = destination vector base and limit
 445  *              @const mpw *av, *avl@ = first addend vector base and limit
 446  *              @const mpw *bv, *bvl@ = second addend vector base and limit
 447  *
 448  * Returns:     ---
 449  *
 450  * Use:         Performs unsigned integer addition.  If the result overflows
 451  *              the destination vector, high-order bits are discarded.  This
 452  *              means that two's complement addition happens more or less for
 453  *              free, although that's more a side-effect than anything else.
 454  *              The result vector may be equal to either or both source
 455  *              vectors, but may not otherwise overlap them.
 456  */
 457
 458 void mpx_uadd(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
 459               const mpw *bv, const mpw *bvl)
 460 {
 461   mpw c = 0;
 462
 463   while (av < avl || bv < bvl) {
 464     mpw a, b;
 465     mpd x;
 466     if (dv >= dvl)
 467       return;
 468     a = (av < avl) ? *av++ : 0;
 469     b = (bv < bvl) ? *bv++ : 0;
 470     x = (mpd)a + (mpd)b + c;
 471     *dv++ = MPW(x);
 472     c = x >> MPW_BITS;
 473   }
 474   if (dv < dvl) {
 475     *dv++ = c;
 476     MPX_ZERO(dv, dvl);
 477   }
 478 }
 479
 480 /* --- @mpx_usub@ --- *
 481  *
 482  * Arguments:   @mpw *dv, *dvl@ = destination vector base and limit
 483  *              @const mpw *av, *avl@ = first argument vector base and limit
 484  *              @const mpw *bv, *bvl@ = second argument vector base and limit
 485  *
 486  * Returns:     ---
 487  *
 488  * Use:         Performs unsigned integer subtraction.  If the result
 489  *              overflows the destination vector, high-order bits are
 490  *              discarded.  This means that two's complement subtraction
 491  *              happens more or less for free, althuogh that's more a side-
 492  *              effect than anything else.  The result vector may be equal to
 493  *              either or both source vectors, but may not otherwise overlap
 494  *              them.
 495  */
 496
 497 void mpx_usub(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
 498               const mpw *bv, const mpw *bvl)
 499 {
 500   mpw c = 0;
 501
 502   while (av < avl || bv < bvl) {
 503     mpw a, b;
 504     mpd x;
 505     if (dv >= dvl)
 506       return;
 507     a = (av < avl) ? *av++ : 0;
 508     b = (bv < bvl) ? *bv++ : 0;
 509     x = (mpd)a - (mpd)b - c;
 510     *dv++ = MPW(x);
 511     if (x >> MPW_BITS)
 512       c = 1;
 513     else
 514       c = 0;
 515   }
 516   if (c)
 517     c = MPW_MAX;
 518   while (dv < dvl)
 519     *dv++ = c;
 520 }
 521
 522 /* --- @mpx_umul@ --- *
 523  *
 524  * Arguments:   @mpw *dv, *dvl@ = destination vector base and limit
 525  *              @const mpw *av, *avl@ = multiplicand vector base and limit
 526  *              @const mpw *bv, *bvl@ = multiplier vector base and limit
 527  *
 528  * Returns:     ---
 529  *
 530  * Use:         Performs unsigned integer multiplication.  If the result
 531  *              overflows the desination vector, high-order bits are
 532  *              discarded.  The result vector may not overlap the argument
 533  *              vectors in any way.
 534  */
 535
 536 void mpx_umul(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
 537               const mpw *bv, const mpw *bvl)
 538 {
 539   /* --- This is probably worthwhile on a multiply --- */
 540
 541   MPX_SHRINK(av, avl);
 542   MPX_SHRINK(bv, bvl);
 543
 544   /* --- Deal with a multiply by zero --- */
 545
 546   if (bv == bvl) {
 547     MPX_ZERO(dv, dvl);
 548     return;
 549   }
 550
 551   /* --- Do the initial multiply and initialize the accumulator --- */
 552
 553   MPX_UMULN(dv, dvl, av, avl, *bv++);
 554
 555   /* --- Do the remaining multiply/accumulates --- */
 556
 557   while (dv < dvl && bv < bvl) {
 558     mpw m = *bv++;
 559     mpw c = 0;
 560     const mpw *avv = av;
 561     mpw *dvv = ++dv;
 562
 563     while (avv < avl) {
 564       mpd x;
 565       if (dvv >= dvl)
 566         goto next;
 567       x = (mpd)*dvv + (mpd)m * (mpd)*avv++ + c;
 568       *dvv++ = MPW(x);
 569       c = x >> MPW_BITS;
 570     }
 571     MPX_UADDN(dvv, dvl, c);
 572   next:;
 573   }
 574 }
 575
 576 /* --- @mpx_usqr@ --- *
 577  *
 578  * Arguments:   @mpw *dv, *dvl@ = destination vector base and limit
 579  *              @const mpw *av, *av@ = source vector base and limit
 580  *
 581  * Returns:     ---
 582  *
 583  * Use:         Performs unsigned integer squaring.  The result vector must
 584  *              not overlap the source vector in any way.
 585  */
 586
 587 void mpx_usqr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl)
 588 {
 589   MPX_ZERO(dv, dvl);
 590
 591   /* --- Main loop --- */
 592
 593   while (av < avl) {
 594     const mpw *avv = av;
 595     mpw *dvv = dv;
 596     mpw a = *av;
 597     mpd c;
 598
 599     /* --- Stop if I've run out of destination --- */
 600
 601     if (dvv >= dvl)
 602       break;
 603
 604     /* --- Work out the square at this point in the proceedings --- */
 605
 606     {
 607       mpd x = (mpd)a * (mpd)a + *dvv;
 608       *dvv++ = MPW(x);
 609       c = MPW(x >> MPW_BITS);
 610     }
 611
 612     /* --- Now fix up the rest of the vector upwards --- */
 613
 614     avv++;
 615     while (dvv < dvl && avv < avl) {
 616       mpd x = (mpd)a * (mpd)*avv++;
 617       mpd y = ((x << 1) & MPW_MAX) + c + *dvv;
 618       c = (x >> (MPW_BITS - 1)) + (y >> MPW_BITS);
 619       *dvv++ = MPW(y);
 620     }
 621     while (dvv < dvl && c) {
 622       mpd x = c + *dvv;
 623       *dvv++ = MPW(x);
 624       c = x >> MPW_BITS;
 625     }
 626
 627     /* --- Get ready for the next round --- */
 628
 629     av++;
 630     dv += 2;
 631   }
 632 }
 633
 634 /* --- @mpx_udiv@ --- *
 635  *
 636  * Arguments:   @mpw *qv, *qvl@ = quotient vector base and limit
 637  *              @mpw *rv, *rvl@ = dividend/remainder vector base and limit
 638  *              @const mpw *dv, *dvl@ = divisor vector base and limit
 639  *              @mpw *sv, *svl@ = scratch workspace
 640  *
 641  * Returns:     ---
 642  *
 643  * Use:         Performs unsigned integer division.  If the result overflows
 644  *              the quotient vector, high-order bits are discarded.  (Clearly
 645  *              the remainder vector can't overflow.)  The various vectors
 646  *              may not overlap in any way.  Yes, I know it's a bit odd
 647  *              requiring the dividend to be in the result position but it
 648  *              does make some sense really.  The remainder must have
 649  *              headroom for at least two extra words.  The scratch space
 650  *              must be at least one word larger than the divisor.
 651  */
 652
 653 void mpx_udiv(mpw *qv, mpw *qvl, mpw *rv, mpw *rvl,
 654               const mpw *dv, const mpw *dvl,
 655               mpw *sv, mpw *svl)
 656 {
 657   unsigned norm = 0;
 658   size_t scale;
 659   mpw d, dd;
 660
 661   /* --- Initialize the quotient --- */
 662
 663   MPX_ZERO(qv, qvl);
 664
 665   /* --- Perform some sanity checks --- */
 666
 667   MPX_SHRINK(dv, dvl);
 668   assert(((void)"division by zero in mpx_udiv", dv < dvl));
 669
 670   /* --- Normalize the divisor --- *
 671    *
 672    * The algorithm requires that the divisor be at least two digits long.
 673    * This is easy to fix.
 674    */
 675
 676   {
 677     unsigned b;
 678
 679     d = dvl[-1];
 680     for (b = MPW_BITS / 2; b; b >>= 1) {
 681       if (d < (MPW_MAX >> b)) {
 682         d <<= b;
 683         norm += b;
 684       }
 685     }
 686     if (dv + 1 == dvl)
 687       norm += MPW_BITS;
 688   }
 689
 690   /* --- Normalize the dividend/remainder to match --- */
 691
 692   if (norm) {
 693     mpx_lsl(rv, rvl, rv, rvl, norm);
 694     mpx_lsl(sv, svl, dv, dvl, norm);
 695     dv = sv;
 696     dvl = svl;
 697     MPX_SHRINK(dv, dvl);
 698   }
 699
 700   MPX_SHRINK(rv, rvl);
 701   d = dvl[-1];
 702   dd = dvl[-2];
 703
 704   /* --- Work out the relative scales --- */
 705
 706   {
 707     size_t rvn = rvl - rv;
 708     size_t dvn = dvl - dv;
 709
 710     /* --- If the divisor is clearly larger, notice this --- */
 711
 712     if (dvn > rvn) {
 713       mpx_lsr(rv, rvl, rv, rvl, norm);
 714       return;
 715     }
 716
 717     scale = rvn - dvn;
 718   }
 719
 720   /* --- Calculate the most significant quotient digit --- *
 721    *
 722    * Because the divisor has its top bit set, this can only happen once.  The
 723    * pointer arithmetic is a little contorted, to make sure that the
 724    * behaviour is defined.
 725    */
 726
 727   if (MPX_UCMP(rv + scale, rvl, >=, dv, dvl)) {
 728     mpx_usub(rv + scale, rvl, rv + scale, rvl, dv, dvl);
 729     if (qvl - qv > scale)
 730       qv[scale] = 1;
 731   }
 732
 733   /* --- Now for the main loop --- */
 734
 735   {
 736     mpw *rvv = rvl - 2;
 737
 738     while (scale) {
 739       mpw q;
 740       mpd rh;
 741
 742       /* --- Get an estimate for the next quotient digit --- */
 743
 744       mpw r = rvv[1];
 745       mpw rr = rvv[0];
 746       mpw rrr = *--rvv;
 747
 748       scale--;
 749       rh = ((mpd)r << MPW_BITS) | rr;
 750       if (r == d)
 751         q = MPW_MAX;
 752       else
 753         q = MPW(rh / d);
 754
 755       /* --- Refine the estimate --- */
 756
 757       {
 758         mpd yh = (mpd)d * q;
 759         mpd yl = (mpd)dd * q;
 760
 761         if (yl > MPW_MAX) {
 762           yh += yl >> MPW_BITS;
 763           yl &= MPW_MAX;
 764         }
 765
 766         while (yh > rh || (yh == rh && yl > rrr)) {
 767           q--;
 768           yh -= d;
 769           if (yl < dd) {
 770             yh++;
 771             yl += MPW_MAX;
 772           }
 773           yl -= dd;
 774         }
 775       }
 776
 777       /* --- Remove a chunk from the dividend --- */
 778
 779       {
 780         mpw *svv;
 781         const mpw *dvv;
 782         mpw mc = 0, sc = 0;
 783
 784         /* --- Calculate the size of the chunk --- *
 785          *
 786          * This does the whole job of calculating @r >> scale - qd@.
 787          */
 788
 789         for (svv = rv + scale, dvv = dv;
 790              dvv < dvl && svv < rvl;
 791              svv++, dvv++) {
 792           mpd x = (mpd)*dvv * (mpd)q + mc;
 793           mc = x >> MPW_BITS;
 794           x = (mpd)*svv - MPW(x) - sc;
 795           *svv = MPW(x);
 796           if (x >> MPW_BITS)
 797             sc = 1;
 798           else
 799             sc = 0;
 800         }
 801
 802         if (svv < rvl) {
 803           mpd x = (mpd)*svv - mc - sc;
 804           *svv++ = MPW(x);
 805           if (x >> MPW_BITS)
 806             sc = MPW_MAX;
 807           else
 808             sc = 0;
 809           while (svv < rvl)
 810             *svv++ = sc;
 811         }
 812
 813         /* --- Fix if the quotient was too large --- *
 814          *
 815          * This doesn't seem to happen very often.
 816          */
 817
 818         if (rvl[-1] > MPW_MAX / 2) {
 819           mpx_uadd(rv + scale, rvl, rv + scale, rvl, dv, dvl);
 820           q--;
 821         }
 822       }
 823
 824       /* --- Done for another iteration --- */
 825
 826       if (qvl - qv > scale)
 827         qv[scale] = q;
 828       r = rr;
 829       rr = rrr;
 830     }
 831   }
 832
 833   /* --- Now fiddle with unnormalizing and things --- */
 834
 835   mpx_lsr(rv, rvl, rv, rvl, norm);
 836 }
 837
 838 /*----- That's all, folks -------------------------------------------------*/