chiark - git - mdw - catacomb/blob - mptext.c

   1 /* -*-c-*-
   2  *
   3  * $Id: mptext.c,v 1.5 2000/06/17 11:46:19 mdw Exp $
   4  *
   5  * Textual representation of multiprecision numbers
   6  *
   7  * (c) 1999 Straylight/Edgeware
   8  */
   9
  10 /*----- Licensing notice --------------------------------------------------*
  11  *
  12  * This file is part of Catacomb.
  13  *
  14  * Catacomb is free software; you can redistribute it and/or modify
  15  * it under the terms of the GNU Library General Public License as
  16  * published by the Free Software Foundation; either version 2 of the
  17  * License, or (at your option) any later version.
  18  *
  19  * Catacomb is distributed in the hope that it will be useful,
  20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22  * GNU Library General Public License for more details.
  23  *
  24  * You should have received a copy of the GNU Library General Public
  25  * License along with Catacomb; if not, write to the Free
  26  * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  27  * MA 02111-1307, USA.
  28  */
  29
  30 /*----- Revision history --------------------------------------------------*
  31  *
  32  * $Log: mptext.c,v $
  33  * Revision 1.5  2000/06/17 11:46:19  mdw
  34  * New and much faster stack-based algorithm for reading integers.  Support
  35  * reading and writing binary integers in bases between 2 and 256.
  36  *
  37  * Revision 1.4  1999/12/22 15:56:56  mdw
  38  * Use clever recursive algorithm for writing numbers out.
  39  *
  40  * Revision 1.3  1999/12/10 23:23:26  mdw
  41  * Allocate slightly less memory.
  42  *
  43  * Revision 1.2  1999/11/20 22:24:15  mdw
  44  * Use function versions of MPX_UMULN and MPX_UADDN.
  45  *
  46  * Revision 1.1  1999/11/17 18:02:16  mdw
  47  * New multiprecision integer arithmetic suite.
  48  *
  49  */
  50
  51 /*----- Header files ------------------------------------------------------*/
  52
  53 #include <ctype.h>
  54 #include <limits.h>
  55 #include <stdio.h>
  56
  57 #include "mp.h"
  58 #include "mptext.h"
  59 #include "paranoia.h"
  60
  61 /*----- Magical numbers ---------------------------------------------------*/
  62
  63 /* --- Maximum recursion depth --- *
  64  *
 * This is the number of bits in a @size_t@ object, plus a little slack.
 * Why?
 *
 * Just to convince yourself that this is correct: let @b = MPW_MAX + 1@,
 * and let %$Z$% be the largest number of words an @mp@ can have, which is
 * bounded by the largest @size_t@ value.  Then the largest possible @mp@
 * is %$M - 1$% where %$M = b^Z$%.  Let %$r$% be a radix to read or write.
 * Since the recursion squares the radix at each step, the greatest depth
 * reached by the recursion is %$d$%, where:
 *
 *   %$r^{2^d} = b^Z$%.
  73  *
  74  * Solving gives that %$d = \lg \log_r b^Z$%.  If %$r = 2$%, this is maximum,
  75  * so choosing %$d = \lg \lg b^Z = \lg (Z \lg b) = \lg Z + \lg \lg b$%.
  76  *
  77  * Expressing %$\lg Z$% as @CHAR_BIT * sizeof(size_t)@ yields an
  78  * overestimate, since a @size_t@ representation may contain `holes'.
  79  * Choosing to represent %$\lg \lg b$% by 10 is almost certainly sufficient
  80  * for `some time to come'.
  81  */
  82
  83 #define DEPTH (CHAR_BIT * sizeof(size_t) + 10)
  84
  85 /*----- Main code ---------------------------------------------------------*/
  86
  87 /* --- @mp_read@ --- *
  88  *
  89  * Arguments:   @mp *m@ = destination multiprecision number
  90  *              @int radix@ = base to assume for data (or zero to guess)
  91  *              @const mptext_ops *ops@ = pointer to operations block
  92  *              @void *p@ = data for the operations block
  93  *
  94  * Returns:     The integer read, or zero if it didn't work.
  95  *
  96  * Use:         Reads an integer from some source.  If the @radix@ is
  97  *              specified, the number is assumed to be given in that radix,
  98  *              with the letters `a' (either upper- or lower-case) upwards
  99  *              standing for digits greater than 9.  Otherwise, base 10 is
 100  *              assumed unless the number starts with `0' (octal), `0x' (hex)
 101  *              or `nnn_' (base `nnn').  An arbitrary amount of whitespace
 102  *              before the number is ignored.
 103  */
 104
 105 /* --- About the algorithm --- *
 106  *
 107  * The algorithm here is rather aggressive.  I maintain an array of
 108  * successive squarings of the radix, and a stack of partial results, each
 109  * with a counter attached indicating which radix square to multiply by.
 110  * Once the item at the top of the stack reaches the same counter level as
 111  * the next item down, they are combined together and the result is given a
 112  * counter level one higher than either of the results.
 113  *
 114  * Gluing the results together at the end is slightly tricky.  Pay attention
 115  * to the code.
 116  *
 117  * This is more complicated because of the need to handle the slightly
 118  * bizarre syntax.
 119  */
 120
 121 mp *mp_read(mp *m, int radix, const mptext_ops *ops, void *p)
 122 {
 123   int ch;                               /* Current char being considered */
 124   unsigned f = 0;                       /* Flags about the current number */
 125   int r;                                /* Radix to switch over to */
 126   mpw rd;                               /* Radix as an @mp@ digit */
 127   mp rr;                                /* The @mp@ for the radix */
 128   unsigned nf = m ? m->f & MP_BURN : 0; /* New @mp@ flags */
 129
 130   /* --- Stacks --- */
 131
 132   mp *pow[DEPTH];                       /* List of powers */
 133   unsigned pows;                        /* Next index to fill */
 134   struct { unsigned i; mp *m; } s[DEPTH]; /* Main stack */
 135   unsigned sp;                          /* Current stack pointer */
 136
 137   /* --- Flags --- */
 138
 139   enum {
 140     f_neg = 1u,
 141     f_ok = 2u
 142   };
 143
 144   /* --- Initialize the stacks --- */
 145
 146   mp_build(&rr, &rd, &rd + 1);
 147   pow[0] = &rr;
 148   pows = 1;
 149
 150   sp = 0;
 151
 152   /* --- Initialize the destination number --- */
 153
 154   if (m)
 155     MP_DROP(m);
 156
 157   /* --- Read an initial character --- */
 158
 159   ch = ops->get(p);
 160   while (isspace(ch))
 161     ch = ops->get(p);
 162
 163   /* --- Handle an initial sign --- */
 164
 165   if (ch == '-') {
 166     f |= f_neg;
 167     ch = ops->get(p);
 168     while (isspace(ch))
 169       ch = ops->get(p);
 170   }
 171
 172   /* --- If the radix is zero, look for leading zeros --- */
 173
 174   if (radix > 0) {
 175     assert(((void)"ascii radix must be <= 36", radix <= 36));
 176     rd = radix;
 177     r = -1;
 178   } else if (radix < 0) {
 179     rd = -radix;
 180     assert(((void)"binary radix must fit in a byte ", rd < UCHAR_MAX));
 181     r = -1;
 182   } else if (ch != '0') {
 183     rd = 10;
 184     r = 0;
 185   } else {
 186     ch = ops->get(p);
 187     if (ch == 'x') {
 188       ch = ops->get(p);
 189       rd = 16;
 190     } else {
 191       rd = 8;
 192       f |= f_ok;
 193     }
 194     r = -1;
 195   }
 196
 197   /* --- Time to start --- */
 198
 199   for (;; ch = ops->get(p)) {
 200     int x;
 201
 202     /* --- An underscore indicates a numbered base --- */
 203
 204     if (ch == '_' && r > 0 && r <= 36) {
 205       unsigned i;
 206
 207       /* --- Clear out the stacks --- */
 208
 209       for (i = 1; i < pows; i++)
 210         MP_DROP(pow[i]);
 211       pows = 1;
 212       for (i = 0; i < sp; i++)
 213         MP_DROP(s[i].m);
 214       sp = 0;
 215
 216       /* --- Restart the search --- */
 217
 218       rd = r;
 219       r = -1;
 220       f &= ~f_ok;
 221       continue;
 222     }
 223
 224     /* --- Check that the character is a digit and in range --- */
 225
 226     if (radix < 0)
 227       x = ch;
 228     else {
 229       if (!isalnum(ch))
 230         break;
 231       if (ch >= '0' && ch <= '9')
 232         x = ch - '0';
 233       else {
 234         ch = tolower(ch);
 235         if (ch >= 'a' && ch <= 'z')     /* ASCII dependent! */
 236           x = ch - 'a' + 10;
 237         else
 238           break;
 239       }
 240     }
 241
 242     /* --- Sort out what to do with the character --- */
 243
 244     if (x >= 10 && r >= 0)
 245       r = -1;
 246     if (x >= rd)
 247       break;
 248
 249     if (r >= 0)
 250       r = r * 10 + x;
 251
 252     /* --- Stick the character on the end of my integer --- */
 253
 254     assert(((void)"Number is too unimaginably huge", sp < DEPTH));
 255     s[sp].m = m = mp_new(1, nf);
 256     m->v[0] = x;
 257     s[sp].i = 0;
 258
 259     /* --- Now grind through the stack --- */
 260
 261     while (sp > 0 && s[sp - 1].i == s[sp].i) {
 262
 263       /* --- Combine the top two items --- */
 264
 265       sp--;
 266       m = s[sp].m;
 267       m = mp_mul(m, m, pow[s[sp].i]);
 268       m = mp_add(m, m, s[sp + 1].m);
 269       s[sp].m = m;
 270       MP_DROP(s[sp + 1].m);
 271       s[sp].i++;
 272
 273       /* --- Make a new radix power if necessary --- */
 274
 275       if (s[sp].i >= pows) {
 276         assert(((void)"Number is too unimaginably huge", pows < DEPTH));
 277         pow[pows] = mp_sqr(MP_NEW, pow[pows - 1]);
 278         pows++;
 279       }
 280     }
 281     f |= f_ok;
 282     sp++;
 283   }
 284
 285   ops->unget(ch, p);
 286
 287   /* --- If we're done, compute the rest of the number --- */
 288
 289   if (f & f_ok) {
 290     if (!sp)
 291       return (MP_ZERO);
 292     else {
 293       mp *z = MP_ONE;
 294       sp--;
 295
 296       while (sp > 0) {
 297
 298         /* --- Combine the top two items --- */
 299
 300         sp--;
 301         m = s[sp].m;
 302         z = mp_mul(z, z, pow[s[sp + 1].i]);
 303         m = mp_mul(m, m, z);
 304         m = mp_add(m, m, s[sp + 1].m);
 305         s[sp].m = m;
 306         MP_DROP(s[sp + 1].m);
 307
 308         /* --- Make a new radix power if necessary --- */
 309
 310         if (s[sp].i >= pows) {
 311           assert(((void)"Number is too unimaginably huge", pows < DEPTH));
 312           pow[pows] = mp_sqr(MP_NEW, pow[pows - 1]);
 313           pows++;
 314         }
 315       }
 316       MP_DROP(z);
 317       m = s[0].m;
 318     }
 319   } else {
 320     unsigned i;
 321     for (i = 0; i < sp; i++)
 322       MP_DROP(s[i].m);
 323   }
 324
 325   /* --- Clear the radix power list --- */
 326
 327   {
 328     unsigned i;
 329     for (i = 1; i < pows; i++)
 330       MP_DROP(pow[i]);
 331   }
 332
 333   /* --- Bail out if the number was bad --- */
 334
 335   if (!(f & f_ok))
 336     return (0);
 337
 338   /* --- Set the sign and return --- */
 339
 340   if (f & f_neg)
 341     m->f |= MP_NEG;
 342   return (m);
 343 }
 344
 345 /* --- @mp_write@ --- *
 346  *
 347  * Arguments:   @mp *m@ = pointer to a multi-precision integer
 348  *              @int radix@ = radix to use when writing the number out
 349  *              @const mptext_ops *ops@ = pointer to an operations block
 350  *              @void *p@ = data for the operations block
 351  *
 352  * Returns:     Zero if it worked, nonzero otherwise.
 353  *
 354  * Use:         Writes a large integer in textual form.
 355  */
 356
 357 /* --- Simple case --- *
 358  *
 359  * Use a fixed-sized buffer and the simple single-precision division
 360  * algorithm to pick off low-order digits.  Put each digit in a buffer,
 361  * working backwards from the end.  If the buffer becomes full, recurse to
 362  * get another one.  Ensure that there are at least @z@ digits by writing
 363  * leading zeroes if there aren't enough real digits.
 364  */
 365
 366 static int simple(mp *m, int radix, unsigned z,
 367                   const mptext_ops *ops, void *p)
 368 {
 369   int rc = 0;
 370   char buf[64];
 371   unsigned i = sizeof(buf);
 372   int rd = radix > 0 ? radix : -radix;
 373
 374   do {
 375     int ch;
 376     mpw x;
 377
 378     x = mpx_udivn(m->v, m->vl, m->v, m->vl, rd);
 379     MP_SHRINK(m);
 380     if (radix < 0)
 381       ch = x;
 382     else {
 383       if (x < 10)
 384         ch = '0' + x;
 385       else
 386         ch = 'a' + x - 10;
 387     }
 388     buf[--i] = ch;
 389     if (z)
 390       z--;
 391   } while (i && MP_LEN(m));
 392
 393   if (MP_LEN(m))
 394     rc = simple(m, radix, z, ops, p);
 395   else {
 396     static const char zero[32] = "00000000000000000000000000000000";
 397     while (!rc && z >= sizeof(zero)) {
 398       rc = ops->put(zero, sizeof(zero), p);
 399       z -= sizeof(zero);
 400     }
 401     if (!rc && z)
 402       rc = ops->put(zero, z, p);
 403   }
 404   if (!rc)
 405     ops->put(buf + i, sizeof(buf) - i, p);
 406   if (m->f & MP_BURN)
 407     BURN(buf);
 408   return (rc);
 409 }
 410
 411 /* --- Complicated case --- *
 412  *
 413  * If the number is small, fall back to the simple case above.  Otherwise
 414  * divide and take remainder by current large power of the radix, and emit
 415  * each separately.  Don't emit a zero quotient.  Be very careful about
 416  * leading zeroes on the remainder part, because they're deeply significant.
 417  */
 418
 419 static int complicated(mp *m, int radix, mp **pr, unsigned i, unsigned z,
 420                        const mptext_ops *ops, void *p)
 421 {
 422   int rc = 0;
 423   mp *q = MP_NEW;
 424   unsigned d = 1 << i;
 425
 426   if (MP_LEN(m) < 8)
 427     return (simple(m, radix, z, ops, p));
 428
 429   mp_div(&q, &m, m, pr[i]);
 430   if (!MP_LEN(q))
 431     d = z;
 432   else {
 433     if (z > d)
 434       z -= d;
 435     else
 436       z = 0;
 437     rc = complicated(q, radix, pr, i - 1, z, ops, p);
 438   }
 439   if (!rc)
 440     rc = complicated(m, radix, pr, i - 1, d, ops, p);
 441   mp_drop(q);
 442   return (rc);
 443 }
 444
 445 /* --- Main driver code --- */
 446
 447 int mp_write(mp *m, int radix, const mptext_ops *ops, void *p)
 448 {
 449   int rc;
 450
 451   /* --- Set various things up --- */
 452
 453   m = MP_COPY(m);
 454   MP_SPLIT(m);
 455
 456   /* --- Check the radix for sensibleness --- */
 457
 458   if (radix > 0)
 459     assert(((void)"ascii radix must be <= 36", radix <= 36));
 460   else if (radix < 0)
 461     assert(((void)"binary radix must fit in a byte", -radix < UCHAR_MAX));
 462   else
 463     assert(((void)"radix can't be zero in mp_write", 0));
 464
 465   /* --- If the number is negative, sort that out --- */
 466
 467   if (m->f & MP_NEG) {
 468     if (ops->put("-", 1, p))
 469       return (EOF);
 470     m->f &= ~MP_NEG;
 471   }
 472
 473   /* --- If the number is small, do it the easy way --- */
 474
 475   if (MP_LEN(m) < 8)
 476     rc = simple(m, radix, 0, ops, p);
 477
 478   /* --- Use a clever algorithm --- *
 479    *
 480    * Square the radix repeatedly, remembering old results, until I get
 481    * something more than half the size of the number @m@.  Use this to divide
 482    * the number: the quotient and remainder will be approximately the same
 483    * size, and I'll have split them on a digit boundary, so I can just emit
 484    * the quotient and remainder recursively, in order.
 485    */
 486
 487   else {
 488     mp *pr[DEPTH];
 489     size_t target = MP_LEN(m) / 2;
 490     unsigned i = 0;
 491     mp *z = mp_new(1, 0);
 492
 493     /* --- Set up the exponent table --- */
 494
 495     z->v[0] = (radix > 0 ? radix : -radix);
 496     z->f = 0;
 497     for (;;) {
 498       assert(((void)"Number is too unimaginably huge", i < DEPTH));
 499       pr[i++] = z;
 500       if (MP_LEN(z) > target)
 501         break;
 502       z = mp_sqr(MP_NEW, z);
 503     }
 504
 505     /* --- Write out the answer --- */
 506
 507     rc = complicated(m, radix, pr, i - 1, 0, ops, p);
 508
 509     /* --- Tidy away the array --- */
 510
 511     while (i > 0)
 512       mp_drop(pr[--i]);
 513   }
 514
 515   /* --- Tidying up code --- */
 516
 517   MP_DROP(m);
 518   return (rc);
 519 }
 520
 521 /*----- Test rig ----------------------------------------------------------*/
 522
 523 #ifdef TEST_RIG
 524
 525 #include <mLib/testrig.h>
 526
 527 static int verify(dstr *v)
 528 {
 529   int ok = 1;
 530   int ib = *(int *)v[0].buf, ob = *(int *)v[2].buf;
 531   dstr d = DSTR_INIT;
 532   mp *m = mp_readdstr(MP_NEW, &v[1], 0, ib);
 533   if (m) {
 534     if (!ob) {
 535       fprintf(stderr, "*** unexpected successful parse\n"
 536                       "*** input [%i] = ", ib);
 537       if (ib < 0)
 538         type_hex.dump(&v[1], stderr);
 539       else
 540         fputs(v[1].buf, stderr);
 541       mp_writedstr(m, &d, 10);
 542       fprintf(stderr, "\n*** (value = %s)\n", d.buf);
 543       ok = 0;
 544     } else {
 545       mp_writedstr(m, &d, ob);
 546       if (d.len != v[3].len || memcmp(d.buf, v[3].buf, d.len) != 0) {
 547         fprintf(stderr, "*** failed read or write\n"
 548                         "*** input [%i]    = ", ib);
 549         if (ib < 0)
 550           type_hex.dump(&v[1], stderr);
 551         else
 552           fputs(v[1].buf, stderr);
 553         fprintf(stderr, "\n*** output [%i]   = ", ob);
 554         if (ob < 0)
 555           type_hex.dump(&d, stderr);
 556         else
 557           fputs(d.buf, stderr);
 558         fprintf(stderr, "\n*** expected [%i]   = ", ob);
 559         if (ob < 0)
 560           type_hex.dump(&v[3], stderr);
 561         else
 562           fputs(v[3].buf, stderr);
 563         fputc('\n', stderr);
 564         ok = 0;
 565       }
 566     }
 567     mp_drop(m);
 568   } else {
 569     if (ob) {
 570       fprintf(stderr, "*** unexpected parse failure\n"
 571                       "*** input [%i]    = ", ib);
 572       if (ib < 0)
 573         type_hex.dump(&v[1], stderr);
 574       else
 575         fputs(v[1].buf, stderr);
 576       fprintf(stderr, "\n*** expected [%i]   = ", ob);
 577       if (ob < 0)
 578         type_hex.dump(&v[3], stderr);
 579       else
 580         fputs(v[3].buf, stderr);
 581       fputc('\n', stderr);
 582       ok = 0;
 583     }
 584   }
 585
 586   dstr_destroy(&d);
 587   assert(mparena_count(MPARENA_GLOBAL) == 0);
 588   return (ok);
 589 }
 590
/* --- Table of test chunks --- *
 *
 * All three chunks are checked by @verify@ and have the same four fields:
 * input base, input data, output base, expected output.  They differ only
 * in whether the input or the expected output is given as hex (binary)
 * data rather than as a string.  The all-zero entry ends the table.
 */

static test_chunk tests[] = {
  { "mptext-ascii", verify,
    { &type_int, &type_string, &type_int, &type_string, 0 } },
  { "mptext-bin-in", verify,
    { &type_int, &type_hex, &type_int, &type_string, 0 } },
  { "mptext-bin-out", verify,
    { &type_int, &type_string, &type_int, &type_hex, 0 } },
  { 0, 0, { 0 } }
};
 600
/* --- @main@ --- *
 *
 * Test rig entry point: calls @sub_init@, then hands the command line and
 * the @tests@ table to @test_run@ along with the path of the test vector
 * file under @SRCDIR@.  Always returns zero itself.
 */

int main(int argc, char *argv[])
{
  sub_init();   /* presumably sets up mLib's allocator -- TODO confirm */
  test_run(argc, argv, tests, SRCDIR "/tests/mptext");
  return (0);
}
 607
 608 #endif
 609
 610 /*----- That's all, folks -------------------------------------------------*/