chiark - git - mdw - mLib/blob - test/tvec-types.c

   1 /* -*-c-*-
   2  *
   3  * Types for the test-vector framework
   4  *
   5  * (c) 2023 Straylight/Edgeware
   6  */
   7
   8 /*----- Licensing notice --------------------------------------------------*
   9  *
  10  * This file is part of the mLib utilities library.
  11  *
  12  * mLib is free software: you can redistribute it and/or modify it under
  13  * the terms of the GNU Library General Public License as published by
  14  * the Free Software Foundation; either version 2 of the License, or (at
  15  * your option) any later version.
  16  *
  17  * mLib is distributed in the hope that it will be useful, but WITHOUT
  18  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  19  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
  20  * License for more details.
  21  *
  22  * You should have received a copy of the GNU Library General Public
  23  * License along with mLib.  If not, write to the Free Software
  24  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  25  * USA.
  26  */
  27
  28 /*----- Header files ------------------------------------------------------*/
  29
  30 #include <assert.h>
  31 #include <ctype.h>
  32 #include <errno.h>
  33 #include <float.h>
  34 #include <limits.h>
  35 #include <math.h>
  36 #include <stdio.h>
  37 #include <string.h>
  38
  39 #include "buf.h"
  40 #include "codec.h"
  41 #  include "base32.h"
  42 #  include "base64.h"
  43 #  include "hex.h"
  44 #include "dstr.h"
  45 #include "maths.h"
  46
  47 #include "tvec.h"
  48 #include "tvec-adhoc.h"
  49 #include "tvec-types.h"
  50
  51 /*----- Preliminary utilities ---------------------------------------------*/
  52
  53 /* --- @trivial_release@ --- *
  54  *
  55  * Arguments:   @union tvec_regval *rv@ = a register value
   56  *              @const struct tvec_regdef *rd@ = the register definition
  57  *
  58  * Returns:     ---
  59  *
  60  * Use:         Does nothing.  Used for register values which don't retain
  61  *              resources.
  62  */
  63
  64 static void trivial_release(union tvec_regval *rv,
  65                             const struct tvec_regdef *rd)
  66   { ; }
  67
  68 /*----- Integer utilities -------------------------------------------------*/
  69
  70 /* --- @unsigned_to_buf@, @signed_to_buf@ --- *
  71  *
  72  * Arguments:   @buf *b@ = buffer to write on
  73  *              @unsigned long u@ or @long i@ = integer to write
  74  *
  75  * Returns:     Zero on success, @-1@ on failure.
  76  *
   77  * Use:         Write @i@ to the buffer, in big-endian (two's-complement, if
   78  *              signed) format.
  79  */
  80
  81 static int unsigned_to_buf(buf *b, unsigned long u)
  82   { kludge64 k; ASSIGN64(k, u); return (buf_putk64l(b, k)); }
  83
  84 static int signed_to_buf(buf *b, long i)
  85 {
  86   kludge64 k;
  87   unsigned long u;
  88
  89   u = i;
  90   if (i >= 0) ASSIGN64(k, u);
  91   else { ASSIGN64(k, ~u); CPL64(k, k); }
  92   return (buf_putk64l(b, k));
  93 }
  94
  95 /* --- @unsigned_from_buf@, @signed_from_buf@ --- *
  96  *
   97  * Arguments:   @buf *b@ = buffer to read from
  98  *              @unsigned long *u_out@ or @long *i_out@ = where to put the
  99  *                      result
 100  *
 101  * Returns:     Zero on success, @-1@ on failure.
 102  *
 103  * Use:         Read an integer, in big-endian (two's-complement, if signed)
 104  *              format, from the buffer.
 105  */
 106
 107 static int unsigned_from_buf(buf *b, unsigned long *u_out)
 108 {
 109   kludge64 k, ulmax;
 110
 111   ASSIGN64(ulmax, ULONG_MAX);
 112   if (buf_getk64l(b, &k)) return (-1);
 113   if (CMP64(k, >, ulmax)) { buf_break(b); return (-1); }
 114   *u_out = GET64(unsigned long, k); return (0);
 115 }
 116
 117 /* --- @hex_width@ --- *
 118  *
 119  * Arguments:   @unsigned long u@ = an integer
 120  *
 121  * Returns:     A suitable number of digits to use in order to display @u@ in
 122  *              hex.  Currently, we select a power of two sufficient to show
 123  *              the value, but at least 2.
 124  */
 125
 126 static int hex_width(unsigned long u)
 127 {
 128   int wd;
 129   unsigned long t;
 130
 131   for (t = u >> 4, wd = 4; t >>= wd, wd *= 2, t; );
 132   return (wd/4);
 133 }
 134
 135 /* --- @format_unsigned_hex@, @format_signed_hex@ --- *
 136  *
 137  * Arguments:   @const struct gprintf_ops *gops@ = print operations
 138  *              @void *go@ = print destination
 139  *              @unsigned long u@ or @long i@ = integer to print
 140  *
 141  * Returns:     ---
 142  *
 143  * Use:         Print an unsigned or signed integer in hexadecimal.
 144  */
 145
 146 static void format_unsigned_hex(const struct gprintf_ops *gops, void *go,
 147                                 unsigned long u)
 148   { gprintf(gops, go, "0x%0*lx", hex_width(u), u); }
 149
 150 static void format_signed_hex(const struct gprintf_ops *gops, void *go,
 151                               long i)
 152 {
 153   unsigned long u = i >= 0 ? i : -(unsigned long)i;
 154   gprintf(gops, go, "%s0x%0*lx", i < 0 ? "-" : "", hex_width(u), u);
 155 }
 156
 157 static int signed_from_buf(buf *b, long *i_out)
 158 {
 159   kludge64 k, lmax, not_lmin;
 160
 161   ASSIGN64(lmax, LONG_MAX); ASSIGN64(not_lmin, ~(unsigned long)LONG_MIN);
 162   if (buf_getk64l(b, &k)) return (-1);
 163   if (CMP64(k, <=, lmax)) *i_out = (long)GET64(unsigned long, k);
 164   else {
 165     CPL64(k, k);
 166     if (CMP64(k, <=, not_lmin)) *i_out = -(long)GET64(unsigned long, k) - 1;
 167     else { buf_break(b); return (-1); }
 168   }
 169   return (0);
 170 }
 171
 172 /* --- @check_signed_range@, @check_unsigned_range@ --- *
 173  *
 174  * Arguments:   @long i@ or @unsigned long u@ = an integer
 175  *              @const struct tvec_irange *ir@ or
 176  *                      @const struct tvec_urange *ur@ = range specification,
 177  *                      or null
 178  *              @struct tvec_state *tv@ = test vector state
 179  *              @const char *what@ = description of value
 180  *
 181  * Returns:     Zero on success, or @-1@ on error.
 182  *
 183  * Use:         Check that the integer is within bounds.  If not, report a
 184  *              suitable error and return a failure indication.
 185  */
 186
 187 static int check_signed_range(long i,
 188                               const struct tvec_irange *ir,
 189                               struct tvec_state *tv, const char *what)
 190 {
 191   long ii, aa, m;
 192
 193   if (ir) {
 194     if (ir->min > i || i > ir->max) {
 195       tvec_error(tv, "%s %ld out of range (must be in [%ld .. %ld])",
 196                  what, i, ir->min, ir->max);
 197       return (-1);
 198     }
 199     m = ir->m; if (m > 0) m = -m;
 200     if (m && m != -1) {
 201       /* Reduce both the integer and the intended residue to the canonical
 202        * interval [0, m).  This is more awkward than it should be because C
 203        * (following CPU designs) adopted an unhelpful definition of integer
 204        * division when the dividend is negative.
 205        *
 206        * Note that I've canonicalized the divisor to be %%\emph{negative}%%,
 207        * because in two's-complement arithmetic, the absolute value of the
 208        * most negative representable value is not itself representable.  The
 209        * residue modulo the most negative value will itself be representable.
 210        */
 211
 212       ii = i%m; if (ii < 0) ii -= m;
 213       aa = ir->a%m; if (aa < 0) aa -= m;
 214       if (ii != aa) {
 215         tvec_error(tv, "%s %ld == %ld =/= %ld (mod %ld)",
 216                    what, i, ii, ir->a, ir->m);
 217         return (-1);
 218       }
 219     }
 220   }
 221   return (0);
 222 }
 223
 224 static int check_unsigned_range(unsigned long u,
 225                                 const struct tvec_urange *ur,
 226                                 struct tvec_state *tv, const char *what)
 227 {
 228   unsigned long uu;
 229
 230   if (ur) {
 231     if (ur->min > u || u > ur->max) {
 232       tvec_error(tv, "%s %lu out of range (must be in [%lu .. %lu])",
 233                  what, u, ur->min, ur->max);
 234       return (-1);
 235     }
 236     if (ur->m && ur->m != 1) {
 237       uu = u%ur->m;
 238       if (uu != ur->a%ur->m) {
 239         tvec_error(tv, "%s %lu == %lu =/= %lu (mod %lu)",
 240                    what, u, uu, ur->a, ur->m);
 241         return (-1);
 242       }
 243     }
 244   }
 245   return (0);
 246 }
 247
 248 /* --- @chtodig@ --- *
 249  *
 250  * Arguments:   @int ch@ = a character
 251  *
 252  * Returns:     The numeric value of the character as a digit, or @-1@ if
 253  *              it's not a digit.  Letters count as extended digits starting
 254  *              with value 10; case is not significant.
 255  */
 256
 257 static int chtodig(int ch)
 258 {
 259   if ('0' <= ch && ch <= '9') return (ch - '0');
 260   else if ('a' <= ch && ch <= 'z') return (ch - 'a' + 10);
 261   else if ('A' <= ch && ch <= 'Z') return (ch - 'A' + 10);
 262   else return (-1);
 263 }
 264
 265 /* --- @parse_unsigned_integer@, @parse_signed_integer@ --- *
 266  *
 267  * Arguments:   @unsigned long *u_out@, @long *i_out@ = where to put the
 268  *                      result
 269  *              @const char **q_out@ = where to put the end position
 270  *              @const char *p@ = pointer to the string to parse
 271  *
 272  * Returns:     Zero on success, @-1@ on error.
 273  *
 274  * Use:         Parse an integer from a string in the test-vector format.
  275  *              This is mostly an extension of the traditional C @strtoul@
 276  *              format: supported inputs include:
 277  *
 278  *                * NNN -- a decimal number (even if it starts with `0');
 279  *                * 0xNNN -- hexadecimal;
 280  *                * 0oNNN -- octal;
 281  *                * 0bNNN -- binary;
 282  *                * NNrNNN -- base NN.
 283  *
 284  *              Furthermore, single underscores are permitted internally as
 285  *              an insignificant digit separator.
 286  */
 287
 288 static int parse_unsigned_integer(unsigned long *u_out, const char **q_out,
 289                                   const char *p)
 290 {
 291   unsigned long u;
 292   int ch, d, r;
 293   const char *q;
 294   unsigned f = 0;
 295 #define f_implicit 1u                   /* implicitly reading base 10 */
 296 #define f_digit 2u                      /* read a real digit */
 297 #define f_uscore 4u                     /* found an underscore */
 298
 299   /* Initial setup
 300    *
 301    * This will deal with the traditional `0[box]...' prefixes.  We'll leave
 302    * our new `NNr...' syntax for later.
 303    */
 304   if (p[0] != '0' || !p[1]) {
 305     d = chtodig(*p); if (0 > d || d >= 10) return (-1);
 306     r = 10; u = d; p++; f |= f_implicit | f_digit;
 307   } else {
 308     u = 0; d = chtodig(p[2]);
 309     if (d < 0) { r = 10; f |= f_implicit | f_digit; p++; }
 310     else if ((p[1] == 'x' || p[1] == 'X') && d < 16) { r = 16; p += 2; }
 311     else if ((p[1] == 'o' || p[1] == 'O') && d < 8) { r = 8; p += 2; }
 312     else if ((p[1] == 'b' || p[1] == 'B') && d < 2) { r = 2; p += 2; }
 313     else { r = 10; f |= f_digit; p++; }
 314   }
 315
 316   q = p;
 317   for (;;) {
 318     /* Work through the string a character at a time. */
 319
 320     ch = *p; switch (ch) {
 321
 322       case '_':
 323         /* An underscore is OK if we haven't just seen one. */
 324
 325         if (f&f_uscore) goto done;
 326         p++; f = (f&~f_implicit) | f_uscore;
 327         break;
 328
 329       case 'r': case 'R':
 330         /* An `r' is OK if the number so far is small enough to be a sensible
 331          * base, and we're scanning decimal implicitly.
 332          */
 333
 334         if (!(f&f_implicit) || !u || u >= 36) goto done;
 335         d = chtodig(p[1]); if (0 > d || d >= u) goto done;
 336         r = u; u = d; f = (f&~f_implicit) | f_digit; p += 2; q = p;
 337         break;
 338
 339       default:
 340         /* Otherwise we expect a valid digit and accumulate it. */
 341         d = chtodig(ch); if (d < 0 || d >= r) goto done;
 342         if (u > ULONG_MAX/r) return (-1);
 343         u *= r; if (u > ULONG_MAX - d) return (-1);
 344         u += d; f = (f&~f_uscore) | f_digit; p++; q = p;
 345         break;
 346     }
 347   }
 348
 349 done:
 350   if (!(f&f_digit)) return (-1);
 351   *u_out = u; *q_out = q; return (0);
 352
 353 #undef f_implicit
 354 #undef f_digit
 355 #undef f_uscore
 356 }
 357
 358 static int parse_signed_integer(long *i_out, const char **q_out,
 359                                 const char *p)
 360 {
 361   unsigned long u;
 362   unsigned f = 0;
 363 #define f_neg 1u
 364
 365   /* Read an initial sign. */
 366   if (*p == '+') p++;
 367   else if (*p == '-') { f |= f_neg; p++; }
 368
 369   /* Scan an unsigned number. */
 370   if (parse_unsigned_integer(&u, q_out, p)) return (-1);
 371
 372   /* Check for signed overflow and apply the sign. */
 373   if (!(f&f_neg)) {
 374     if (u > LONG_MAX) return (-1);
 375     *i_out = u;
 376   } else {
 377     if (u && u - 1 > -(LONG_MIN + 1)) return (-1);
 378     *i_out = u ? -(long)(u - 1) - 1 : 0;
 379   }
 380
 381   return (0);
 382
 383 #undef f_neg
 384 }
 385
 386 /* --- @parse_unsigned@, @parse_signed@ --- *
 387  *
 388  * Arguments:   @unsigned long *u_out@ or @long *i_out@ = where to put the
 389  *                      result
 390  *              @const char *p@ = string to parse
 391  *              @const struct tvec_urange *ur@ or
 392  *                      @const struct tvec_irange *ir@ = range specification,
 393  *                      or null
 394  *              @struct tvec_state *tv@ = test vector state
 395  *
 396  * Returns:     Zero on success, @-1@ on error.
 397  *
 398  * Use:         Parse and range-check an integer.  Unlike @parse_(un)signed_
 399  *              integer@, these functions check that there's no cruft
 400  *              following the final digit, and report errors as they find
 401  *              them rather than leaving that to the caller.
 402  */
 403
 404 static int parse_unsigned(unsigned long *u_out, const char *p,
 405                           const struct tvec_urange *ur,
 406                           struct tvec_state *tv)
 407 {
 408   unsigned long u;
 409   const char *q;
 410
 411   if (parse_unsigned_integer(&u, &q, p))
 412     return (tvec_error(tv, "invalid unsigned integer `%s'", p));
 413   if (*q) return (tvec_syntax(tv, *q, "end-of-line"));
 414   if (check_unsigned_range(u, ur, tv, "integer")) return (-1);
 415   *u_out = u; return (0);
 416 }
 417
 418 static int parse_signed(long *i_out, const char *p,
 419                         const struct tvec_irange *ir,
 420                         struct tvec_state *tv)
 421 {
 422   long i;
 423   const char *q;
 424
 425   if (parse_signed_integer(&i, &q, p))
 426     return (tvec_error(tv, "invalid signed integer `%s'", p));
 427   if (*q) return (tvec_syntax(tv, *q, "end-of-line"));
 428   if (check_signed_range(i, ir, tv, "integer")) return (-1);
 429   *i_out = i; return (0);
 430 }

/* Unit prefix letters for sizes, in ascending order.  Each successive letter
 * stands for a further factor of 1024: @parse_szint@ multiplies by 1024 for
 * every step along this string, and @format_size@ divides.
 */
static const char size_units[] = "kMGTPEZY";
 432
 433 /* --- @parse_szint@ --- *
 434  *
 435  * Arguments:   @struct tvec_state *tv@ = test-vector state
 436  *              @unsigned long *u_out@ = where to put the answer
 437  *              @const char *delims@ = delimiters
 438  *              @const char *what@ = description of what we're parsing
 439  *
 440  * Returns:     Zero on success, %$-1$% on failure.
 441  *
 442  * Use:         Parse a memory size.
 443  */
 444
 445 static int parse_szint(struct tvec_state *tv, unsigned long *u_out,
 446                        const char *delims, const char *what)
 447 {
 448   dstr d = DSTR_INIT;
 449   const char *p, *unit;
 450   unsigned long u, t;
 451   int rc;
 452   unsigned f = 0;
 453 #define f_range 1u
 454
 455   d.a = &tv->p_test->a;
 456   if (tvec_readword(tv, &d, 0, delims, what)) { rc = -1; goto end; }
 457   p = d.buf;
 458   if (parse_unsigned_integer(&u, &p, p)) goto bad;
 459   if (!*p) tvec_readword(tv, &d, &p, delims, 0);
 460
 461   for (t = u, unit = size_units; *unit; unit++) {
 462     if (t > ULONG_MAX/1024) f |= f_range;
 463     else t *= 1024;
 464     if (*p == *unit) {
 465       if (f&f_range) goto rangerr;
 466       u = t; p++; break;
 467     }
 468   }
 469   if (*p == 'B') p++;
 470   if (*p) goto bad;
 471
 472   *u_out = u; rc = 0;
 473 end:
 474   return (rc);
 475
 476 bad:
 477   tvec_error(tv, "invalid %s `%s'", what, d.buf);
 478   rc = -1; goto end;
 479
 480 rangerr:
 481   tvec_error(tv, "%s `%s' out of range", what, d.buf);
 482   rc = -1; goto end;
 483
 484 #undef f_range
 485 }
 486
 487 /* --- @format_size@ --- *
 488  *
 489  * Arguments:   @const struct gprintf_ops *gops@ = print operations
 490  *              @void *go@ = print destination
 491  *              @unsigned long u@ = a size
 492  *              @unsigned style@ = style (@TVSF_...@)
 493  *
 494  * Returns:     ---
 495  *
 496  * Use:         Format @u@ as a size in bytes to the destination, expressing
 497  *              it with a unit prefix if this is possible exactly.
 498  */
 499
 500 static void format_size(const struct gprintf_ops *gops, void *go,
 501                         unsigned long u, unsigned style)
 502 {
 503   const char *unit;
 504
 505   if (style&TVSF_RAW)
 506     gprintf(gops, go, "%lu", u);
 507   else if (!u || u%1024)
 508     gprintf(gops, go, "%lu%sB", u, style&TVSF_COMPACT ? "" : " ");
 509   else {
 510     for (unit = size_units, u /= 1024;
 511          !(u%1024) && unit[1];
 512          u /= 1024, unit++);
 513     gprintf(gops, go, "%lu%s%cB", u, style&TVSF_COMPACT ? "" : " ", *unit);
 514   }
 515 }
 516
 517 /*----- Floating-point utilities ------------------------------------------*/
 518
 519 /* --- @eqish_floating_p@ --- *
 520  *
 521  * Arguments:   @double x, y@ = two numbers to compare
 522  *              @const struct tvec_floatinfo *fi@ = floating-point info
 523  *
 524  * Returns:     Nonzero if  the comparand @x@ is sufficiently close to the
 525  *              reference @y@, or zero if it's definitely different.
 526  */
 527
 528 static int eqish_floating_p(double x, double y,
 529                             const struct tvec_floatinfo *fi)
 530 {
 531   double t, u;
 532
 533   /* NaNs and infinities are equal only to each other. */
 534   if (NANP(x)) return (NANP(y)); else if (NANP(y)) return (0);
 535   if (INFP(x)) return (x == y); else if (INFP(y)) return (0);
 536
 537   /* Compare finite values. */
 538   switch (fi ? fi->f&TVFF_EQMASK : TVFF_EXACT) {
 539     case TVFF_EXACT:
 540       return (x == y && NEGP(x) == NEGP(y));
 541     case TVFF_ABSDELTA:
 542       t = fabs(y - x); return (t < fi->delta);
 543     case TVFF_RELDELTA:
 544       t = fabs(y - x); u = fabs(y*fi->delta); if (u < DBL_MIN) u = DBL_MIN;
 545       return (t <= u);
 546     default:
 547       abort();
 548   }
 549 }
 550
 551 /* --- @format_floating@ --- *
 552  *
 553  * Arguments:   @const struct gprintf_ops *gops@ = print operations
 554  *              @void *go@ = print destination
 555  *              @double x@ = number to print
 556  *
 557  * Returns:     ---
 558  *
 559  * Use:         Print a floating-point number, accurately.
 560  */
 561
 562 static void format_floating(const struct gprintf_ops *gops, void *go,
 563                             double x)
 564 {
 565   int prec;
 566
 567   if (NANP(x))
 568     gprintf(gops, go, "#nan");
 569   else if (INFP(x))
 570     gprintf(gops, go, x > 0 ? "#+inf" : "#-inf");
 571   else {
 572     /* Ugh.  C doesn't provide any function for just printing a
 573      * floating-point number /correctly/, i.e., so that you can read the
 574      * result back and recover the number you first thought of.  There are
 575      * complicated algorithms published for doing this, but I really don't
 576      * want to get into that here.  So we have this.
 577      *
 578      * The sign doesn't cause significant difficulty so we're going to ignore
 579      * it for now.  So suppose we're given a number %$x = f b^e$%, in
 580      * base-%$b$% format, so %$f b^n$% and %$e$% are integers, with
 581      * %$0 \le f < 1$%.  We're going to convert it into the nearest integer
 582      * of the form %$X = F B^E$%, with similar conditions, only with the
 583      * additional requirement that %$X$% is normalized, i.e., that %$X = 0$%
 584      * or %$F \ge B^{-N}$%.
 585      *
 586      * We're rounding to the nearest such %$X$%.  If there is to be ambiguity
 587      * in the conversion, then some %$x = f b^e$% and the next smallest
 588      * representable number %$x' = x + b^{e-n}$% must both map to the same
 589      * %$X$%, which means both %$x$% and %$x'$% must be nearer to %$X$% than
 590      * any other number representable in the target system.  The nest larger
 591      * number is %$X' = X + B^{E-N}$%; the next smaller number will normally
 592      * be %$W = X - B^{E-N}$%, but if %$F = 1/B$ then the next smaller number
 593      * is actually %$X - B^{E-N-1}$%.  We ignore this latter possibility in
 594      * the pursuit of a conservative estimate (though actually it doesn't
 595      * matter).
 596      *
 597      * If both %$x$% and %$x'$% map to %$X$% then we must have
 598      * %$L = X - B^{E-N}/2 \le x$% and %$x + b^{e-n} \le R = X + B^{E-N}/2$%;
 599      * so firstly %$f b^e = x \ge L = W + B^{E-N}/2 > W = (F - B^{-N}) B^E$%,
 600      * and secondly %$b^{e-n} \le B^{E-N}$%.  Since these inequalities are in
 601      * opposite senses, we can divide, giving
 602      *
 603      *         %$f b^e/b^{e-n} > (F - B^{-N}) B^E/B^{E-N}$% ,
 604      *
 605      * whence
 606      *
 607      *         %$f b^n > (F - B^{-N}) B^N = F B^N - 1$% .
 608      *
 609      * Now %$f \le 1 - b^{-n}$%, and %$F \ge B^{-1}$%, so, for this to be
 610      * possible, it must be the case that
 611      *
 612      *         %$(1 - b^{-n}) b^n = b^n - 1 > B^{N-1} - 1$% .
 613      *
 614      * Then rearrange and take logarithms, obtaining
 615      *
 616      *         %$(N - 1) \log B < n \log b$% ,
 617      *
 618      * and so
 619      *
 620      *         %$N < n \log b/\log B + 1$% .
 621      *
 622      * Recall that this is a necessary condition for a collision to occur; we
 623      * are therefore safe whenever
 624      *
 625      *         %$N \ge n \log b/\log B + 1$% ;
 626      *
 627      * so, taking ceilings,
 628      *
 629      *         %$N \ge \lceil n \log b/\log B \rceil + 1$% .
 630      *
 631      * So that's why we have this.
 632      *
 633      * I'm going to assume that @n = DBL_MANT_DIG@ is sufficiently small
 634      * that we can calculate this without ending up on the wrong side of an
 635      * integer boundary.
 636      *
 637      * In C11, we have @DBL_DECIMAL_DIG@, which should be the same value
 638      * only as a constant.  Except that modern compilers are more than clever
 639      * enough to work out that this is a constant anyway.
 640      *
 641      * This is sometimes an overestimate: we'll print out meaningless digits
 642      * that don't represent anything we actually know about the number in
 643      * question.  To fix that, we'd need a complicated algorithm like Steele
 644      * and White's Dragon4, Gay's @dtoa@, or Burger and Dybvig's algorithm
 645      * (note that Loitsch's Grisu2 is conservative, and Grisu3 hands off to
 646      * something else in difficult situations).
 647      */
 648
 649 #ifdef DBL_DECIMAL_DIG
 650     prec = DBL_DECIMAL_DIG;
 651 #else
 652     prec = ceil(DBL_MANT_DIG*log(FLT_RADIX)/log(10)) + 1;
 653 #endif
 654     gprintf(gops, go, "%.*g", prec, x);
 655   }
 656 }
 657
 658 /* --- @parse_floating@ --- *
 659  *
 660  * Arguments:   @double *x_out@ = where to put the result
  661  *              @const char **q_out@ = where to leave end pointer, or null
 662  *              @const char *p@ = string to parse
 663  *              @const struct tvec_floatinfo *fi@ = floating-point info
 664  *              @struct tvec_state *tv@ = test vector state
 665  *
 666  * Returns:     Zero on success, @-1@ on error.
 667  *
 668  * Use:         Parse a floating-point number from a string.  Reports any
 669  *              necessary errors.  If @q_out@ is not null then trailing
 670  *              material is permitted and a pointer to it (or the end of the
 671  *              string) is left in @*q_out@.
 672  */
 673
 674 static int parse_floating(double *x_out, const char **q_out, const char *p,
 675                           const struct tvec_floatinfo *fi,
 676                           struct tvec_state *tv)
 677 {
 678   const char *pp; char *q;
 679   dstr d = DSTR_INIT;
 680   double x;
 681   int olderr, rc;
 682
 683   d.a = &tv->p_test->a;
 684
 685   /* Check for special tokens. */
 686   if (STRCMP(p, ==, "#nan")) {
 687 #ifdef NAN
 688     if (q_out) *q_out = p + strlen(p);
 689     x = NAN; rc = 0;
 690 #else
 691     tvec_error(tv, "NaN not supported on this system");
 692     rc = -1; goto end;
 693 #endif
 694   }
 695
 696   else if (STRCMP(p, ==, "#inf") ||
 697            STRCMP(p, ==, "#+inf") || STRCMP(p, ==, "+#inf")) {
 698 #ifdef INFINITY
 699     if (q_out) *q_out = p + strlen(p);
 700     x = INFINITY; rc = 0;
 701 #else
 702     tvec_error(tv, "infinity not supported on this system");
 703     rc = -1; goto end;
 704 #endif
 705   }
 706
 707   else if (STRCMP(p, ==, "#-inf") || STRCMP(p, ==, "-#inf")) {
 708 #ifdef INFINITY
 709     if (q_out) *q_out = p + strlen(p);
 710     x = -INFINITY; rc = 0;
 711 #else
 712     tvec_error(tv, "infinity not supported on this system");
 713     rc = -1; goto end;
 714 #endif
 715   }
 716
 717   /* Check that this looks like a number, so we can exclude `strtod'
 718    * recognizing its own non-finite number tokens.
 719    */
 720   else {
 721     pp = p;
 722     if (*pp == '+' || *pp == '-') pp++;
 723     if (*pp == '.') pp++;
 724     if (!ISDIGIT(*pp)) {
 725       tvec_syntax(tv, *p ? *p : fgetc(tv->fp), "floating-point number");
 726       rc = -1; goto end;
 727     }
 728
 729     /* Parse the number using the system parser. */
 730     olderr = errno; errno = 0;
 731 #if __STDC_VERSION__ >= 199901
 732     x = strtod(p, &q);
 733 #else
 734     x = strtold(p, &q);
 735 #endif
 736     if (q_out) *q_out = q;
 737     else if (*q) { tvec_syntax(tv, *q, "end-of-line"); rc = -1; goto end; }
 738     if (errno && (errno != ERANGE || (x > 0 ? -x : x) == HUGE_VAL)) {
 739       tvec_error(tv, "invalid floating-point number `%.*s': %s",
 740                  (int)(q - p), p, strerror(errno));
 741       rc = -1; goto end;
 742     }
 743     errno = olderr;
 744   }
 745
 746   /* Check that the number is acceptable. */
 747   if (NANP(x) && fi && !(fi->f&TVFF_NANOK)) {
 748     tvec_error(tv, "#nan not allowed here");
 749     rc = -1; goto end;
 750   }
 751
 752   if (fi &&
 753       ((!(fi->f&TVFF_NOMIN) && x < fi->min) ||
 754        (!(fi->f&TVFF_NOMAX) && x > fi->max)) &&
 755       !(INFP(x) && (fi->f&(NEGP(x) ? TVFF_NEGINFOK : TVFF_POSINFOK)))) {
 756     dstr_puts(&d, "floating-point number ");
 757     format_floating(&dstr_printops, &d, x);
 758     dstr_puts(&d, " out of range (must be in ");
 759     if (fi->f&TVFF_NOMIN)
 760       dstr_puts(&d, "(#-inf");
 761     else
 762       { dstr_putc(&d, '['); format_floating(&dstr_printops, &d, fi->min); }
 763     dstr_puts(&d, " .. ");
 764     if (fi->f&TVFF_NOMAX)
 765       dstr_puts(&d, "#+inf)");
 766     else
 767       { format_floating(&dstr_printops, &d, fi->max); dstr_putc(&d, ']'); }
 768     dstr_putc(&d, ')'); dstr_putz(&d);
 769     tvec_error(tv, "%s", d.buf); rc = -1; goto end;
 770   }
 771
 772   /* All done. */
 773   *x_out = x; rc = 0;
 774 end:
 775   return (rc);
 776 }
 777
 778 /*----- String utilities --------------------------------------------------*/
 779
 780 /* Special character name table. */
 781 static const struct chartab {
 782   const char *name;                     /* character name */
 783   int ch;                               /* character value */
 784   unsigned f;                           /* flags: */
 785 #define CTF_PREFER 1u                   /*   preferred name */
 786 #define CTF_SHORT 2u                    /*   short name (compact style) */
 787 } chartab[] = {
 788   { "#eof",             EOF,    CTF_PREFER | CTF_SHORT },
 789   { "#nul",             '\0',   CTF_PREFER },
 790   { "#bell",            '\a',   CTF_PREFER },
 791   { "#ding",            '\a',   0 },
 792   { "#bel",             '\a',   CTF_SHORT },
 793   { "#backspace",       '\b',   CTF_PREFER },
 794   { "#bs",              '\b',   CTF_SHORT },
 795   { "#escape",          '\x1b', CTF_PREFER },
 796   { "#esc",             '\x1b', CTF_SHORT },
 797   { "#formfeed",        '\f',   CTF_PREFER },
 798   { "#ff",              '\f',   CTF_SHORT },
 799   { "#newline",         '\n',   CTF_PREFER },
 800   { "#linefeed",        '\n',   0 },
 801   { "#lf",              '\n',   CTF_SHORT },
 802   { "#nl",              '\n',   0 },
 803   { "#return",          '\r',   CTF_PREFER },
 804   { "#carriage-return", '\r',   0 },
 805   { "#cr",              '\r',   CTF_SHORT },
 806   { "#tab",             '\t',   CTF_PREFER | CTF_SHORT },
 807   { "#horizontal-tab",  '\t',   0 },
 808   { "#ht",              '\t',   0 },
 809   { "#vertical-tab",    '\v',   CTF_PREFER },
 810   { "#vt",              '\v',   CTF_SHORT },
 811   { "#space",           ' ',    0 },
 812   { "#spc",             ' ',    CTF_SHORT },
 813   { "#delete",          '\x7f', CTF_PREFER },
 814   { "#del",             '\x7f', CTF_SHORT },
 815   { 0,                  0,      0 }
 816 };
 817
 818 /* --- @find_charname@ --- *
 819  *
 820  * Arguments:   @int ch@ = character to match
 821  *              @unsigned f@ = flags (@CTF_...@) to match
 822  *
 823  * Returns:     The name of the character, or null if no match is found.
 824  *
 825  * Use:         Looks up a name for a character.  Specifically, it returns
 826  *              the first entry in the @chartab@ table which matches @ch@ and
 827  *              which has one of the flags @f@ set.
 828  */
 829
 830 static const char *find_charname(int ch, unsigned f)
 831 {
 832   const struct chartab *ct;
 833
 834   for (ct = chartab; ct->name; ct++)
 835     if (ct->ch == ch && (ct->f&f)) return (ct->name);
 836   return (0);
 837 }
 838
 839 /* --- @read_charname@ --- *
 840  *
 841  * Arguments:   @int *ch_out@ = where to put the character
 842  *              @const char *p@ = character name
  843  *              @unsigned f@ = flags (@RCF_...@)
 844  *
 845  * Returns:     Zero if a match was found, @-1@ if not.
 846  *
 847  * Use:         Looks up a character by name.  If @RCF_EOFOK@ is set in @f@,
 848  *              then the @EOF@ marker can be matched; otherwise it can't.
 849  */
 850
 851 #define RCF_EOFOK 1u
 852 static int read_charname(int *ch_out, const char *p, unsigned f)
 853 {
 854   const struct chartab *ct;
 855
 856   for (ct = chartab; ct->name; ct++)
 857     if (STRCMP(p, ==, ct->name) && ((f&RCF_EOFOK) || ct->ch >= 0))
 858       { *ch_out = ct->ch; return (0); }
 859   return (-1);
 860 }
 861
 862 /* --- @format_charesc@ --- *
 863  *
 864  * Arguments:   @const struct gprintf_ops *gops@ = print operations
 865  *              @void *go@ = print destination
 866  *              @int ch@ = character to format
 867  *              @unsigned f@ = flags (@FCF_...@)
 868  *
 869  * Returns:     ---
 870  *
 871  * Use:         Format a character as an escape sequence, possibly as part of
 872  *              a larger string.  If @FCF_BRACE@ is set in @f@, then put
 873  *              braces around a `\x...'  code, so that it's suitable for use
 874  *              in a longer string.
 875  */
 876
 877 #define FCF_BRACE 1u
 878 static void format_charesc(const struct gprintf_ops *gops, void *go,
 879                            int ch, unsigned f)
 880 {
 881   switch (ch) {
 882     case '\a': gprintf(gops, go, "\\a"); break;
 883     case '\b': gprintf(gops, go, "\\b"); break;
 884     case '\x1b': gprintf(gops, go, "\\e"); break;
 885     case '\f': gprintf(gops, go, "\\f"); break;
 886     case '\r': gprintf(gops, go, "\\r"); break;
 887     case '\n': gprintf(gops, go, "\\n"); break;
 888     case '\t': gprintf(gops, go, "\\t"); break;
 889     case '\v': gprintf(gops, go, "\\v"); break;
 890     case '\\': gprintf(gops, go, "\\\\"); break;
 891     case '\'': gprintf(gops, go, "\\'"); break;
 892     case '\0':
 893       if (f&FCF_BRACE) gprintf(gops, go, "\\{0}");
 894       else gprintf(gops, go, "\\0");
 895       break;
 896     default:
 897       if (f&FCF_BRACE)
 898         gprintf(gops, go, "\\x{%0*x}", hex_width(UCHAR_MAX), ch);
 899       else
 900         gprintf(gops, go, "\\x%0*x", hex_width(UCHAR_MAX), ch);
 901       break;
 902   }
 903 }
 904
 905 /* --- @format_char@ --- *
 906  *
 907  * Arguments:   @const struct gprintf_ops *gops@ = print operations
 908  *              @void *go@ = print destination
 909  *              @int ch@ = character to format
 910  *
 911  * Returns:     ---
 912  *
 913  * Use:         Format a single character.
 914  */
 915
 916 static void format_char(const struct gprintf_ops *gops, void *go, int ch)
 917 {
 918   switch (ch) {
 919     case '\\': case '\'': escape:
 920       gprintf(gops, go, "'");
 921       format_charesc(gops, go, ch, 0);
 922       gprintf(gops, go, "'");
 923       break;
 924     default:
 925       if (!isprint(ch)) goto escape;
 926       gprintf(gops, go, "'%c'", ch);
 927       break;
 928   }
 929 }
 930
 931 /* --- @fill_pattern@ --- *
 932  *
 933  * Arguments:   @void *p@ = destination pointer
 934  *              @size_t sz@ = destination buffer size
 935  *              @const void *pat@ = pointer to pattern
 936  *              @size_t patsz@ = pattern size
 937  *
 938  * Returns:     ---
 939  *
 940  * Use:         Fill the destination buffer with as many copies of the
 941  *              pattern as will fit, followed by as many initial bytes of the
 942  *              pattern will fit in the remaining space.
 943  */
 944
 945 static void fill_pattern(void *p, size_t sz, const void *pat, size_t patsz)
 946 {
 947   unsigned char *q = p;
 948
 949   if (patsz == 1)
 950     memset(q, *(unsigned char *)pat, sz);
 951   else {
 952     if (sz > patsz) {
 953       memcpy(q, pat, patsz); pat = q; q += patsz; sz -= patsz;
 954       while (sz > patsz)
 955         { memcpy(q, pat, patsz); q += patsz; sz -= patsz; patsz *= 2; }
 956     }
 957     memcpy(q, pat, sz);
 958   }
 959 }
 960
 961 /* --- @maybe_format_unsigned_char@, @maybe_format_signed_char@ --- *
 962  *
 963  * Arguments:   @const struct gprintf_ops *gops@ = print operations
 964  *              @void *go@ = print destination
 965  *              @unsigned long u@ or @long i@ = an integer
 966  *
 967  * Returns:     ---
 968  *
 969  * Use:         Format a (signed or unsigned) integer as a character, if it's
 970  *              in range, printing something like `= 'q''.  It's assumed that
 971  *              a comment marker has already been output.
 972  */
 973
 974 static void maybe_format_unsigned_char
 975   (const struct gprintf_ops *gops, void *go, unsigned long u)
 976 {
 977   const char *p;
 978
 979   p = find_charname(u, CTF_PREFER);
 980   if (p) gprintf(gops, go, " = %s", p);
 981   if (u < UCHAR_MAX)
 982     { gprintf(gops, go, " = "); format_char(gops, go, u); }
 983 }
 984
 985 static void maybe_format_signed_char
 986   (const struct gprintf_ops *gops, void *go, long i)
 987 {
 988   const char *p;
 989
 990   p = find_charname(i, CTF_PREFER);
 991   if (p) gprintf(gops, go, " = %s", p);
 992   if (0 <= i && i < UCHAR_MAX)
 993     { gprintf(gops, go, " = "); format_char(gops, go, i); }
 994 }
 995
 996 /* --- @read_charesc@ --- *
 997  *
 998  * Arguments:   @int *ch_out@ = where to put the result
 999  *              @struct tvec_state *tv@ = test vector state
1000  *
1001  * Returns:     Zero on success, @-1@ on error.
1002  *
1003  * Use:         Parse and convert an escape sequence from @tv@'s input
1004  *              stream, assuming that the initial `\' has already been read.
1005  *              Reports errors as appropriate.
1006  */
1007
1008 static int read_charesc(int *ch_out, struct tvec_state *tv)
1009 {
1010   int ch, i, esc;
1011   unsigned f = 0;
1012 #define f_brace 1u
1013
1014   ch = getc(tv->fp);
1015   switch (ch) {
1016
1017     /* Things we shouldn't find. */
1018     case EOF: case '\n': return (tvec_syntax(tv, ch, "string escape"));
1019
1020     /* Single-character escapes. */
1021     case '\'': *ch_out = '\''; break;
1022     case '\\': *ch_out = '\\'; break;
1023     case '"': *ch_out = '"'; break;
1024     case 'a': *ch_out = '\a'; break;
1025     case 'b': *ch_out = '\b'; break;
1026     case 'e': *ch_out = '\x1b'; break;
1027     case 'f': *ch_out = '\f'; break;
1028     case 'n': *ch_out = '\n'; break;
1029     case 'r': *ch_out = '\r'; break;
1030     case 't': *ch_out = '\t'; break;
1031     case 'v': *ch_out = '\v'; break;
1032
1033     /* Hex escapes, with and without braces. */
1034     case 'x':
1035       ch = getc(tv->fp);
1036       if (ch == '{') { f |= f_brace; ch = getc(tv->fp); }
1037       else f &= ~f_brace;
1038       esc = chtodig(ch);
1039       if (esc < 0 || esc >= 16) return (tvec_syntax(tv, ch, "hex digit"));
1040       for (;;) {
1041         ch = getc(tv->fp); i = chtodig(ch); if (i < 0 || i >= 16) break;
1042         esc = 16*esc + i;
1043         if (esc > UCHAR_MAX)
1044           return (tvec_error(tv,
1045                              "character code %d out of range", esc));
1046       }
1047       if (!(f&f_brace)) ungetc(ch, tv->fp);
1048       else if (ch != '}') return (tvec_syntax(tv, ch, "`}'"));
1049       *ch_out = esc;
1050       break;
1051
1052     /* Other things, primarily octal escapes. */
1053     case '{':
1054       f |= f_brace; ch = getc(tv->fp);
1055       /* fall through */
1056     default:
1057       if ('0' <= ch && ch < '8') {
1058         i = 1; esc = ch - '0';
1059         for (;;) {
1060           ch = getc(tv->fp);
1061           if ('0' > ch || ch >= '8') { ungetc(ch, tv->fp); break; }
1062           esc = 8*esc + ch - '0';
1063           i++; if (i >= 3) break;
1064         }
1065         if (f&f_brace) {
1066           ch = getc(tv->fp);
1067           if (ch != '}') return (tvec_syntax(tv, ch, "`}'"));
1068         }
1069         if (esc > UCHAR_MAX)
1070           return (tvec_error(tv,
1071                              "character code %d out of range", esc));
1072         *ch_out = esc; break;
1073       } else
1074         return (tvec_syntax(tv, ch, "string escape"));
1075   }
1076
1077   /* Done. */
1078   return (0);
1079
1080 #undef f_brace
1081 }
1082
1083 /* --- @read_quoted_string@ --- *
1084  *
1085  * Arguments:   @dstr *d@ = string to write to
1086  *              @int quote@ = initial quote, `'' or `"'
1087  *              @struct tvec_state *tv@ = test vector state
1088  *
1089  * Returns:     Zero on success, @-1@ on error.
1090  *
1091  * Use:         Read the rest of a quoted string into @d@, reporting errors
1092  *              as appropriate.
1093  *
1094  *              A single-quoted string is entirely literal.  A double-quoted
1095  *              string may contain C-like escapes.
1096  */
1097
1098 static int read_quoted_string(dstr *d, int quote, struct tvec_state *tv)
1099 {
1100   int ch;
1101
1102   for (;;) {
1103     ch = getc(tv->fp);
1104     switch (ch) {
1105       case EOF: case '\n':
1106         return (tvec_syntax(tv, ch, "`%c'", quote));
1107       case '\\':
1108         if (quote == '\'') goto ordinary;
1109         ch = getc(tv->fp); if (ch == '\n') { tv->lno++; break; }
1110         ungetc(ch, tv->fp); if (read_charesc(&ch, tv)) return (-1);
1111         goto ordinary;
1112       default:
1113         if (ch == quote) goto end;
1114       ordinary:
1115         DPUTC(d, ch);
1116         break;
1117     }
1118   }
1119
1120 end:
1121   DPUTZ(d);
1122   return (0);
1123 }
1124
1125 /* --- @collect_bare@ --- *
1126  *
1127  * Arguments:   @dstr *d@ = string to write to
1128  *              @struct tvec_state *tv@ = test vector state
1129  *
1130  * Returns:     Zero on success, @-1@ on error.
1131  *
1132  * Use:         Read barewords and the whitespace between them.  Stop when we
1133  *              encounter something which can't start a bareword.
1134  */
1135
1136 static int collect_bare(dstr *d, struct tvec_state *tv)
1137 {
1138   size_t pos = d->len;
1139   enum { WORD, SPACE, ESCAPE }; unsigned s = WORD;
1140   int ch, rc;
1141
1142   for (;;) {
1143     ch = getc(tv->fp);
1144     switch (ch) {
1145       case EOF:
1146         tvec_syntax(tv, ch, "bareword");
1147         rc = -1; goto end;
1148       case '\n':
1149         if (s == ESCAPE) { tv->lno++; goto addch; }
1150         if (s == WORD) pos = d->len;
1151         ungetc(ch, tv->fp); if (tvec_nexttoken(tv)) { rc = -1; goto end; }
1152         DPUTC(d, ' '); s = SPACE;
1153         break;
1154       case '"': case '\'': case '!': case '#': case ')': case '}': case ']':
1155         if (s == SPACE) { ungetc(ch, tv->fp); goto done; }
1156         goto addch;
1157       case '\\':
1158         s = ESCAPE;
1159         break;
1160       default:
1161         if (s != ESCAPE && isspace(ch)) {
1162           if (s == WORD) pos = d->len;
1163           DPUTC(d, ch); s = SPACE;
1164           break;
1165         }
1166       addch:
1167         DPUTC(d, ch); s = WORD;
1168     }
1169   }
1170
1171 done:
1172   if (s == SPACE) d->len = pos;
1173   DPUTZ(d); rc = 0;
1174 end:
1175   return (rc);
1176 }
1177
1178 /* --- @set_up_encoding@ --- *
1179  *
1180  * Arguments:   @const codec_class **ccl_out@ = where to put the class
1181  *              @unsigned *f_out@ = where to put the flags
1182  *              @unsigned code@ = the coding scheme to use (@TVEC_...@)
1183  *
1184  * Returns:     ---
1185  *
1186  * Use:         Helper for @read_compound_string@ below.
1187  *
1188  *              Return the appropriate codec class and flags for @code@.
1189  *              Leaves @*ccl_out@ null if the coding scheme doesn't have a
1190  *              backing codec class (e.g., @TVCODE_BARE@).
1191  */
1192
1193 enum { TVCODE_BARE, TVCODE_HEX, TVCODE_BASE64, TVCODE_BASE32 };
1194 static void set_up_encoding(const codec_class **ccl_out, unsigned *f_out,
1195                             unsigned code)
1196 {
1197   switch (code) {
1198     case TVCODE_BARE:
1199       *ccl_out = 0; *f_out = 0;
1200       break;
1201     case TVCODE_HEX:
1202       *ccl_out = &hex_class; *f_out = CDCF_IGNCASE;
1203       break;
1204     case TVCODE_BASE32:
1205       *ccl_out = &base32_class; *f_out = CDCF_IGNCASE | CDCF_IGNEQPAD;
1206       break;
1207     case TVCODE_BASE64:
1208       *ccl_out = &base64_class; *f_out = CDCF_IGNEQPAD;
1209       break;
1210     default:
1211       abort();
1212   }
1213 }
1214
1215 /* --- @flush_codec@ --- *
1216  *
1217  * Arguments:   @codec *cdc@ = a codec, or null
1218  *              @dstr *d@ = output string
1219  *              @struct tvec_state *tv@ = test vector state
1220  *
1221  * Returns:     Zero on success, @-1@ on error.
1222  *
1223  * Use:         Helper for @read_compound_string@ below.
1224  *
1225  *              Flush out any final buffered material from @cdc@, and check
1226  *              that it's in a good state.  Frees the codec on success.  Does
1227  *              nothing if @cdc@ is null.
1228  */
1229
1230 static int flush_codec(codec *cdc, dstr *d, struct tvec_state *tv)
1231 {
1232   int err;
1233
1234   if (cdc) {
1235     err = cdc->ops->code(cdc, 0, 0, d);
1236     if (err)
1237       return (tvec_error(tv, "invalid %s sequence end: %s",
1238                          cdc->ops->c->name, codec_strerror(err)));
1239     cdc->ops->destroy(cdc);
1240   }
1241   return (0);
1242 }
1243
1244 /* --- @read_compound_string@ --- *
1245  *
1246  * Arguments:   @void **p_inout@ = address of output buffer pointer
1247  *              @size_t *sz_inout@ = address of buffer size
1248  *              @unsigned code@ = initial interpretation of barewords
1249  *              @unsigned f@ = other flags (@RCSF_...@)
1250  *              @struct tvec_state *tv@ = test vector state
1251  *
1252  * Returns:     Zero on success, @-1@ on error.
1253  *
1254  * Use:         Parse a compound string, i.e., a sequence of stringish pieces
1255  *              which might be quoted strings, character names, or barewords
1256  *              to be decoded accoding to @code@, interspersed with
1257  *              additional directives.
1258  *
1259  *              If the initial buffer pointer is non-null and sufficiently
1260  *              large, then it will be reused; otherwise, it is freed and a
1261  *              fresh, sufficiently large buffer is allocated and returned.
1262  *              This buffer unconditionally uses the standard-library arena.
1263  */
1264
1265 #define RCSF_NESTED 1u
1266 static int read_compound_string(void **p_inout, size_t *sz_inout,
1267                                 unsigned code, unsigned f,
1268                                 struct tvec_state *tv)
1269 {
1270   const codec_class *ccl; unsigned cdf;
1271   codec *cdc;
1272   dstr d = DSTR_INIT, w = DSTR_INIT;
1273   char *p;
1274   const char *q;
1275   void *pp = 0; size_t sz;
1276   unsigned long n;
1277   int ch, err, rc;
1278
1279   set_up_encoding(&ccl, &cdf, code); cdc = 0;
1280   d.a = w.a = &tv->p_test->a;
1281
1282   if (tvec_nexttoken(tv)) return (tvec_syntax(tv, fgetc(tv->fp), "string"));
1283   do {
1284     ch = getc(tv->fp);
1285     switch (ch) {
1286
1287       case ')': case ']': case '}':
1288         /* Close brackets.  Leave these for recursive caller if there is one,
1289          * or just complain.
1290          */
1291
1292         if (!(f&RCSF_NESTED))
1293           { rc = tvec_syntax(tv, ch, "string"); goto end; }
1294         ungetc(ch, tv->fp); goto done;
1295
1296       case '"': case '\'':
1297         /* Quotes.  Read a quoted string. */
1298
1299         if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1300         cdc = 0;
1301         if (read_quoted_string(&d, ch, tv)) { rc = -1; goto end; }
1302         break;
1303
1304       case '#':
1305         /* A named character. */
1306
1307         ungetc(ch, tv->fp);
1308         if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1309         cdc = 0;
1310         DRESET(&w); tvec_readword(tv, &w, 0, ";", "character name");
1311         if (STRCMP(w.buf, ==, "#empty")) break;
1312         if (read_charname(&ch, w.buf, RCF_EOFOK)) {
1313           rc = tvec_error(tv, "unknown character name `%s'", d.buf);
1314           goto end;
1315         }
1316         DPUTC(&d, ch); break;
1317
1318       case '!':
1319         /* A magic keyword. */
1320
1321         if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1322         cdc = 0;
1323         ungetc(ch, tv->fp);
1324         DRESET(&w); tvec_readword(tv, &w, 0, ";", "`!'-keyword");
1325
1326         /* Change bareword coding system. */
1327         if (STRCMP(w.buf, ==, "!bare"))
1328           { code = TVCODE_BARE; set_up_encoding(&ccl, &cdf, code); }
1329         else if (STRCMP(w.buf, ==, "!hex"))
1330           { code = TVCODE_HEX; set_up_encoding(&ccl, &cdf, code); }
1331         else if (STRCMP(w.buf, ==, "!base32"))
1332           { code = TVCODE_BASE32; set_up_encoding(&ccl, &cdf, code); }
1333         else if (STRCMP(w.buf, ==, "!base64"))
1334           { code = TVCODE_BASE64; set_up_encoding(&ccl, &cdf, code); }
1335
1336         /* Repeated substrings. */
1337         else if (STRCMP(w.buf, ==, "!repeat")) {
1338           if (tvec_nexttoken(tv)) {
1339             rc = tvec_syntax(tv, fgetc(tv->fp), "repeat count");
1340             goto end;
1341           }
1342           DRESET(&w);
1343           if (tvec_readword(tv, &w, 0, ";{", "repeat count"))
1344             { rc = -1; goto end;  }
1345           if (parse_unsigned_integer(&n, &q, w.buf)) {
1346             rc = tvec_error(tv, "invalid repeat count `%s'", w.buf);
1347             goto end;
1348           }
1349           if (*q) { rc = tvec_syntax(tv, *q, "`{'"); goto end; }
1350           if (tvec_nexttoken(tv))
1351             { rc = tvec_syntax(tv, fgetc(tv->fp), "`{'"); goto end; }
1352           ch = getc(tv->fp); if (ch != '{')
1353             { rc = tvec_syntax(tv, ch, "`{'"); goto end; }
1354           sz = 0;
1355           if (read_compound_string(&pp, &sz, code, f | RCSF_NESTED, tv))
1356             { rc = -1; goto end; }
1357           ch = getc(tv->fp); if (ch != '}')
1358             { rc = tvec_syntax(tv, ch, "`}'"); goto end; }
1359           if (sz) {
1360             if (n > (size_t)-1/sz)
1361               { rc = tvec_error(tv, "repeat size out of range"); goto end; }
1362             n *= sz;
1363             dstr_ensure(&d, n);
1364             fill_pattern(d.buf + d.len, n, pp, sz); d.len += n;
1365           }
1366           free(pp); pp = 0;
1367         }
1368
1369         /* Anything else is an error. */
1370         else {
1371           tvec_error(tv, "unknown string keyword `%s'", w.buf);
1372           rc = -1; goto end;
1373         }
1374         break;
1375
1376       default:
1377         /* A bareword.  Process it according to the current coding system. */
1378
1379         switch (code) {
1380           case TVCODE_BARE:
1381             ungetc(ch, tv->fp);
1382             if (collect_bare(&d, tv)) goto done;
1383             break;
1384           default:
1385             assert(ccl);
1386             ungetc(ch, tv->fp); DRESET(&w);
1387             if (tvec_readword(tv, &w, 0, ";",
1388                               "%s-encoded fragment", ccl->name))
1389               { rc = -1; goto end; }
1390             if (!cdc) cdc = ccl->decoder(cdf);
1391             err = cdc->ops->code(cdc, w.buf, w.len, &d);
1392             if (err) {
1393               tvec_error(tv, "invalid %s fragment `%s': %s",
1394                          ccl->name, w.buf, codec_strerror(err));
1395               rc = -1; goto end;
1396             }
1397             break;
1398         }
1399         break;
1400     }
1401   } while (!tvec_nexttoken(tv));
1402
1403 done:
1404   /* Wrap things up. */
1405   if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1406   cdc = 0;
1407   if (*sz_inout <= d.len)
1408     { free(*p_inout); *p_inout = x_alloc(&arena_stdlib, d.len + 1); }
1409   p = *p_inout; memcpy(p, d.buf, d.len); p[d.len] = 0; *sz_inout = d.len;
1410   rc = 0;
1411
1412 end:
1413   /* Clean up any debris. */
1414   if (cdc) cdc->ops->destroy(cdc);
1415   if (pp) free(pp);
1416   dstr_destroy(&d); dstr_destroy(&w);
1417   return (rc);
1418 }
1419
1420 /*----- Signed and unsigned integer types ---------------------------------*/
1421
1422 /* --- @init_int@, @init_uint@ --- *
1423  *
1424  * Arguments:   @union tvec_regval *rv@ = register value
1425  *              @const struct tvec_regdef *rd@ = register definition
1426  *
1427  * Returns:     ---
1428  *
1429  * Use:         Initialize a register value.
1430  *
1431  *              Integer values are initialized to zero.
1432  */
1433
1434 static void init_int(union tvec_regval *rv, const struct tvec_regdef *rd)
1435   { rv->i = 0; }
1436
1437 static void init_uint(union tvec_regval *rv, const struct tvec_regdef *rd)
1438   { rv->u = 0; }
1439
1440 /* --- @eq_int@, @eq_uint@ --- *
1441  *
1442  * Arguments:   @const union tvec_regval *rv0, *rv1@ = register values
1443  *              @const struct tvec_regdef *rd@ = register definition
1444  *
1445  * Returns:     Nonzero if the values are equal, zero if unequal
1446  *
1447  * Use:         Compare register values for equality.
1448  */
1449
1450 static int eq_int(const union tvec_regval *rv0, const union tvec_regval *rv1,
1451                   const struct tvec_regdef *rd)
1452   { return (rv0->i == rv1->i); }
1453
1454 static int eq_uint(const union tvec_regval *rv0,
1455                    const union tvec_regval *rv1,
1456                    const struct tvec_regdef *rd)
1457   { return (rv0->u == rv1->u); }
1458
1459 /* --- @copy_int@, @copy_uint@ --- *
1460  *
1461  * Arguments:   @union tvec_regval *rvd@ = destination register value
1462  *              @const union tvec_regval *rvs@ = source register value
1463  *              @const struct tvec_regdef *rd@ = register definition
1464  *
1465  * Returns:     ---
1466  *
1467  * Use:         Copy a register value.
1468  */
1469
1470 static void copy_int(union tvec_regval *rvd, const union tvec_regval *rvs,
1471                      const struct tvec_regdef *rd)
1472   { rvd->i = rvs->i; }
1473
1474 static void copy_uint(union tvec_regval *rvd, const union tvec_regval *rvs,
1475                       const struct tvec_regdef *rd)
1476   { rvd->u = rvs->u; }
1477
1478 /* --- @tobuf_int@, @tobuf_uint@ --- *
1479  *
1480  * Arguments:   @buf *b@ = buffer
1481  *              @const union tvec_regval *rv@ = register value
1482  *              @const struct tvec_regdef *rd@ = register definition
1483  *
1484  * Returns:     Zero on success, %$-1$% on failure.
1485  *
1486  * Use:         Serialize a register value to a buffer.
1487  *
1488  *              Integer values are serialized as little-endian 64-bit signed
1489  *              or unsigned integers.
1490  */
1491
1492 static int tobuf_int(buf *b, const union tvec_regval *rv,
1493                      const struct tvec_regdef *rd)
1494   { return (signed_to_buf(b, rv->i)); }
1495
1496 static int tobuf_uint(buf *b, const union tvec_regval *rv,
1497                        const struct tvec_regdef *rd)
1498   { return (unsigned_to_buf(b, rv->u)); }
1499
1500 /* --- @frombuf_int@, @frombuf_uint@ --- *
1501  *
1502  * Arguments:   @buf *b@ = buffer
1503  *              @union tvec_regval *rv@ = register value
1504  *              @const struct tvec_regdef *rd@ = register definition
1505  *
1506  * Returns:     Zero on success, %$-1$% on failure.
1507  *
1508  * Use:         Deserialize a register value from a buffer.
1509  *
1510  *              Integer values are serialized as 64-bit signed or unsigned
1511  *              integers.
1512  */
1513
1514 static int frombuf_int(buf *b, union tvec_regval *rv,
1515                        const struct tvec_regdef *rd)
1516   { return (signed_from_buf(b, &rv->i)); }
1517
1518 static int frombuf_uint(buf *b, union tvec_regval *rv,
1519                         const struct tvec_regdef *rd)
1520   { return (unsigned_from_buf(b, &rv->u)); }
1521
1522 /* --- @parse_int@, @parse_uint@ --- *
1523  *
1524  * Arguments:   @union tvec_regval *rv@ = register value
1525  *              @const struct tvec_regdef *rd@ = register definition
1526  *              @struct tvec_state *tv@ = test-vector state
1527  *
1528  * Returns:     Zero on success, %$-1$% on error.
1529  *
1530  * Use:         Parse a register value from an input file.
1531  *
1532  *              Integers may be input in decimal, hex, binary, or octal,
1533  *              following approximately usual conventions.
1534  *
1535  *                * Signed integers may be preceded with a `+' or `-' sign.
1536  *
1537  *                * Decimal integers are just a sequence of decimal digits
1538  *                  `0' ... `9'.
1539  *
1540  *                * Octal integers are a sequence of digits `0' ... `7',
1541  *                  preceded by `0o' or `0O'.
1542  *
1543  *                * Hexadecimal integers are a sequence of digits `0'
1544  *                  ... `9', `a' ... `f', or `A' ... `F', preceded by `0x' or
1545  *                  `0X'.
1546  *
1547  *                * Radix-B integers are a sequence of digits `0' ... `9',
1548  *                  `a' ... `f', or `A' ... `F', each with value less than B,
1549  *                  preceded by `Br' or `BR', where 0 < B < 36 is expressed
1550  *                  in decimal without any leading `0' or internal
1551  *                  underscores `_'.
1552  *
1553  *                * A digit sequence may contain internal underscore `_'
1554  *                  separators, but not before or after all of the digits;
1555  *                  and two consecutive `_' characters are not permitted.
1556  */
1557
1558 static int parse_int(union tvec_regval *rv, const struct tvec_regdef *rd,
1559                      struct tvec_state *tv)
1560 {
1561   dstr d = DSTR_INIT;
1562   int rc;
1563
1564   d.a = &tv->p_test->a;
1565   if (tvec_readword(tv, &d, 0, ";", "signed integer"))
1566     { rc = -1; goto end; }
1567   if (parse_signed(&rv->i, d.buf, rd->arg.p, tv)) { rc = -1; goto end; }
1568   rc = 0;
1569 end:
1570   return (rc);
1571 }
1572
1573 static int parse_uint(union tvec_regval *rv, const struct tvec_regdef *rd,
1574                       struct tvec_state *tv)
1575 {
1576   dstr d = DSTR_INIT;
1577   int rc;
1578
1579   d.a = &tv->p_test->a;
1580   if (tvec_readword(tv, &d, 0, ";", "unsigned integer"))
1581     { rc = -1; goto end; }
1582   if (parse_unsigned(&rv->u, d.buf, rd->arg.p, tv)) { rc = -1; goto end; }
1583   rc = 0;
1584 end:
1585   return (rc);
1586 }
1587
1588 /* --- @dump_int@, @dump_uint@ --- *
1589  *
1590  * Arguments:   @const union tvec_regval *rv@ = register value
1591  *              @const struct tvec_regdef *rd@ = register definition
1592  *              @unsigned style@ = output style (@TVSF_...@)
1593  *              @const struct gprintf_ops *gops@, @void *gp@ = format output
1594  *
1595  * Returns:     ---
1596  *
1597  * Use:         Dump a register value to the format output.
1598  *
1599  *              Integer values are dumped in decimal and, unless compact
1600  *              output is requested, hex, and maybe a character, as a
1601  *              comment.
1602  */
1603
1604 static void dump_int(const union tvec_regval *rv,
1605                      const struct tvec_regdef *rd,
1606                      unsigned style,
1607                      const struct gprintf_ops *gops, void *go)
1608 {
1609   if (style&TVSF_RAW) gprintf(gops, go, "int:");
1610   gprintf(gops, go, "%ld", rv->i);
1611   if (!(style&(TVSF_COMPACT | TVSF_RAW))) {
1612     gprintf(gops, go, " ; = ");
1613     format_signed_hex(gops, go, rv->i);
1614     maybe_format_signed_char(gops, go, rv->i);
1615   }
1616 }
1617
1618 static void dump_uint(const union tvec_regval *rv,
1619                       const struct tvec_regdef *rd,
1620                       unsigned style,
1621                       const struct gprintf_ops *gops, void *go)
1622 {
1623   if (style&TVSF_RAW) gprintf(gops, go, "uint:");
1624   gprintf(gops, go, "%lu", rv->u);
1625   if (!(style&(TVSF_COMPACT | TVSF_RAW))) {
1626     gprintf(gops, go, " ; = ");
1627     format_unsigned_hex(gops, go, rv->u);
1628     maybe_format_unsigned_char(gops, go, rv->u);
1629   }
1630 }
1631
1632 /* Integer type definitions. */
1633 const struct tvec_regty tvty_int = {
1634   init_int, trivial_release, eq_int, copy_int,
1635   tobuf_int, frombuf_int,
1636   parse_int, dump_int
1637 };
1638 const struct tvec_regty tvty_uint = {
1639   init_uint, trivial_release, eq_uint, copy_uint,
1640   tobuf_uint, frombuf_uint,
1641   parse_uint, dump_uint
1642 };
1643
1644 /* Predefined integer ranges. */
1645 const struct tvec_irange
1646   tvrange_schar = { SCHAR_MIN, SCHAR_MAX, 0, 0 },
1647   tvrange_short = { SHRT_MIN, SHRT_MAX, 0, 0 },
1648   tvrange_int = { INT_MIN, INT_MAX, 0, 0 },
1649   tvrange_long = { LONG_MIN, LONG_MAX, 0, 0 },
1650   tvrange_sbyte = { -128, 127, 0, 0 },
1651   tvrange_i16 = { -32768, +32767, 0, 0 },
1652   tvrange_i32 = { -2147483648, 2147483647, 0, 0 };
1653 const struct tvec_urange
1654   tvrange_uchar = { 0, UCHAR_MAX, 0, 0 },
1655   tvrange_ushort = { 0, USHRT_MAX, 0, 0 },
1656   tvrange_uint = { 0, UINT_MAX, 0, 0 },
1657   tvrange_ulong = { 0, ULONG_MAX, 0, 0 },
1658   tvrange_size = { 0, (size_t)-1, 0, 0 },
1659   tvrange_byte = { 0, 255, 0, 0 },
1660   tvrange_u16 = { 0, 65535, 0, 0 },
1661   tvrange_u32 = { 0, 4294967295, 0, 0 };
1662
1663 /* --- @tvec_claimeq_int@ --- *
1664  *
1665  * Arguments:   @struct tvec_state *tv@ = test-vector state
1666  *              @long i0, i1@ = two signed integers
1667  *              @const char *file@, @unsigned @lno@ = calling file and line
1668  *              @const char *expr@ = the expression to quote on failure
1669  *
1670  * Returns:     Nonzero if @i0@ and @i1@ are equal, otherwise zero.
1671  *
1672  * Use:         Check that values of @i0@ and @i1@ are equal.  As for
1673  *              @tvec_claim@ above, a test case is automatically begun and
1674  *              ended if none is already underway.  If the values are
1675  *              unequal, then @tvec_fail@ is called, quoting @expr@, and the
1676  *              mismatched values are dumped: @i0@ is printed as the output
1677  *              value and @i1@ is printed as the input reference.
1678  */
1679
1680 int tvec_claimeq_int(struct tvec_state *tv, long i0, long i1,
1681                      const char *file, unsigned lno, const char *expr)
1682 {
1683   struct tvec_reg rval, rref;
1684
1685   rval.f = rref.f = TVRF_LIVE; rval.v.i = i0; rref.v.i = i1;
1686   return (tvec_claimeq(tv, &tvty_int, 0, &rval, &rref, file, lno, expr));
1687 }
1688
1689 /* --- @tvec_claimeq_uint@ --- *
1690  *
1691  * Arguments:   @struct tvec_state *tv@ = test-vector state
1692  *              @unsigned long u0, u1@ = two unsigned integers
1693  *              @const char *file@, @unsigned @lno@ = calling file and line
1694  *              @const char *expr@ = the expression to quote on failure
1695  *
1696  * Returns:     Nonzero if @u0@ and @u1@ are equal, otherwise zero.
1697  *
1698  * Use:         Check that values of @u0@ and @u1@ are equal.  As for
1699  *              @tvec_claim@ above, a test case is automatically begun and
1700  *              ended if none is already underway.  If the values are
1701  *              unequal, then @tvec_fail@ is called, quoting @expr@, and the
1702  *              mismatched values are dumped: @u0@ is printed as the output
1703  *              value and @u1@ is printed as the input reference.
1704  */
1705
1706 int tvec_claimeq_uint(struct tvec_state *tv,
1707                       unsigned long u0, unsigned long u1,
1708                       const char *file, unsigned lno, const char *expr)
1709 {
1710   struct tvec_reg rval, rref;
1711
1712   rval.f = rref.f = TVRF_LIVE; rval.v.u = u0; rref.v.u = u1;
1713   return (tvec_claimeq(tv, &tvty_uint, 0, &rval, &rref, file, lno, expr));
1714 }
1715
1716 /*----- Size type ---------------------------------------------------------*/
1717
1718 /* --- @parse_size@ --- *
1719  *
1720  * Arguments:   @union tvec_regval *rv@ = register value
1721  *              @const struct tvec_regdef *rd@ = register definition
1722  *              @struct tvec_state *tv@ = test-vector state
1723  *
1724  * Returns:     Zero on success, %$-1$% on error.
1725  *
1726  * Use:         Parse a register value from an input file.
1727  *
1728  *              The input format for a size value consists of an unsigned
1729  *              integer followed by an optional unit specifier consisting of
1730  *              an SI unit prefix and (optionally) the letter `B'. */
1731
1732 static int parse_size(union tvec_regval *rv, const struct tvec_regdef *rd,
1733                       struct tvec_state *tv)
1734 {
1735   unsigned long sz;
1736   int rc;
1737
1738   if (parse_szint(tv, &sz, ";", "size")) { rc = -1; goto end; }
1739   if (check_unsigned_range(sz, rd->arg.p, tv, "size")) { rc = -1; goto end; }
1740   rv->u = sz; rc = 0;
1741 end:
1742   return (rc);
1743 }
1744
1745 /* --- @dump_size@ --- *
1746  *
1747  * Arguments:   @const union tvec_regval *rv@ = register value
1748  *              @const struct tvec_regdef *rd@ = register definition
1749  *              @unsigned style@ = output style (@TVSF_...@)
1750  *              @const struct gprintf_ops *gops@, @void *gp@ = format output
1751  *
1752  * Returns:     ---
1753  *
1754  * Use:         Dump a register value to the format output.
1755  *
1756  *              Size values are dumped with a unit specifier, with a unit
1757  *              prefox only if the size is an exact multiple of the relevant
1758  *              power of two.  Unless compact style is requested, the plain
1759  *              decimal and hex representations of the value are also
1760  *              printed.
1761  */
1762
1763 static void dump_size(const union tvec_regval *rv,
1764                       const struct tvec_regdef *rd,
1765                       unsigned style,
1766                       const struct gprintf_ops *gops, void *go)
1767 {
1768   if (style&TVSF_RAW) gprintf(gops, go, "size:");
1769   format_size(gops, go, rv->u, style);
1770   if (!(style&(TVSF_COMPACT | TVSF_RAW))) {
1771     gprintf(gops, go, " ; = %lu", (unsigned long)rv->u);
1772     gprintf(gops, go, " = "); format_unsigned_hex(gops, go, rv->u);
1773     maybe_format_unsigned_char(gops, go, rv->u);
1774   }
1775 }
1776
1777 /* Size type definitions. */
1778 const struct tvec_regty tvty_size = {
1779   init_uint, trivial_release, eq_uint, copy_uint,
1780   tobuf_uint, frombuf_uint,
1781   parse_size, dump_size
1782 };
1783
1784 /* --- @tvec_claimeq_size@ --- *
1785  *
1786  * Arguments:   @struct tvec_state *tv@ = test-vector state
1787  *              @unsigned long sz0, sz1@ = two sizes
1788  *              @const char *file@, @unsigned @lno@ = calling file and line
1789  *              @const char *expr@ = the expression to quote on failure
1790  *
1791  * Returns:     Nonzero if @sz0@ and @sz1@ are equal, otherwise zero.
1792  *
1793  * Use:         Check that values of @u0@ and @u1@ are equal.  As for
1794  *              @tvec_claim@ above, a test case is automatically begun and
1795  *              ended if none is already underway.  If the values are
1796  *              unequal, then @tvec_fail@ is called, quoting @expr@, and the
1797  *              mismatched values are dumped: @u0@ is printed as the output
1798  *              value and @u1@ is printed as the input reference.
1799  */
1800
1801 int tvec_claimeq_size(struct tvec_state *tv,
1802                       unsigned long sz0, unsigned long sz1,
1803                       const char *file, unsigned lno, const char *expr)
1804 {
1805   struct tvec_reg rval, rref;
1806
1807   rval.f = rref.f = TVRF_LIVE; rval.v.u = sz0; rref.v.u = sz1;
1808   return (tvec_claimeq(tv, &tvty_size, 0, &rval, &rref, file, lno, expr));
1809 }
1810
1811 /*----- Floating-point type -----------------------------------------------*/
1812
1813 /* --- @int_float@ --- *
1814  *
1815  * Arguments:   @union tvec_regval *rv@ = register value
1816  *              @const struct tvec_regdef *rd@ = register definition
1817  *
1818  * Returns:     ---
1819  *
1820  * Use:         Initialize a register value.
1821  *
1822  *              Floating-point values are initialized to zero.
1823  */
1824
1825 static void init_float(union tvec_regval *rv, const struct tvec_regdef *rd)
1826   { rv->f = 0.0; }
1827
1828 /* --- @eq_float@ --- *
1829  *
1830  * Arguments:   @const union tvec_regval *rv0, *rv1@ = register values
1831  *              @const struct tvec_regdef *rd@ = register definition
1832  *
1833  * Returns:     Nonzero if the values are equal, zero if unequal
1834  *
1835  * Use:         Compare register values for equality.
1836  *
1837  *              Floating-point values may be considered equal if their
1838  *              absolute or relative difference is sufficiently small, as
1839  *              described in the register definition.
1840  */
1841
1842 static int eq_float(const union tvec_regval *rv0,
1843                     const union tvec_regval *rv1,
1844                     const struct tvec_regdef *rd)
1845   { return (eqish_floating_p(rv1->f, rv0->f, rd->arg.p)); }
1846
1847 /* --- @copy_float@ --- *
1848  *
1849  * Arguments:   @union tvec_regval *rvd@ = destination register value
1850  *              @const union tvec_regval *rvs@ = source register value
1851  *              @const struct tvec_regdef *rd@ = register definition
1852  *
1853  * Returns:     ---
1854  *
1855  * Use:         Copy a register value.
1856  */
1857
1858 static void copy_float(union tvec_regval *rvd, const union tvec_regval *rvs,
1859                        const struct tvec_regdef *rd)
1860   { rvd->f = rvs->f; }
1861
1862 /* --- @tobuf_float@ --- *
1863  *
1864  * Arguments:   @buf *b@ = buffer
1865  *              @const union tvec_regval *rv@ = register value
1866  *              @const struct tvec_regdef *rd@ = register definition
1867  *
1868  * Returns:     Zero on success, %$-1$% on failure.
1869  *
1870  * Use:         Serialize a register value to a buffer.
1871  *
1872  *              Floating-point values are serialized as little-endian
1873  *              IEEE 754 Binary64.
1874  */
1875
1876 static int tobuf_float(buf *b, const union tvec_regval *rv,
1877                      const struct tvec_regdef *rd)
1878   { return (buf_putf64l(b, rv->f)); }
1879
1880 /* --- @frombuf_float@ --- *
1881  *
1882  * Arguments:   @buf *b@ = buffer
1883  *              @union tvec_regval *rv@ = register value
1884  *              @const struct tvec_regdef *rd@ = register definition
1885  *
1886  * Returns:     Zero on success, %$-1$% on failure.
1887  *
1888  * Use:         Deserialize a register value from a buffer.
1889  *
1890  *              Floating-point values are serialized as little-endian
1891  *              IEEE 754 Binary64.
1892  */
1893
1894 static int frombuf_float(buf *b, union tvec_regval *rv,
1895                          const struct tvec_regdef *rd)
1896 {
1897   double t;
1898   int rc;
1899
1900   rc = buf_getf64l(b, &t); if (!rc) rv->f = t;
1901   return (rc);
1902 }
1903
1904 /* --- @parse_float@ --- *
1905  *
1906  * Arguments:   @union tvec_regval *rv@ = register value
1907  *              @const struct tvec_regdef *rd@ = register definition
1908  *              @struct tvec_state *tv@ = test-vector state
1909  *
1910  * Returns:     Zero on success, %$-1$% on error.
1911  *
1912  * Use:         Parse a register value from an input file.
1913  *
1914  *              Floating-point values are either NaN (%|#nan|%, if supported
1915  *              by the platform); positive or negative infinity (%|#inf|%,
1916  *              %|+#inf|%, or %|#+inf|% (preferring the last), and %|-#inf|%
1917  *              or %|#-inf|% (preferring the latter), if supported by the
1918  *              platform); or a number in strtod(3) syntax.
1919  */
1920
1921 static int parse_float(union tvec_regval *rv, const struct tvec_regdef *rd,
1922                        struct tvec_state *tv)
1923 {
1924   dstr d = DSTR_INIT;
1925   int rc;
1926
1927   d.a = &tv->p_test->a;
1928   if (tvec_readword(tv, &d, 0, ";", "floating-point number"))
1929     { rc = -1; goto end; }
1930   if (parse_floating(&rv->f, 0, d.buf, rd->arg.p, tv))
1931     { rc = -1; goto end; }
1932   rc = 0;
1933 end:
1934   return (rc);
1935 }
1936
1937 /* --- @dump_float@ --- *
1938  *
1939  * Arguments:   @const union tvec_regval *rv@ = register value
1940  *              @const struct tvec_regdef *rd@ = register definition
1941  *              @unsigned style@ = output style (@TVSF_...@)
1942  *              @const struct gprintf_ops *gops@, @void *gp@ = format output
1943  *
1944  * Returns:     ---
1945  *
1946  * Use:         Dump a register value to the format output.
1947  *
1948  *              Floating-point values are dumped in decimal or as a special
1949  *              token beginning with `%|#|%'.  Some effort is taken to ensure
1950  *              that the output is sufficient to uniquely identify the
1951  *              original value, but, honestly, C makes this really hard.
1952  */
1953
1954 static void dump_float(const union tvec_regval *rv,
1955                        const struct tvec_regdef *rd,
1956                        unsigned style,
1957                        const struct gprintf_ops *gops, void *go)
1958 {
1959   if (style&TVSF_RAW) gprintf(gops, go, "float:");
1960   format_floating(gops, go, rv->f);
1961 }
1962
1963 /* Floating-point type definition. */
1964 const struct tvec_regty tvty_float = {
1965   init_float, trivial_release, eq_float, copy_float,
1966   tobuf_float, frombuf_float,
1967   parse_float, dump_float
1968 };
1969
1970 /* Predefined floating-point ranges. */
1971 const struct tvec_floatinfo
1972   tvflt_float = { TVFF_RELDELTA | TVFF_INFOK | TVFF_NANOK,
1973                   -FLT_MAX, FLT_MAX, FLT_EPSILON/2 },
1974   tvflt_double = { TVFF_EXACT | TVFF_INFOK | TVFF_NANOK,
1975                    -DBL_MAX, DBL_MAX, 0.0 },
1976   tvflt_finite = { TVFF_EXACT, -DBL_MAX, DBL_MAX, 0.0 },
1977   tvflt_nonneg = { TVFF_EXACT, 0, DBL_MAX, 0.0 };
1978
1979 /* --- @tvec_claimeqish_float@ --- *
1980  *
1981  * Arguments:   @struct tvec_state *tv@ = test-vector state
1982  *              @double f0, f1@ = two floating-point numbers
1983  *              @unsigned f@ = flags (@TVFF_...@)
1984  *              @double delta@ = maximum tolerable difference
1985  *              @const char *file@, @unsigned @lno@ = calling file and line
1986  *              @const char *expr@ = the expression to quote on failure
1987  *
1988  * Returns:     Nonzero if @f0@ and @f1@ are sufficiently close, otherwise
1989  *              zero.
1990  *
1991  * Use:         Check that values of @f0@ and @f1@ are sufficiently close.
1992  *              As for @tvec_claim@ above, a test case is automatically begun
1993  *              and ended if none is already underway.  If the values are
1994  *              too far apart, then @tvec_fail@ is called, quoting @expr@,
1995  *              and the mismatched values are dumped: @f0@ is printed as the
1996  *              output value and @f1@ is printed as the input reference.
1997  *
1998  *              The details for the comparison are as follows.
1999  *
2000  *                * A NaN value matches any other NaN, and nothing else.
2001  *
2002  *                * An infinity matches another infinity of the same sign,
2003  *                  and nothing else.
2004  *
2005  *                * If @f&TVFF_EQMASK@ is @TVFF_EXACT@, then any
2006  *                  representable number matches only itself: in particular,
2007  *                  positive and negative zero are considered distinct.
2008  *                  (This allows tests to check that they land on the correct
2009  *                  side of branch cuts, for example.)
2010  *
2011  *                * If @f&TVFF_EQMASK@ is @TVFF_ABSDELTA@, then %$x$% matches
2012  *                  %$y$% when %$|x - y| < \delta$%.
2013  *
2014  *                * If @f&TVFF_EQMASK@ is @TVFF_RELDELTA@, then %$x$% matches
2015  *                  %$y$% when %$|1 - x/y| < \delta$%.  (Note that this
2016  *                  criterion is asymmetric.  Write %$x \approx_\delta y$%
2017  *                  if and only if %$|1 - x/y < \delta$%.  Then, for example,
2018  *                  if %$y/(1 + \delta) < x < y (1 - \delta)$%, then
2019  *                  %$x \approx_\delta y$%, but %$y \not\approx_\delta x$%.)
2020  */
2021
2022 int tvec_claimeqish_float(struct tvec_state *tv,
2023                           double f0, double f1, unsigned f, double delta,
2024                           const char *file, unsigned lno,
2025                           const char *expr)
2026 {
2027   struct tvec_floatinfo fi;
2028   struct tvec_reg rval, rref;
2029   union tvec_misc arg;
2030
2031   fi.f = f; fi.min = fi.max = 0.0; fi.delta = delta; arg.p = &fi;
2032   rval.f = rref.f = TVRF_LIVE; rval.v.f = f0; rref.v.f = f1;
2033   return (tvec_claimeq(tv, &tvty_float, &arg,
2034                        &rval, &rref, file, lno, expr));
2035 }
2036
2037 /* --- @tvec_claimeq_float@ --- *
2038  *
2039  * Arguments:   @struct tvec_state *tv@ = test-vector state
2040  *              @double f0, f1@ = two floating-point numbers
2041  *              @const char *file@, @unsigned @lno@ = calling file and line
2042  *              @const char *expr@ = the expression to quote on failure
2043  *
2044  * Returns:     Nonzero if @f0@ and @f1@ are identical, otherwise zero.
2045  *
2046  * Use:         Check that values of @f0@ and @f1@ are identical.  The
2047  *              function is exactly equivalent to @tvec_claimeqish_float@
2048  *              with @f == TVFF_EXACT@.
2049  */
2050
2051 int tvec_claimeq_float(struct tvec_state *tv,
2052                        double f0, double f1,
2053                        const char *file, unsigned lno,
2054                        const char *expr)
2055 {
2056   return (tvec_claimeqish_float(tv, f0, f1, TVFF_EXACT, 0.0,
2057                                 file, lno, expr));
2058 }
2059
2060 /*----- Durations ---------------------------------------------------------*/
2061
/* A duration is a floating-point number of seconds.  Initialization and
 * teardown, equality comparison, and serialization are as for floating-point
 * values.
 *
 * The table below maps unit names to the number of seconds in one unit.  It
 * is ordered by nonincreasing scale and terminated by an entry with a null
 * @unit@ pointer; @dump_duration@ relies on the ordering.
 */

#define DUF_PREFER 1u                   /* flag set on one name per scale */

static const struct duration_unit {
  const char *unit;                     /* unit name as written */
  double scale;                         /* seconds per unit */
  unsigned f;                           /* flags (@DUF_...@) */
} duration_units[] = {

  /* SI multiples of the second. */
  { "Ys",       1e+24,          0 },
  { "Zs",       1e+21,          0 },
  { "Es",       1e+18,          0 },
  { "Ps",       1e+15,          0 },
  { "Ts",       1e+12,          0 },
  { "Gs",       1e+9,           0 },
  { "Ms",       1e+6,           0 },
  { "ks",       1e+3,           0 },
  { "hs",       1e+2,           0 },
  { "das",      1e+1,           0 },

  /* Calendar-ish units; a year here is 365.25 days. */
  { "yr",       31557600.0,     DUF_PREFER },
  { "year",     31557600.0,     0 },
  { "years",    31557600.0,     0 },
  { "y",        31557600.0,     0 },
  { "wk",       604800.0,       DUF_PREFER },
  { "week",     604800.0,       0 },
  { "weeks",    604800.0,       0 },
  { "w",        604800.0,       0 },
  { "day",      86400.0,        DUF_PREFER },
  { "days",     86400.0,        0 },
  { "dy",       86400.0,        0 },
  { "d",        86400.0,        0 },
  { "hr",       3600.0,         DUF_PREFER },
  { "hour",     3600.0,         0 },
  { "hours",    3600.0,         0 },
  { "h",        3600.0,         0 },
  { "min",      60.0,           DUF_PREFER },
  { "minute",   60.0,           0 },
  { "minutes",  60.0,           0 },
  { "m",        60.0,           0 },

  /* The second itself. */
  { "s",        1.0,            DUF_PREFER },
  { "sec",      1.0,            0 },
  { "second",   1.0,            0 },
  { "seconds",  1.0,            0 },

  /* SI submultiples of the second. */
  { "ds",       1e-1,           0 },
  { "cs",       1e-2,           0 },
  { "ms",       1e-3,           DUF_PREFER },
  { "µs",       1e-6,           DUF_PREFER },
  { "ns",       1e-9,           DUF_PREFER },
  { "ps",       1e-12,          DUF_PREFER },
  { "fs",       1e-15,          DUF_PREFER },
  { "as",       1e-18,          DUF_PREFER },
  { "zs",       1e-21,          DUF_PREFER },
  { "ys",       1e-24,          DUF_PREFER },

  /* End marker. */
  { 0, 0.0, 0 }
};
2123
2124 /* --- @tvec_parsedurunit@ --- *
2125  *
2126  * Arguments:   @double *scale_out@ = where to leave the scale
2127  *              @const char **p_inout@ = input unit string, updated
2128  *
2129  * Returns:     Zero on success, %$-1$% on error.
2130  *
2131  * Use:         If @*p_inout@ begins with a unit string followed by the end
2132  *              of the string or some non-alphanumeric character, then store
2133  *              the corresponding scale factor in @*scale_out@, advance
2134  *              @*p_inout@ past the unit string, and return zero.  Otherwise,
2135  *              return %$-1$%.
2136  */
2137
2138 int tvec_parsedurunit(double *scale_out, const char **p_inout)
2139 {
2140   const char *p = *p_inout, *q;
2141   const struct duration_unit *u;
2142   size_t n;
2143
2144   while (ISSPACE(*p)) p++;
2145   for (q = p; *q && ISALNUM(*q); q++);
2146   n = q - p; if (!n) { *scale_out = 1.0; return (0); }
2147
2148   for (u = duration_units; u->unit; u++)
2149     if (STRNCMP(p, ==, u->unit, n) && !u->unit[n])
2150       { *scale_out = u->scale; *p_inout = q; return (0); }
2151   return (-1);
2152 }
2153
2154 /* --- @parse_duration@ --- *
2155  *
2156  * Arguments:   @union tvec_regval *rv@ = register value
2157  *              @const struct tvec_regdef *rd@ = register definition
2158  *              @struct tvec_state *tv@ = test-vector state
2159  *
2160  * Returns:     Zero on success, %$-1$% on error.
2161  *
2162  * Use:         Parse a register value from an input file.
2163  *
2164  *              Duration values are finite nonnegative floating-point
2165  *              numbers in @strtod@ syntax, optionally followed by a unit .
2166  */
2167
2168 static int parse_duration(union tvec_regval *rv,
2169                           const struct tvec_regdef *rd,
2170                           struct tvec_state *tv)
2171 {
2172   const struct duration_unit *u;
2173   const char *q;
2174   dstr d = DSTR_INIT;
2175   double t;
2176   int rc;
2177
2178   d.a = &tv->p_test->a;
2179   if (tvec_readword(tv, &d, 0, ";", "duration")) { rc = -1; goto end; }
2180   if (parse_floating(&t, &q, d.buf,
2181                      rd->arg.p ? rd->arg.p : &tvflt_nonneg, tv))
2182     { rc = -1; goto end; }
2183
2184   if (!*q) tvec_readword(tv, &d, &q, ";", 0);
2185   if (*q) {
2186     for (u = duration_units; u->unit; u++)
2187       if (STRCMP(q, ==, u->unit)) { t *= u->scale; goto found_unit; }
2188     rc = tvec_syntax(tv, *q, "end-of-line"); goto end;
2189   found_unit:;
2190   }
2191
2192   rv->f = t; rc = 0;
2193 end:
2194   return (rc);
2195 }
2196
2197 /* --- @dump_duration@ --- *
2198  *
2199  * Arguments:   @const union tvec_regval *rv@ = register value
2200  *              @const struct tvec_regdef *rd@ = register definition
2201  *              @unsigned style@ = output style (@TVSF_...@)
2202  *              @const struct gprintf_ops *gops@, @void *gp@ = format output
2203  *
2204  * Returns:     ---
2205  *
2206  * Use:         Dump a register value to the format output.
2207  *
2208  *              Durations are dumped as a human-palatable scaled value with
2209  *              unit, and, if compact style is not requested, as a raw number
2210  *              of seconds at full precision as a comment.
2211  */
2212
2213 static void dump_duration(const union tvec_regval *rv,
2214                           const struct tvec_regdef *rd,
2215                           unsigned style,
2216                           const struct gprintf_ops *gops, void *go)
2217 {
2218   const struct duration_unit *u;
2219   double t = rv->f;
2220
2221   if (style&TVSF_RAW) {
2222     gprintf(gops, go, "duration:");
2223     format_floating(gops, go, rv->f);
2224     gprintf(gops, go, "s");
2225   } else {
2226     if (!t) u = 0;
2227     else {
2228       for (u = duration_units; u->scale > t && u[1].unit; u++);
2229       t /= u->scale;
2230     }
2231     gprintf(gops, go, "%.4g %s", t, u ? u->unit : "s");
2232
2233     if (!(style&TVSF_COMPACT)) {
2234       gprintf(gops, go, "; = ");
2235       format_floating(gops, go, rv->f);
2236       gprintf(gops, go, " s");
2237     }
2238   }
2239 }
2240
2241 /* Duration type definition. */
2242 const struct tvec_regty tvty_duration = {
2243   init_float, trivial_release, eq_float, copy_float,
2244   tobuf_float, frombuf_float,
2245   parse_duration, dump_duration
2246 };
2247
2248 /* --- @tvec_claimeqish_duration@ --- *
2249  *
2250  * Arguments:   @struct tvec_state *tv@ = test-vector state
2251  *              @double t0, t1@ = two durations
2252  *              @unsigned f@ = flags (@TVFF_...@)
2253  *              @double delta@ = maximum tolerable difference
2254  *              @const char *file@, @unsigned @lno@ = calling file and line
2255  *              @const char *expr@ = the expression to quote on failure
2256  *
2257  * Returns:     Nonzero if @t0@ and @t1@ are sufficiently close, otherwise
2258  *              zero.
2259  *
2260  * Use:         Check that values of @t0@ and @t1@ are sufficiently close.
2261  *              This is essentially the same as @tvec_claimeqish_float@, only
2262  *              it dumps the values as durations on a mismatch.
2263  */
2264
2265 int tvec_claimeqish_duration(struct tvec_state *tv,
2266                              double t0, double t1, unsigned f, double delta,
2267                              const char *file, unsigned lno,
2268                              const char *expr)
2269 {
2270   struct tvec_floatinfo fi;
2271   struct tvec_reg rval, rref;
2272   union tvec_misc arg;
2273
2274   fi.f = f; fi.min = fi.max = 0.0; fi.delta = delta; arg.p = &fi;
2275   rval.f = rref.f = TVRF_LIVE; rval.v.f = t0; rref.v.f = t1;
2276   return (tvec_claimeq(tv, &tvty_duration, &arg,
2277                        &rval, &rref, file, lno, expr));
2278 }
2279
2280 /* --- @tvec_claimeq_duration@ --- *
2281  *
2282  * Arguments:   @struct tvec_state *tv@ = test-vector state
2283  *              @double t0, t1@ = two durations
2284  *              @const char *file@, @unsigned @lno@ = calling file and line
2285  *              @const char *expr@ = the expression to quote on failure
2286  *
2287  * Returns:     Nonzero if @t0@ and @t1@ are identical, otherwise zero.
2288  *
2289  * Use:         Check that values of @t0@ and @t1@ are identical.  The
2290  *              function is exactly equivalent to @tvec_claimeqish_duration@
2291  *              with @f == TVFF_EXACT@.
2292  */
2293
2294 int tvec_claimeq_duration(struct tvec_state *tv,
2295                           double t0, double t1,
2296                           const char *file, unsigned lno,
2297                           const char *expr)
2298 {
2299   return (tvec_claimeqish_duration(tv, t0, t1, TVFF_EXACT, 0.0,
2300                                    file, lno, expr));
2301 }
2302
2303 /*----- Enumerations ------------------------------------------------------*/
2304
2305 /* --- @init_tenum@ --- *
2306  *
2307  * Arguments:   @union tvec_regval *rv@ = register value
2308  *              @const struct tvec_regdef *rd@ = register definition
2309  *
2310  * Returns:     ---
2311  *
2312  * Use:         Initialize a register value.
2313  *
2314  *              Integer and floating-point enumeration values are initialized
2315  *              as their underlying representations.  Pointer enumerations
2316  *              are initialized to %|#nil|%.
2317  */
2318
2319 #define init_ienum init_int
2320 #define init_uenum init_uint
2321 #define init_fenum init_float
2322
2323 static void init_penum(union tvec_regval *rv, const struct tvec_regdef *rd)
2324   { rv->p = 0; }
2325
2326 /* --- @eq_tenum@ --- *
2327  *
2328  * Arguments:   @const union tvec_regval *rv0, *rv1@ = register values
2329  *              @const struct tvec_regdef *rd@ = register definition
2330  *
2331  * Returns:     Nonzero if the values are equal, zero if unequal
2332  *
2333  * Use:         Compare register values for equality.
2334  *
2335  *              Integer and floating-point enumeration values are compared as
2336  *              their underlying representations; in particular, floating-
2337  *              point enumerations may compare equal if their absolute or
2338  *              relative difference is sufficiently small.  Pointer
2339  *              enumerations are compared as pointers.
2340  */
2341
2342 #define eq_ienum eq_int
2343 #define eq_uenum eq_uint
2344
2345 static int eq_fenum(const union tvec_regval *rv0,
2346                     const union tvec_regval *rv1,
2347                     const struct tvec_regdef *rd)
2348 {
2349   const struct tvec_fenuminfo *ei = rd->arg.p;
2350   return (eqish_floating_p(rv0->f, rv1->f, ei->fi));
2351 }
2352
2353 static int eq_penum(const union tvec_regval *rv0,
2354                     const union tvec_regval *rv1,
2355                     const struct tvec_regdef *rd)
2356   { return (rv0->p == rv1->p); }
2357
2358 /* --- @copy_tenum@ --- *
2359  *
2360  * Arguments:   @union tvec_regval *rvd@ = destination register value
2361  *              @const union tvec_regval *rvs@ = source register value
2362  *              @const struct tvec_regdef *rd@ = register definition
2363  *
2364  * Returns:     ---
2365  *
2366  * Use:         Copy a register value.
2367  */
2368
2369 #define copy_ienum copy_int
2370 #define copy_uenum copy_uint
2371 #define copy_fenum copy_float
2372
2373 static void copy_penum(union tvec_regval *rvd, const union tvec_regval *rvs,
2374                        const struct tvec_regdef *rd)
2375   { rvd->p = rvs->p; }
2376
2377 /* --- @tobuf_tenum@ --- *
2378  *
2379  * Arguments:   @buf *b@ = buffer
2380  *              @const union tvec_regval *rv@ = register value
2381  *              @const struct tvec_regdef *rd@ = register definition
2382  *
2383  * Returns:     Zero on success, %$-1$% on failure.
2384  *
2385  * Use:         Serialize a register value to a buffer.
2386  *
2387  *              Integer and floating-point enumeration values are serialized
2388  *              as their underlying representations.  Pointer enumerations
2389  *              are serialized as the signed integer index into the
2390  *              association table; %|#nil|% serializes as %$-1$%, and
2391  *              unrecognized pointers cause failure.
2392  */
2393
2394 #define tobuf_ienum tobuf_int
2395 #define tobuf_uenum tobuf_uint
2396 #define tobuf_fenum tobuf_float
2397
2398 static int tobuf_penum(buf *b, const union tvec_regval *rv,
2399                        const struct tvec_regdef *rd)
2400 {
2401   const struct tvec_penuminfo *pei = rd->arg.p;
2402   const struct tvec_passoc *pa;
2403   long i;
2404
2405   for (pa = pei->av, i = 0; pa->tag; pa++, i++)
2406     if (pa->p == rv->p) goto found;
2407   if (!rv->p) i = -1;
2408   else return (-1);
2409 found:
2410   return (signed_to_buf(b, i));
2411 }
2412
2413 /* --- @frombuf_tenum@ --- *
2414  *
2415  * Arguments:   @buf *b@ = buffer
2416  *              @union tvec_regval *rv@ = register value
2417  *              @const struct tvec_regdef *rd@ = register definition
2418  *
2419  * Returns:     Zero on success, %$-1$% on failure.
2420  *
2421  * Use:         Deserialize a register value from a buffer.
2422  *
2423  *              Integer and floating-point enumeration values are serialized
2424  *              as their underlying representations.  Pointer enumerations
2425  *              are serialized as the signed integer index into the
2426  *              association table; %|#nil|% serializes as %$-1$%; out-of-
2427  *              range indices cause failure.
2428  */
2429
2430 #define frombuf_ienum frombuf_int
2431 #define frombuf_uenum frombuf_uint
2432 #define frombuf_fenum frombuf_float
2433 static int frombuf_penum(buf *b, union tvec_regval *rv,
2434                         const struct tvec_regdef *rd)
2435 {
2436   const struct tvec_penuminfo *pei = rd->arg.p;
2437   const struct tvec_passoc *pa;
2438   long i, n;
2439
2440   for (pa = pei->av, n = 0; pa->tag; pa++, n++);
2441   if (signed_from_buf(b, &i)) return (-1);
2442   if (0 <= i && i < n) rv->p = UNCONST(void, pei->av[i].p);
2443   else if (i == -1) rv->p = 0;
2444   else { buf_break(b); return (-1); }
2445   return (0);
2446 }
2447
2448 /* --- @parse_tenum@ --- *
2449  *
2450  * Arguments:   @union tvec_regval *rv@ = register value
2451  *              @const struct tvec_regdef *rd@ = register definition
2452  *              @struct tvec_state *tv@ = test-vector state
2453  *
2454  * Returns:     Zero on success, %$-1$% on error.
2455  *
2456  * Use:         Parse a register value from an input file.
2457  *
2458  *              An enumerated value may be given by name or as a literal
2459  *              value.  For enumerations based on numeric types, the literal
2460  *              values can be written in the same syntax as the underlying
2461  *              values.  For enumerations based on pointers, the only
2462  *              permitted literal is %|#nil|%, which denotes a null pointer.
2463  */
2464
2465 #define DEFPARSE_ENUM(tag_, ty, slot)                                   \
2466   static int parse_##slot##enum(union tvec_regval *rv,                  \
2467                                 const struct tvec_regdef *rd,           \
2468                                 struct tvec_state *tv)                  \
2469   {                                                                     \
2470     const struct tvec_##slot##enuminfo *ei = rd->arg.p;                 \
2471     const struct tvec_##slot##assoc *a;                                 \
2472     dstr d = DSTR_INIT;                                                 \
2473     int rc;                                                             \
2474                                                                         \
2475     d.a = &tv->p_test->a;                                               \
2476     if (tvec_readword(tv, &d, 0,                                        \
2477                       ";", "%s tag or " LITSTR_##tag_, ei->name))       \
2478       { rc = -1; goto end; }                                            \
2479     for (a = ei->av; a->tag; a++)                                       \
2480       if (STRCMP(a->tag, ==, d.buf)) { FOUND_##tag_ goto done; }        \
2481     MISSING_##tag_                                                      \
2482     done:                                                               \
2483     rc = 0;                                                             \
2484   end:                                                                  \
2485     return (rc);                                                        \
2486   }
2487
/* Per-slot fragments which @DEFPARSE_ENUM@ selects by token-pasting its
 * @tag_@ argument.  For each slot tag there are three pieces.
 *
 *   * @LITSTR_...@ is a string literal pasted onto the end of the
 *     `%s tag or ' description of what the parser expected to read.
 *
 *   * @FOUND_...@ is a statement run when the word in @d@ matches the tag
 *     of association @a@: it stores that association's value in the
 *     register.
 *
 *   * @MISSING_...@ is a statement run when no tag matched: it handles the
 *     word as a literal of the underlying type and, on failure, sets @rc@
 *     to %$-1$% and jumps to the enclosing function's @end@ label.
 *
 * The fragments refer to the locals @rv@, @a@, @d@, @ei@, @tv@, and @rc@ of
 * the function body in which they're expanded.
 */

#define LITSTR_INT      "literal signed integer"
#define FOUND_INT       rv->i = a->i;
#define MISSING_INT     if (parse_signed(&rv->i, d.buf, ei->ir, tv))    \
                          { rc = -1; goto end; }

#define LITSTR_UINT     "literal unsigned integer"
#define FOUND_UINT      rv->u = a->u;
#define MISSING_UINT    if (parse_unsigned(&rv->u, d.buf, ei->ur, tv))  \
                          { rc = -1; goto end; }

#define LITSTR_FLT      "literal floating-point number, "               \
                          "`#-inf', `#+inf', or `#nan'"
#define FOUND_FLT       rv->f = a->f;
#define MISSING_FLT     if (parse_floating(&rv->f, 0, d.buf, ei->fi, tv)) \
                          { rc = -1; goto end; }

/* Pointers have no general literal syntax: the only word accepted besides
 * the listed tags is `#nil', which yields a null pointer; anything else is
 * reported as an unknown value of the enumeration.
 */
#define LITSTR_PTR      "`#nil'"
#define FOUND_PTR       rv->p = UNCONST(void, a->p);
#define MISSING_PTR     if (STRCMP(d.buf, ==, "#nil"))                  \
                          rv->p = 0;                                    \
                        else {                                          \
                          tvec_error(tv, "unknown `%s' value `%s'",     \
                                     ei->name, d.buf);                  \
                          rc = -1; goto end;                            \
                        }

/* Expand the parser once for each slot listed by @TVEC_MISCSLOTS@. */
TVEC_MISCSLOTS(DEFPARSE_ENUM)

/* The fragments are only needed for the expansion above. */
#undef LITSTR_INT
#undef FOUND_INT
#undef MISSING_INT

#undef LITSTR_UINT
#undef FOUND_UINT
#undef MISSING_UINT

#undef LITSTR_FLT
#undef FOUND_FLT
#undef MISSING_FLT

#undef LITSTR_PTR
#undef FOUND_PTR
#undef MISSING_PTR

#undef DEFPARSE_ENUM
2533
2534 /* --- @dump_tenum@ --- *
2535  *
2536  * Arguments:   @const union tvec_regval *rv@ = register value
2537  *              @const struct tvec_regdef *rd@ = register definition
2538  *              @unsigned style@ = output style (@TVSF_...@)
 *              @const struct gprintf_ops *gops@, @void *go@ = format output
2540  *
2541  * Returns:     ---
2542  *
2543  * Use:         Dump a register value to the format output.
2544  *
2545  *              Enumeration values are dumped as their symbolic names, if
2546  *              possible, with the underlying values provided as a comment
2547  *              unless compact output is requested, as for the underlying
2548  *              representation.  A null pointer is printed as %|#nil|%;
2549  *              non-null pointers are printed as %|#<TYPE PTR>|%, with the
2550  *              enumeration TYPE and the raw pointer PTR printed with the
2551  *              system's %|%p|% format specifier.
2552  */
2553
2554
/* Generate @dump_SLOTenum@ for one slot.
 *
 * In raw style the output is prefixed with `SLOTenum/NAME:'.  The
 * association table is then scanned in order, and the first entry whose
 * value equals the register's value has its tag printed.  In compact style
 * that is all; otherwise the tag is followed by ` ; = ' so that the raw
 * value, printed by @PRINTRAW_...@, appears as a comment.  If no entry
 * matches, the scan leaves @a@ pointing at the terminating entry (null
 * @tag@) and only the raw value is printed.
 */
#define DEFDUMP_ENUM(tag_, ty, slot)                                    \
  static void dump_##slot##enum(const union tvec_regval *rv,            \
                                const struct tvec_regdef *rd,           \
                                unsigned style,                         \
                                const struct gprintf_ops *gops, void *go) \
  {                                                                     \
    const struct tvec_##slot##enuminfo *ei = rd->arg.p;                 \
    const struct tvec_##slot##assoc *a;                                 \
                                                                        \
    if (style&TVSF_RAW) gprintf(gops, go, #slot "enum/%s:", ei->name);  \
    for (a = ei->av; a->tag; a++)                                       \
      if (rv->slot == a->slot) {                                        \
        gprintf(gops, go, "%s", a->tag);                                \
        if (style&TVSF_COMPACT) return;                                 \
        gprintf(gops, go, " ; = "); break;                              \
      }                                                                 \
                                                                        \
    PRINTRAW_##tag_                                                     \
  }

/* Statement prefix used by the integer @PRINTRAW_...@ fragments: the braced
 * block written immediately after it runs only if compact output was not
 * requested.  Before the block runs, a separator is printed: ` ; = ' if no
 * symbolic name matched (so no comment has been started yet), or ` = ' if a
 * name was found and the comment is already open.
 *
 * The dangling `else _extra:' makes the following block both the body of
 * the final @else@ and the target of the two @goto@s; the `else if (1)'
 * arm exists so that this final @else@ is never taken directly.
 */
#define MAYBE_PRINT_EXTRA                                               \
        if (style&TVSF_COMPACT) /* nothing to do */;                    \
        else if (!a->tag) { gprintf(gops, go, " ; = "); goto _extra; }  \
        else if (1) { gprintf(gops, go, " = "); goto _extra; }          \
        else _extra:

/* Signed integers: decimal, then (unless compact) hex and maybe a character
 * rendering via the @format_signed_hex@ and @maybe_format_signed_char@
 * helpers.
 */
#define PRINTRAW_INT    gprintf(gops, go, "%ld", rv->i);                \
                        MAYBE_PRINT_EXTRA {                             \
                          format_signed_hex(gops, go, rv->i);           \
                          maybe_format_signed_char(gops, go, rv->i);    \
                        }

/* Unsigned integers: as for signed integers, with the unsigned helpers. */
#define PRINTRAW_UINT   gprintf(gops, go, "%lu", rv->u);                \
                        MAYBE_PRINT_EXTRA {                             \
                          format_unsigned_hex(gops, go, rv->u);         \
                          maybe_format_unsigned_char(gops, go, rv->u);  \
                        }

/* Floating-point values are simply handed to @format_floating@. */
#define PRINTRAW_FLT    format_floating(gops, go, rv->f);

/* Pointers: `#nil' for null, otherwise `#<NAME PTR>' using `%p'. */
#define PRINTRAW_PTR    if (!rv->p) gprintf(gops, go, "#nil");          \
                        else gprintf(gops, go, "#<%s %p>", ei->name, rv->p);

/* Expand the dumper once for each slot listed by @TVEC_MISCSLOTS@. */
TVEC_MISCSLOTS(DEFDUMP_ENUM)

#undef PRINTRAW_INT
#undef PRINTRAW_UINT
#undef PRINTRAW_FLT
#undef PRINTRAW_PTR

#undef MAYBE_PRINT_EXTRA
#undef DEFDUMP_ENUM
2607
2608 /* Enumeration type definitions. */
2609 #define DEFTY_ENUM(tag, ty, slot)                                       \
2610   const struct tvec_regty tvty_##slot##enum = {                         \
2611     init_##slot##enum, trivial_release, eq_##slot##enum, copy_##slot##enum, \
2612     tobuf_##slot##enum, frombuf_##slot##enum,                           \
2613     parse_##slot##enum, dump_##slot##enum                               \
2614   };
2615 TVEC_MISCSLOTS(DEFTY_ENUM)
2616 #undef DEFTY_ENUM
2617
2618 /* Predefined enumeration types. */
/* Boolean names.  Several tags share each value, so the order matters when
 * dumping: the enumeration dumper prints the first tag whose value matches,
 * so 0 is shown as `nil' and 1 as `t'.
 */
static const struct tvec_iassoc bool_assoc[] = {
  { "nil",              0 },
  { "false",            0 },
  { "f",                0 },
  { "no",               0 },
  { "n",                0 },
  { "off",              0 },

  { "t",                1 },
  { "true",             1 },
  { "yes",              1 },
  { "y",                1 },
  { "on",               1 },

  TVEC_ENDENUM
};

/* The public `bool' enumeration: its name, the table above, and
 * @tvrange_int@ (presumably the range applied to literal integers -- this
 * relies on the member order of @struct tvec_ienuminfo@; confirm against
 * the header).
 */
const struct tvec_ienuminfo tvenum_bool =
  { "bool", bool_assoc, &tvrange_int };
2638
/* Three-way comparison results: %$-1$%, 0, and %$+1$%.  The symbol forms
 * come first in each group, so they are the tags the enumeration dumper
 * prints (it takes the first tag whose value matches).
 */
static const struct tvec_iassoc cmp_assoc[] = {
  { "<",                -1 },
  { "less",             -1 },
  { "lt",               -1 },

  { "=",                 0 },
  { "equal",             0 },
  { "eq",                0 },

  { ">",                +1 },
  { "greater",          +1 },
  { "gt",               +1 },

  TVEC_ENDENUM
};

/* The public `cmp' enumeration built from the table above. */
const struct tvec_ienuminfo tvenum_cmp =
  { "cmp", cmp_assoc, &tvrange_int };
2657
/* An empty pointer enumeration named `pointer'.  The pointer claim function
 * substitutes it when its caller passes a null enumeration-info pointer.
 */
static const struct tvec_passoc dummy_assoc[] = { TVEC_ENDENUM };
static const struct tvec_penuminfo dummy_peinfo = { "pointer", dummy_assoc };
2660
2661 /* --- @tvec_claimeq_tenum@ --- *
2662  *
2663  * Arguments:   @struct tvec_state *tv@ = test-vector state
2664  *              @const struct tvec_typeenuminfo *ei@ = enumeration type info
2665  *              @ty t0, t1@ = two values
2666  *              @const char *file@, @unsigned @lno@ = calling file and line
2667  *              @const char *expr@ = the expression to quote on failure
2668  *
2669  * Returns:     Nonzero if @t0@ and @t1@ are equal, otherwise zero.
2670  *
2671  * Use:         Check that values of @t0@ and @t1@ are equal.  As for
2672  *              @tvec_claim@ above, a test case is automatically begun and
2673  *              ended if none is already underway.  If the values are
2674  *              unequal, then @tvec_fail@ is called, quoting @expr@, and the
2675  *              mismatched values are dumped: @t0@ is printed as the output
2676  *              value and @t1@ is printed as the input reference.
2677  */
2678
/* Generate @tvec_claimeq_SLOTenum@ for one slot.  The two values are
 * wrapped in temporary live registers and handed to @tvec_claimeq@, with
 * the enumeration info passed as the register-definition argument.
 */
#define DEFCLAIM(tag, ty, slot)                                         \
        int tvec_claimeq_##slot##enum                                   \
          (struct tvec_state *tv,                                       \
           const struct tvec_##slot##enuminfo *ei, ty e0, ty e1,        \
           const char *file, unsigned lno, const char *expr)            \
        {                                                               \
          union tvec_misc arg;                                          \
          struct tvec_reg rval, rref;                                   \
                                                                        \
          PREFLIGHT_##tag;                                              \
          arg.p = ei;                                                   \
          rval.f = rref.f = TVRF_LIVE;                                  \
          rval.v.slot = GET_##tag(e0); rref.v.slot = GET_##tag(e1);     \
          return (tvec_claimeq(tv, &tvty_##slot##enum, &arg,            \
                               &rval, &rref, file, lno, expr));         \
        }

/* Per-slot hooks: @PREFLIGHT_...@ runs before the registers are built, and
 * @GET_...@ converts an argument to the type stored in the register slot.
 * Only pointers need either: a null @ei@ is replaced by the dummy pointer
 * enumeration, and the pointer argument has its constness removed for
 * storage.
 */
#define PREFLIGHT_INT do ; while (0)
#define GET_INT(e) (e)
#define PREFLIGHT_UINT do ; while (0)
#define GET_UINT(e) (e)
#define PREFLIGHT_FLT do ; while (0)
#define GET_FLT(e) (e)
#define PREFLIGHT_PTR                                                   \
        if (!ei) ei = &dummy_peinfo
#define GET_PTR(e) (UNCONST(void, (e)))
TVEC_MISCSLOTS(DEFCLAIM)
#undef DEFCLAIM
#undef PREFLIGHT_INT
#undef GET_INT
#undef PREFLIGHT_UINT
#undef GET_UINT
#undef PREFLIGHT_FLT
#undef GET_FLT
#undef PREFLIGHT_PTR
#undef GET_PTR
2714
2715 /*----- Flag types --------------------------------------------------------*/
2716
2717 /* Flag types are initialized, compared, and serialized as unsigned
2718  * integers.
2719  */
2720
2721 /* --- @parse_flags@ --- *
2722  *
2723  * Arguments:   @union tvec_regval *rv@ = register value
2724  *              @const struct tvec_regdef *rd@ = register definition
2725  *              @struct tvec_state *tv@ = test-vector state
2726  *
2727  * Returns:     Zero on success, %$-1$% on error.
2728  *
2729  * Use:         Parse a register value from an input file.
2730  *
2731  *              The input syntax is a sequence of items separated by `|'
2732  *              signs.  Each item may be the symbolic name of a field value,
2733  *              or a literal unsigned integer.  The masks associated with the
2734  *              given symbolic names must be disjoint.  The resulting
2735  *              numerical value is simply the bitwise OR of the given values.
2736  */
2737
2738 static int parse_flags(union tvec_regval *rv, const struct tvec_regdef *rd,
2739                        struct tvec_state *tv)
2740 {
2741   const struct tvec_flaginfo *fi = rd->arg.p;
2742   const struct tvec_flag *f;
2743   unsigned long m = 0, v = 0, t;
2744   dstr d = DSTR_INIT;
2745   int ch, rc;
2746
2747   d.a = &tv->p_test->a;
2748
2749   for (;;) {
2750
2751     /* Read the next item. */
2752     DRESET(&d);
2753     if (tvec_readword(tv, &d, 0, "|;", "%s flag name or integer", fi->name))
2754       { rc = -1; goto end; }
2755
2756     /* Try to find a matching entry in the table. */
2757     for (f = fi->fv; f->tag; f++)
2758       if (STRCMP(f->tag, ==, d.buf)) {
2759         if (m&f->m)
2760           { tvec_error(tv, "colliding flag setting"); rc = -1; goto end; }
2761         else
2762           { m |= f->m; v |= f->v; goto next; }
2763       }
2764
2765     /* Otherwise, try to parse it as a raw integer. */
2766     if (parse_unsigned(&t, d.buf, fi->range, tv)) { rc = -1; goto end; }
2767     if (m&t) { tvec_error(tv, "colliding flag setting"); rc = -1; goto end; }
2768     v |= t; m |= t;
2769
2770   next:
2771     /* Advance to the next token.  If it's a separator then consume it, and
2772      * go round again.  Otherwise we stop here.
2773      */
2774     if (tvec_nexttoken(tv)) break;
2775     ch = getc(tv->fp);
2776       if (ch != '|') { tvec_syntax(tv, ch, "`|'"); rc = -1; goto end; }
2777       if (tvec_nexttoken(tv)) {
2778         tvec_syntax(tv, '\n', "%s flag name or integer", fi->name);
2779         rc = -1; goto end;
2780       }
2781   }
2782
2783   /* Done. */
2784   rv->u = v; rc = 0;
2785 end:
2786   return (rc);
2787 }
2788
2789 /* --- @dump_flags@ --- *
2790  *
2791  * Arguments:   @const union tvec_regval *rv@ = register value
2792  *              @const struct tvec_regdef *rd@ = register definition
2793  *              @unsigned style@ = output style (@TVSF_...@)
 *              @const struct gprintf_ops *gops@, @void *go@ = format output
2795  *
2796  * Returns:     ---
2797  *
2798  * Use:         Dump a register value to the format output.
2799  *
2800  *              The table of symbolic names and their associated values and
2801  *              masks is repeatedly scanned, in order, to find disjoint
2802  *              matches -- i.e., entries whose value matches the target value
2803  *              in the bit positions indicated by the mask, and whose mask
2804  *              doesn't overlap with any previously found matches; the names
2805  *              are then output, separated by `|'.  Any remaining nonzero
2806  *              bits not covered by any of the matching masks are output as a
2807  *              single literal integer, in hex.
2808  *
2809  *              Unless compact output is requested, or no symbolic names were
2810  *              found, the raw numeric value is also printed in hex, as a
2811  *              comment.
2812  */
2813
2814 static void dump_flags(const union tvec_regval *rv,
2815                        const struct tvec_regdef *rd,
2816                        unsigned style,
2817                        const struct gprintf_ops *gops, void *go)
2818 {
2819   const struct tvec_flaginfo *fi = rd->arg.p;
2820   const struct tvec_flag *f;
2821   unsigned long m = ~0ul, v = rv->u;
2822   const char *sep;
2823
2824   if (style&TVSF_RAW) gprintf(gops, go, "flags/%s:", fi->name);
2825
2826   for (f = fi->fv, sep = ""; f->tag; f++)
2827     if ((m&f->m) && (v&f->m) == f->v) {
2828       gprintf(gops, go, "%s%s", sep, f->tag); m &= ~f->m;
2829       sep = style&TVSF_COMPACT ? "|" : " | ";
2830     }
2831
2832   if (v&m) gprintf(gops, go, "%s0x%0*lx", sep, hex_width(v), v&m);
2833   else if (!v && m == ~0ul) gprintf(gops, go, "0");
2834
2835   if (!(style&(TVSF_COMPACT | TVSF_RAW)))
2836     gprintf(gops, go, " ; = 0x%0*lx", hex_width(rv->u), rv->u);
2837 }
2838
2839 /* Flags type definition. */
2840 const struct tvec_regty tvty_flags = {
2841   init_uint, trivial_release, eq_uint, copy_uint,
2842   tobuf_uint, frombuf_uint,
2843   parse_flags, dump_flags
2844 };
2845
2846 /* --- @tvec_claimeq_flags@ --- *
2847  *
2848  * Arguments:   @struct tvec_state *tv@ = test-vector state
2849  *              @const struct tvec_flaginfo *fi@ = flags type info
2850  *              @unsigned long f0, f1@ = two values
2851  *              @const char *file@, @unsigned @lno@ = calling file and line
2852  *              @const char *expr@ = the expression to quote on failure
2853  *
2854  * Returns:     Nonzero if @f0@ and @f1@ are equal, otherwise zero.
2855  *
2856  * Use:         Check that values of @f0@ and @f1@ are equal.  As for
2857  *              @tvec_claim@ above, a test case is automatically begun and
2858  *              ended if none is already underway.  If the values are
2859  *              unequal, then @tvec_fail@ is called, quoting @expr@, and the
2860  *              mismatched values are dumped: @f0@ is printed as the output
2861  *              value and @f1@ is printed as the input reference.
2862  */
2863
2864 int tvec_claimeq_flags(struct tvec_state *tv,
2865                        const struct tvec_flaginfo *fi,
2866                        unsigned long f0, unsigned long f1,
2867                        const char *file, unsigned lno, const char *expr)
2868 {
2869   union tvec_misc arg;
2870   struct tvec_reg rval, rref;
2871
2872   arg.p = fi;
2873   rval.f = rref.f = TVRF_LIVE; rval.v.u = f0; rref.v.u = f1;
2874   return (tvec_claimeq(tv, &tvty_flags, &arg,
2875                        &rval, &rref, file, lno, expr));
2876 }
2877
2878 /*----- Characters --------------------------------------------------------*/
2879
2880 /* Character values are initialized and compared as signed integers. */
2881
2882 /* --- @tobuf_char@ --- *
2883  *
2884  * Arguments:   @buf *b@ = buffer
2885  *              @const union tvec_regval *rv@ = register value
2886  *              @const struct tvec_regdef *rd@ = register definition
2887  *
2888  * Returns:     Zero on success, %$-1$% on failure.
2889  *
2890  * Use:         Serialize a register value to a buffer.
2891  *
2892  *              Character values are serialized as little-endian 32-bit
2893  *              unsigned integers, with %|EOF|% serialized as all-bits-set.
2894  */
2895
2896 static int tobuf_char(buf *b, const union tvec_regval *rv,
2897                       const struct tvec_regdef *rd)
2898 {
2899   uint32 u;
2900
2901   if (0 <= rv->i && rv->i <= UCHAR_MAX) u = rv->i;
2902   else if (rv->i == EOF) u = MASK32;
2903   else { buf_break(b); return (-1); }
2904   return (buf_putu32l(b, u));
2905 }
2906
2907 /* --- @frombuf_char@ --- *
2908  *
2909  * Arguments:   @buf *b@ = buffer
2910  *              @union tvec_regval *rv@ = register value
2911  *              @const struct tvec_regdef *rd@ = register definition
2912  *
2913  * Returns:     Zero on success, %$-1$% on failure.
2914  *
2915  * Use:         Deserialize a register value from a buffer.
2916  *
2917  *              Character values are serialized as little-endian 32-bit
2918  *              unsigned integers, with %|EOF|% serialized as all-bits-set.
2919  */
2920
2921 static int frombuf_char(buf *b, union tvec_regval *rv,
2922                         const struct tvec_regdef *rd)
2923 {
2924   uint32 u;
2925
2926   if (buf_getu32l(b, &u)) return (-1);
2927   if (0 <= u && u <= UCHAR_MAX) rv->i = u;
2928   else if (u == MASK32) rv->i = EOF;
2929   else { buf_break(b); return (-1); }
2930   return (0);
2931 }
2932
2933 /* --- @parse_char@ --- *
2934  *
2935  * Arguments:   @union tvec_regval *rv@ = register value
2936  *              @const struct tvec_regdef *rd@ = register definition
2937  *              @struct tvec_state *tv@ = test-vector state
2938  *
2939  * Returns:     Zero on success, %$-1$% on error.
2940  *
2941  * Use:         Parse a register value from an input file.
2942  *
2943  *              A character value can be given by symbolic name, with a
2944  *              leading `%|#|%'; or a character or `%|\|%'-escape sequence,
2945  *              optionally in single quotes.
2946  *
2947  *              The following escape sequences and character names are
2948  *              recognized.
2949  *
2950  *              * `%|#eof|%' is the special end-of-file marker.
2951  *
2952  *              * `%|#nul|%' is the NUL character, sometimes used to
2953  *                terminate strings.
2954  *
 *              * `%|#bell|%', `%|#bel|%', `%|#ding|%', or `%|\a|%' is the BEL
2956  *                character used to ring the terminal bell (or do some other
2957  *                thing to attract the user's attention).
2958  *
2959  *              * %|#backspace|%, %|#bs|%, or %|\b|% is the backspace
 *                character, used to move the cursor backwards by one cell.
 *
 *              * %|#escape|%, %|#esc|%, or %|\e|% is the escape character,
2963  *                used to introduce special terminal commands.
2964  *
2965  *              * %|#formfeed|%, %|#ff|%, or %|\f|% is the formfeed
2966  *                character, used to separate pages of text.
2967  *
2968  *              * %|#newline|%, %|#linefeed|%, %|#lf|%, %|#nl|%, or %|\n|% is
2969  *                the newline character, used to terminate lines of text or
2970  *                advance the cursor to the next line (perhaps without
2971  *                returning it to the start of the line).
2972  *
2973  *              * %|#return|%, %|#carriage-return|%, %|#cr|%, or %|\r|% is
2974  *                the carriage-return character, used to return the cursor to
2975  *                the start of the line.
2976  *
2977  *              * %|#tab|%, %|#horizontal-tab|%, %|#ht|%, or %|\t|% is the
2978  *                tab character, used to advance the cursor to the next tab
2979  *                stop on the current line.
2980  *
2981  *              * %|#vertical-tab|%, %|#vt|%, %|\v|% is the vertical tab
2982  *                character.
2983  *
2984  *              * %|#space|%, %|#spc|% is the space character.
2985  *
2986  *              * %|#delete|%, %|#del|% is the delete character, used to
2987  *                erase the most recent character.
2988  *
2989  *              * %|\'|% is the single-quote character.
2990  *
2991  *              * %|\\|% is the backslash character.
2992  *
2993  *              * %|\"|% is the double-quote character.
2994  *
2995  *              * %|\NNN|% or %|\{NNN}|% is the character with code NNN in
2996  *                octal.  The NNN may be up to three digits long.
2997  *
 *              * %|\xNN|% or %|\x{NN}|% is the character with code NN in
 *                hexadecimal.
3000  */
3001
static int parse_char(union tvec_regval *rv, const struct tvec_regdef *rd,
                      struct tvec_state *tv)
{
  dstr d = DSTR_INIT;
  int ch, rc;
  unsigned f = 0;
#define f_quote 1u                      /* an opening quote is outstanding */

  d.a = &tv->p_test->a;

  /* Advance until we find something.  If there's nothing left on the line
   * then report whatever character stopped us.
   */
  if (tvec_nexttoken(tv))
    return (tvec_syntax(tv, fgetc(tv->fp), "character"));

  /* Inspect the character to see what we're up against. */
  ch = getc(tv->fp);

  if (ch == '#') {
    /* It looks like a special token.  Push the `%|#|%' back and fetch the
     * whole word.  If there's just the `%|#|%' after all, then treat it as
     * literal: in that case @ch@ is still `%|#|%' and we drop through to
     * the main dispatch below.
     */

    ungetc(ch, tv->fp);
    if (tvec_readword(tv, &d, 0, ";", "character name"))
      { rc = -1; goto end; }
    if (STRCMP(d.buf, !=, "#")) {
      if (read_charname(&ch, d.buf, RCF_EOFOK)) {
        rc = tvec_error(tv, "unknown character name `%s'", d.buf);
        goto end;
      }
      rv->i = ch; rc = 0; goto end;
    }
  }

  /* If this is a single quote then we expect to see a matching one later.
   * Get the next character and see what happens.  (Backslash escapes are
   * handled by the dispatch below whether or not there was a quote.)
   */
  if (ch == '\'') { f |= f_quote; ch = getc(tv->fp); }

  /* Main character dispatch. */
  switch (ch) {

    case '\n':
      /* A newline.  If we saw a single quote, then treat that as literal:
       * forget that we're quoting, put the newline back for whoever reads
       * next, and store the quote character itself.  Otherwise this is an
       * error.
       */
      if (!(f&f_quote)) goto nochar;
      else { f &= ~f_quote; ungetc(ch, tv->fp); ch = '\''; goto plain; }

    case EOF:
      /* End-of-file.  Similar to newline, but with slightly different
       * effects on the parse state: there's nothing to push back.
       */
      if (!(f&f_quote)) goto nochar;
      else { f &= ~f_quote; ch = '\''; goto plain; }

    case '\'': nochar:
      /* A single quote.  This must be the second of a pair, and there should
       * have been a character or escape sequence between them.  Unquoted
       * newline and end-of-file also arrive here.
       */
      rc = tvec_syntax(tv, ch, "character"); goto end;

    case '\\':
      /* A backslash.  Read a character escape. */
      if (read_charesc(&ch, tv)) return (-1);
      /* On success, fall through to store @ch@. */

    default: plain:
      /* Anything else.  Treat as literal. */
      rv->i = ch; break;
  }

  /* If we saw an opening quote, then expect the closing quote. */
  if (f&f_quote) {
    ch = getc(tv->fp);
    if (ch != '\'') { rc = tvec_syntax(tv, ch, "`''"); goto end; }
  }

  /* Done. */
  rc = 0;
end:
  return (rc);

#undef f_quote
}
3088
3089 /* --- @dump_char@ --- *
3090  *
3091  * Arguments:   @const union tvec_regval *rv@ = register value
3092  *              @const struct tvec_regdef *rd@ = register definition
3093  *              @unsigned style@ = output style (@TVSF_...@)
 *              @const struct gprintf_ops *gops@, @void *go@ = format output
3095  *
3096  * Returns:     ---
3097  *
3098  * Use:         Dump a register value to the format output.
3099  *
3100  *              Character values are dumped as their symbolic names, if any,
3101  *              or as a character or escape sequence within single quotes
3102  *              (which may be omitted in compact style).  If compact output
3103  *              is not requested, then the single-quoted representation (for
3104  *              characters dumped as symbolic names) and integer code in
3105  *              decimal and hex are printed as a comment.
3106  */
3107
3108 static void dump_char(const union tvec_regval *rv,
3109                       const struct tvec_regdef *rd,
3110                       unsigned style,
3111                       const struct gprintf_ops *gops, void *go)
3112 {
3113   const char *p;
3114   unsigned f = 0;
3115 #define f_semi 1u
3116
3117   if (style&TVSF_RAW) {
3118     /* Print the raw character unconditionally in single quotes. */
3119
3120     gprintf(gops, go, "char:'");
3121     format_char(gops, go, rv->i);
3122     gprintf(gops, go, "'");
3123   } else {
3124     /* Print ina pleasant human-readable way. */
3125
3126     /* Print a character name if we can find one. */
3127     p = find_charname(rv->i, (style&TVSF_COMPACT) ? CTF_SHORT : CTF_PREFER);
3128     if (p) {
3129       gprintf(gops, go, "%s", p);
3130       if (style&TVSF_COMPACT) return;
3131       else { gprintf(gops, go, " ;"); f |= f_semi; }
3132     }
3133
3134     /* If the character isn't @EOF@ then print it as a single-quoted thing.
3135      * In compact style, see if we can omit the quotes.
3136      */
3137     if (rv->i >= 0) {
3138       if (f&f_semi) gprintf(gops, go, " = ");
3139       switch (rv->i) {
3140         case ' ': case '\\': case '\'': quote:
3141           format_char(gops, go, rv->i);
3142           break;
3143         default:
3144           if (!(style&TVSF_COMPACT) || !isprint(rv->i)) goto quote;
3145           gprintf(gops, go, "%c", (int)rv->i);
3146           return;
3147       }
3148     }
3149
3150     /* And the character code as an integer. */
3151     if (!(style&TVSF_COMPACT)) {
3152       if (!(f&f_semi)) gprintf(gops, go, " ;");
3153       gprintf(gops, go, " = %ld = ", rv->i);
3154       format_signed_hex(gops, go, rv->i);
3155     }
3156   }
3157
3158 #undef f_semi
3159 }
3160
3161 /* Character type definition. */
3162 const struct tvec_regty tvty_char = {
3163   init_int, trivial_release, eq_int, copy_int,
3164   tobuf_char, frombuf_char,
3165   parse_char, dump_char
3166 };
3167
3168 /* --- @tvec_claimeq_char@ --- *
3169  *
3170  * Arguments:   @struct tvec_state *tv@ = test-vector state
3171  *              @int ch0, ch1@ = two character codes
3172  *              @const char *file@, @unsigned @lno@ = calling file and line
3173  *              @const char *expr@ = the expression to quote on failure
3174  *
3175  * Returns:     Nonzero if @ch0@ and @ch1@ are equal, otherwise zero.
3176  *
3177  * Use:         Check that values of @ch0@ and @ch1@ are equal.  As for
3178  *              @tvec_claim@ above, a test case is automatically begun and
3179  *              ended if none is already underway.  If the values are
3180  *              unequal, then @tvec_fail@ is called, quoting @expr@, and the
3181  *              mismatched values are dumped: @ch0@ is printed as the output
3182  *              value and @ch1@ is printed as the input reference.
3183  */
3184
3185 int tvec_claimeq_char(struct tvec_state *tv, int c0, int c1,
3186                       const char *file, unsigned lno, const char *expr)
3187 {
3188   struct tvec_reg rval, rref;
3189
3190   rval.f = rref.f = TVRF_LIVE; rval.v.i = c0; rref.v.i = c1;
3191   return (tvec_claimeq(tv, &tvty_char, 0, &rval, &rref, file, lno, expr));
3192 }
3193
3194 /*----- Text and byte strings ---------------------------------------------*/
3195
3196 /* --- @init_text@, @init_bytes@ --- *
3197  *
3198  * Arguments:   @union tvec_regval *rv@ = register value
3199  *              @const struct tvec_regdef *rd@ = register definition
3200  *
3201  * Returns:     ---
3202  *
3203  * Use:         Initialize a register value.
3204  *
3205  *              Text and binary string values are initialized with a null
3206  *              pointer and zero length.
3207  */
3208
3209 static void init_text(union tvec_regval *rv, const struct tvec_regdef *rd)
3210   { rv->text.p = 0; rv->text.sz = 0; }
3211
3212 static void init_bytes(union tvec_regval *rv, const struct tvec_regdef *rd)
3213   { rv->bytes.p = 0; rv->bytes.sz = 0; }
3214
/* --- @release_text@, @release_bytes@ --- *
 *
 * Arguments:   @union tvec_regval *rv@ = register value
3218  *              @const struct tvec_regdef *rd@ = register definition
3219  *
3220  * Returns:     ---
3221  *
3222  * Use:         Release resources held by a register value.
3223  *
3224  *              Text and binary string buffers are freed.
3225  */
3226
3227 static void release_text(union tvec_regval *rv,
3228                          const struct tvec_regdef *rd)
3229   { free(rv->text.p); }
3230
3231 static void release_bytes(union tvec_regval *rv,
3232                           const struct tvec_regdef *rd)
3233   { free(rv->bytes.p); }
3234
3235 /* --- @eq_text@, @eq_bytes@ --- *
3236  *
3237  * Arguments:   @const union tvec_regval *rv0, *rv1@ = register values
3238  *              @const struct tvec_regdef *rd@ = register definition
3239  *
3240  * Returns:     Nonzero if the values are equal, zero if unequal
3241  *
3242  * Use:         Compare register values for equality.
3243  */
3244
3245 static int eq_text(const union tvec_regval *rv0,
3246                    const union tvec_regval *rv1,
3247                    const struct tvec_regdef *rd)
3248 {
3249   return (rv0->text.sz == rv1->text.sz &&
3250           (!rv0->text.sz ||
3251            MEMCMP(rv0->text.p, ==, rv1->text.p, rv1->text.sz)));
3252 }
3253
3254 static int eq_bytes(const union tvec_regval *rv0,
3255                     const union tvec_regval *rv1,
3256                     const struct tvec_regdef *rd)
3257 {
3258   return (rv0->bytes.sz == rv1->bytes.sz &&
3259           (!rv0->bytes.sz ||
3260            MEMCMP(rv0->bytes.p, ==, rv1->bytes.p, rv1->bytes.sz)));
3261 }
3262
3263 /* --- @copy_text@, @copy_bytes@ --- *
3264  *
3265  * Arguments:   @union tvec_regval *rvd@ = destination register value
3266  *              @const union tvec_regval *rvs@ = source register value
3267  *              @const struct tvec_regdef *rd@ = register definition
3268  *
3269  * Returns:     ---
3270  *
3271  * Use:         Copy a register value.
3272  */
3273
3274 static void copy_text(union tvec_regval *rvd, const union tvec_regval *rvs,
3275                       const struct tvec_regdef *rd)
3276 {
3277   size_t sz = rvs->text.sz;
3278
3279   if (!sz)
3280     rvd->text.sz = 0;
3281   else {
3282     tvec_alloctext(rvd, sz);
3283     memcpy(rvd->text.p, rvs->text.p, sz); rvd->text.p[sz] = 0;
3284   }
3285 }
3286
3287 static void copy_bytes(union tvec_regval *rvd, const union tvec_regval *rvs,
3288                        const struct tvec_regdef *rd)
3289 {
3290   size_t sz = rvs->bytes.sz;
3291
3292   if (!sz)
3293     rvd->bytes.sz = 0;
3294   else {
3295     tvec_alloctext(rvd, sz);
3296     memcpy(rvd->bytes.p, rvs->bytes.p, sz);
3297   }
3298 }
3299
3300 /* --- @tobuf_text@, @tobuf_bytes@ --- *
3301  *
3302  * Arguments:   @buf *b@ = buffer
3303  *              @const union tvec_regval *rv@ = register value
3304  *              @const struct tvec_regdef *rd@ = register definition
3305  *
3306  * Returns:     Zero on success, %$-1$% on failure.
3307  *
3308  * Use:         Serialize a register value to a buffer.
3309  *
3310  *              Text and binary string values are serialized as a little-
3311  *              endian 64-bit length %$n$% in bytes followed by %$n$% bytes
3312  *              of string data.
3313  */
3314
3315 static int tobuf_text(buf *b, const union tvec_regval *rv,
3316                       const struct tvec_regdef *rd)
3317   { return (buf_putmem64l(b, rv->text.p, rv->text.sz)); }
3318
3319 static int tobuf_bytes(buf *b, const union tvec_regval *rv,
3320                        const struct tvec_regdef *rd)
3321   { return (buf_putmem64l(b, rv->bytes.p, rv->bytes.sz)); }
3322
3323 /* --- @frombuf_text@, @frombuf_bytes@ --- *
3324  *
3325  * Arguments:   @buf *b@ = buffer
3326  *              @union tvec_regval *rv@ = register value
3327  *              @const struct tvec_regdef *rd@ = register definition
3328  *
3329  * Returns:     Zero on success, %$-1$% on failure.
3330  *
3331  * Use:         Deserialize a register value from a buffer.
3332  *
3333  *              Text and binary string values are serialized as a little-
3334  *              endian 64-bit length %$n$% in bytes followed by %$n$% bytes
3335  *              of string data.
3336  */
3337
3338 static int frombuf_text(buf *b, union tvec_regval *rv,
3339                         const struct tvec_regdef *rd)
3340 {
3341   const void *p;
3342   size_t sz;
3343
3344   p = buf_getmem64l(b, &sz); if (!p) return (-1);
3345   tvec_alloctext(rv, sz); memcpy(rv->text.p, p, sz); rv->text.p[sz] = 0;
3346   return (0);
3347 }
3348
3349 static int frombuf_bytes(buf *b, union tvec_regval *rv,
3350                          const struct tvec_regdef *rd)
3351 {
3352   const void *p;
3353   size_t sz;
3354
3355   p = buf_getmem64l(b, &sz); if (!p) return (-1);
3356   tvec_allocbytes(rv, sz); memcpy(rv->bytes.p, p, sz);
3357   return (0);
3358 }
3359
3360 /* --- @check_string_length@ --- *
3361  *
3362  * Arguments:   @size_t sz@ = found string length
3363  *              @const struct tvec_urange *ur@ = acceptable range
3364  *              @struct tvec_state *tv@ = test-vector state
3365  *
3366  * Returns:     Zero on success, %$-1$% on error.
3367  *
3368  * Use:         Checks that @sz@ is within the bounds described by @ur@,
3369  *              reporting an error if not.
3370  */
3371
3372 static int check_string_length(size_t sz, const struct tvec_urange *ur,
3373                                struct tvec_state *tv)
3374 {
3375   unsigned long uu;
3376
3377   if (ur) {
3378     if  (ur->min > sz || sz > ur->max) {
3379       tvec_error(tv, "invalid string length %lu; must be in [%lu .. %lu]",
3380                  (unsigned long)sz, ur->min, ur->max);
3381       return (-1);
3382     }
3383     if (ur->m && ur->m != 1) {
3384       uu = sz%ur->m;
3385       if (uu != ur->a%ur->m) {
3386         tvec_error(tv, "invalid string length %lu == %lu =/= %lu (mod %lu)",
3387                    (unsigned long)sz, uu, ur->a, ur->m);
3388         return (-1);
3389       }
3390     }
3391   }
3392   return (0);
3393 }
3394
3395 /* --- @parse_text@, @parse_bytes@ --- *
3396  *
3397  * Arguments:   @union tvec_regval *rv@ = register value
3398  *              @const struct tvec_regdef *rd@ = register definition
3399  *              @struct tvec_state *tv@ = test-vector state
3400  *
3401  * Returns:     Zero on success, %$-1$% on error.
3402  *
3403  * Use:         Parse a register value from an input file.
3404  *
3405  *              The input format for both kinds of strings is basically the
3406  *              same: a `compound string', consisting of
3407  *
3408  *                * single-quoted strings, which are interpreted entirely
3409  *                  literally, but can't contain single quotes or newlines;
3410  *
3411  *                * double-quoted strings, in which `%|\|%'-escapes are
3412  *                  interpreted as for characters;
3413  *
3414  *                * character names, marked by an initial `%|#|%' sign;
3415  *
3416  *                * special tokens marked by an initial `%|!|%' sign; or
3417  *
3418  *                * barewords interpreted according to the current coding
3419  *                  scheme.
3420  *
3421  *              The special tokens are
3422  *
3423  *                * `%|!bare|%', which causes subsequent sequences of
3424  *                  barewords to be treated as plain text;
3425  *
3426  *                * `%|!hex|%', `%|!base32|%', `%|!base64|%', which cause
3427  *                  subsequent barewords to be decoded in the requested
3428  *                  manner.
3429  *
3430  *                * `%|!repeat|% %$n$% %|{|% %%\textit{string}%% %|}|%',
3431  *                  which includes %$n$% copies of the (compound) string.
3432  *
3433  *              The only difference between text and binary strings is that
3434  *              the initial coding scheme is %|bare|% for text strings and
3435  *              %|hex|% for binary strings.
3436  */
3437
3438 static int parse_text(union tvec_regval *rv, const struct tvec_regdef *rd,
3439                       struct tvec_state *tv)
3440 {
3441   void *p = rv->text.p;
3442
3443   if (read_compound_string(&p, &rv->text.sz, TVCODE_BARE, 0, tv))
3444     return (-1);
3445   rv->text.p = p;
3446   if (check_string_length(rv->text.sz, rd->arg.p, tv)) return (-1);
3447   return (0);
3448 }
3449
3450 static int parse_bytes(union tvec_regval *rv, const struct tvec_regdef *rd,
3451                        struct tvec_state *tv)
3452 {
3453   void *p = rv->bytes.p;
3454
3455   if (read_compound_string(&p, &rv->bytes.sz, TVCODE_HEX, 0, tv))
3456     return (-1);
3457   rv->bytes.p = p;
3458   if (check_string_length(rv->bytes.sz, rd->arg.p, tv)) return (-1);
3459   return (0);
3460 }
3461
3462 /* --- @dump_text@, @dump_bytes@ --- *
3463  *
3464  * Arguments:   @const union tvec_regval *rv@ = register value
3465  *              @const struct tvec_regdef *rd@ = register definition
3466  *              @unsigned style@ = output style (@TVSF_...@)
3467  *              @const struct gprintf_ops *gops@, @void *go@ = format output
3468  *
3469  * Returns:     ---
3470  *
3471  * Use:         Dump a register value to the format output.
3472  *
3473  *              Text string values are dumped as plain text, in double quotes
3474  *              if necessary, and using backslash escape sequences for
3475  *              nonprintable characters.  Unless compact output is requested,
3476  *              strings consisting of multiple lines are dumped with each
3477  *              line of the string on a separate output line.
3478  *
3479  *              Binary string values are dumped in hexadecimal.  In compact
3480  *              style, the output simply consists of a single block of hex
3481  *              digits.  Otherwise, the dump is a display consisting of
3482  *              groups of hex digits, with comments showing the offset (if
3483  *              the string is long enough) and the corresponding plain text.
3484  *
3485  *              Empty strings are dumped as %|#empty|%.
3486  */
3487
3488 static void dump_empty(const char *ty, unsigned style,
3489                        const struct gprintf_ops *gops, void *go)
3490 {
3491   if (style&TVSF_RAW) gprintf(gops, go, "%s:", ty);
3492   if (!(style&TVSF_COMPACT)) gprintf(gops, go, "#empty");
3493   if (!(style&(TVSF_COMPACT | TVSF_RAW))) gprintf(gops, go, " ; = ");
3494   if (!(style&TVSF_RAW)) gprintf(gops, go, "\"\"");
3495 }
3496
3497
3498 static void dump_text(const union tvec_regval *rv,
3499                       const struct tvec_regdef *rd,
3500                       unsigned style,
3501                       const struct gprintf_ops *gops, void *go)
3502 {
3503   const unsigned char *p, *q, *l;
3504   unsigned f = 0;
3505 #define f_nonword 1u
3506 #define f_newline 2u
3507
3508   if (!rv->text.sz) { dump_empty("text", style, gops, go); return; }
3509
3510   p = (const unsigned char *)rv->text.p; l = p + rv->text.sz;
3511   if (style&TVSF_RAW) { gprintf(gops, go, "text:"); goto quote; }
3512   else if (style&TVSF_COMPACT) goto quote;
3513
3514   switch (*p) {
3515     case '!': case '#': case ';': case '"': case '\'':
3516     case '(': case '{': case '[': case ']': case '}': case ')':
3517       f |= f_nonword; break;
3518   }
3519   for (q = p; q < l; q++)
3520     if (*q == '\n' && q != l - 1) f |= f_newline;
3521     else if (!*q || !ISGRAPH(*q) || *q == '\\') f |= f_nonword;
3522   if (f&f_newline) { gprintf(gops, go, "\n\t"); goto quote; }
3523   else if (f&f_nonword) goto quote;
3524
3525   gops->putm(go, (const char *)p, rv->text.sz);
3526   return;
3527
3528 quote:
3529   gprintf(gops, go, "\"");
3530   for (q = p; q < l; q++)
3531     if (!ISPRINT(*q) || *q == '"') {
3532       if (p < q) gops->putm(go, (const char *)p, q - p);
3533       if (*q != '\n' || (style&TVSF_COMPACT))
3534         format_charesc(gops, go, *q, FCF_BRACE);
3535       else {
3536         if (q + 1 == l) { gprintf(gops, go, "\\n\""); return; }
3537         else gprintf(gops, go, "\\n\"\n\t\"");
3538       }
3539       p = q + 1;
3540     }
3541   if (p < q) gops->putm(go, (const char *)p, q - p);
3542   gprintf(gops, go, "\"");
3543
3544 #undef f_nonword
3545 #undef f_newline
3546 }
3547
3548 static void dump_bytes(const union tvec_regval *rv,
3549                        const struct tvec_regdef *rd,
3550                        unsigned style,
3551                        const struct gprintf_ops *gops, void *go)
3552 {
3553   const unsigned char *p = rv->bytes.p, *l = p + rv->bytes.sz;
3554   size_t off, sz = rv->bytes.sz;
3555   unsigned i, n, w;
3556   int wd;
3557
3558   if (!rv->text.sz) { dump_empty("bytes", style, gops, go); return; }
3559
3560   if (style&(TVSF_COMPACT | TVSF_RAW)) {
3561     if (style&TVSF_RAW) gprintf(gops, go, "bytes:");
3562     while (p < l) gprintf(gops, go, "%02x", *p++);
3563     return;
3564   }
3565
3566   if (sz <= 16) w = sz;
3567   else { gprintf(gops, go, "\n\t"); w = 16; }
3568
3569   off = 0; wd = hex_width(sz);
3570   while (p < l) {
3571     if (l - p < 16) n = l - p;
3572     else n = 16;
3573
3574     for (i = 0; i < w; i++) {
3575       if (i < n) gprintf(gops, go, "%02x", p[i]);
3576       else gprintf(gops, go, "  ");
3577       if (i < w - 1 && i%4 == 3) gprintf(gops, go, " ");
3578     }
3579     gprintf(gops, go, " ; ");
3580     if (sz > 16) gprintf(gops, go, "[%0*lx] ", wd, (unsigned long)off);
3581     for (i = 0; i < n; i++)
3582       gprintf(gops, go, "%c", isprint(p[i]) ? p[i] : '.');
3583     p += n; off += n;
3584     if (p < l) gprintf(gops, go, "\n\t");
3585   }
3586 }
3587
3588 /* Text and byte string type definitions. */
3589 const struct tvec_regty tvty_text = {
3590   init_text, release_text, eq_text, copy_text,
3591   tobuf_text, frombuf_text,
3592   parse_text, dump_text
3593 };
3594 const struct tvec_regty tvty_bytes = {
3595   init_bytes, release_bytes, eq_bytes, copy_bytes,
3596   tobuf_bytes, frombuf_bytes,
3597   parse_bytes, dump_bytes
3598 };
3599
3600 /* --- @tvec_claimeq_text@ --- *
3601  *
3602  * Arguments:   @struct tvec_state *tv@ = test-vector state
3603  *              @const char *p0@, @size_t sz0@ = first string with length
3604  *              @const char *p1@, @size_t sz1@ = second string with length
3605  *              @const char *file@, @unsigned lno@ = calling file and line
3606  *              @const char *expr@ = the expression to quote on failure
3607  *
3608  * Returns:     Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
3609  *              zero.
3610  *
3611  * Use:         Check that strings at @p0@ and @p1@ are equal.  As for
3612  *              @tvec_claim@ above, a test case is automatically begun and
3613  *              ended if none is already underway.  If the values are
3614  *              unequal, then @tvec_fail@ is called, quoting @expr@, and the
3615  *              mismatched values are dumped: @p0@ is printed as the output
3616  *              value and @p1@ is printed as the input reference.
3617  */
3618
3619 int tvec_claimeq_text(struct tvec_state *tv,
3620                       const char *p0, size_t sz0,
3621                       const char *p1, size_t sz1,
3622                       const char *file, unsigned lno, const char *expr)
3623 {
3624   struct tvec_reg rval, rref;
3625
3626   rval.f = rref.f = TVRF_LIVE;
3627   rval.v.text.p = UNCONST(char, p0); rval.v.text.sz = sz0;
3628   rref.v.text.p = UNCONST(char, p1); rref.v.text.sz = sz1;
3629   return (tvec_claimeq(tv, &tvty_text, 0, &rval, &rref, file, lno, expr));
3630 }
3631
3632 /* --- @tvec_claimeq_textz@ --- *
3633  *
3634  * Arguments:   @struct tvec_state *tv@ = test-vector state
3635  *              @const char *p0, *p1@ = two strings to compare
3636  *              @const char *file@, @unsigned lno@ = calling file and line
3637  *              @const char *expr@ = the expression to quote on failure
3638  *
3639  * Returns:     Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
3640  *              zero.
3641  *
3642  * Use:         Check that strings at @p0@ and @p1@ are equal, as for
3643  *              @tvec_claimeq_text@, except that the strings are assumed
3644  *              null-terminated, so their lengths don't need to be supplied
3645  *              explicitly.
3646  */
3647
3648 int tvec_claimeq_textz(struct tvec_state *tv,
3649                        const char *p0, const char *p1,
3650                        const char *file, unsigned lno, const char *expr)
3651 {
3652   struct tvec_reg rval, rref;
3653
3654   rval.f = rref.f = TVRF_LIVE;
3655   rval.v.text.p = UNCONST(char, p0); rval.v.text.sz = strlen(p0);
3656   rref.v.text.p = UNCONST(char, p1); rref.v.text.sz = strlen(p1);
3657   return (tvec_claimeq(tv, &tvty_text, 0, &rval, &rref, file, lno, expr));
3658 }
3659
3660 /* --- @tvec_claimeq_bytes@ --- *
3661  *
3662  * Arguments:   @struct tvec_state *tv@ = test-vector state
3663  *              @const void *p0@, @size_t sz0@ = first string with length
3664  *              @const void *p1@, @size_t sz1@ = second string with length
3665  *              @const char *file@, @unsigned lno@ = calling file and line
3666  *              @const char *expr@ = the expression to quote on failure
3667  *
3668  * Returns:     Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
3669  *              zero.
3670  *
3671  * Use:         Check that binary strings at @p0@ and @p1@ are equal.  As for
3672  *              @tvec_claim@ above, a test case is automatically begun and
3673  *              ended if none is already underway.  If the values are
3674  *              unequal, then @tvec_fail@ is called, quoting @expr@, and the
3675  *              mismatched values are dumped: @p0@ is printed as the output
3676  *              value and @p1@ is printed as the input reference.
3677  */
3678
3679 int tvec_claimeq_bytes(struct tvec_state *tv,
3680                        const void *p0, size_t sz0,
3681                        const void *p1, size_t sz1,
3682                        const char *file, unsigned lno, const char *expr)
3683 {
3684   struct tvec_reg rval, rref;
3685
3686   rval.f = rref.f = TVRF_LIVE;
3687   rval.v.bytes.p = UNCONST(void, p0); rval.v.bytes.sz = sz0;
3688   rref.v.bytes.p = UNCONST(void, p1); rref.v.bytes.sz = sz1;
3689   return (tvec_claimeq(tv, &tvty_bytes, 0, &rval, &rref, file, lno, expr));
3690 }
3691
3692 /* --- @tvec_alloctext@, @tvec_allocbytes@ --- *
3693  *
3694  * Arguments:   @union tvec_regval *rv@ = register value
3695  *              @size_t sz@ = required size
3696  *
3697  * Returns:     ---
3698  *
3699  * Use:         Allocate space in a text or binary string register.  If the
3700  *              current register size is sufficient, its buffer is left
3701  *              alone; otherwise, the old buffer, if any, is freed and a
3702  *              fresh buffer allocated.  These functions are not intended to
3703  *              be used to adjust a buffer repeatedly, e.g., while building
3704  *              output incrementally: (a) they will perform badly, and (b)
3705  *              the old buffer contents are simply discarded if reallocation
3706  *              is necessary.  Instead, use a @dbuf@ or @dstr@.
3707  *
3708  *              The @tvec_alloctext@ function sneakily allocates an extra
3709  *              byte for a terminating zero.  The @tvec_allocbytes@ function
3710  *              doesn't do this.
3711  */
3712
3713 void tvec_alloctext(union tvec_regval *rv, size_t sz)
3714 {
3715   if (rv->text.sz <= sz)
3716     { free(rv->text.p); rv->text.p = x_alloc(&arena_stdlib, sz + 1); }
3717   memset(rv->text.p, '?', sz); rv->text.p[sz] = 0; rv->text.sz = sz;
3718 }
3719
3720 void tvec_allocbytes(union tvec_regval *rv, size_t sz)
3721 {
3722   if (rv->bytes.sz < sz)
3723     { free(rv->bytes.p); rv->bytes.p = x_alloc(&arena_stdlib, sz); }
3724   memset(rv->bytes.p, '?', sz); rv->bytes.sz = sz;
3725 }
3726
3727 /*----- Buffer type -------------------------------------------------------*/
3728
3729 /* --- @init_buffer@ --- *
3730  *
3731  * Arguments:   @union tvec_regval *rv@ = register value
3732  *              @const struct tvec_regdef *rd@ = register definition
3733  *
3734  * Returns:     ---
3735  *
3736  * Use:         Initialize a register value.
3737  *
3738  *              Buffer values are initialized with a null pointer,
3739  *              zero length, and zero residue, modulus, and offset.
3740  */
3741
3742 static void init_buffer(union tvec_regval *rv, const struct tvec_regdef *rd)
3743   { rv->buf.p = 0; rv->buf.sz = rv->buf.a = rv->buf.m = rv->buf.off = 0; }
3744
3745 /* --- @release_buffer@ --- *
3746  *
3747  * Arguments:   @const union tvec_regval *rv@ = register value
3748  *              @const struct tvec_regdef *rd@ = register definition
3749  *
3750  * Returns:     ---
3751  *
3752  * Use:         Release resources held by a register value.
3753  *
3754  *              Buffers are freed.
3755  */
3756
3757 static void release_buffer(union tvec_regval *rv,
3758                            const struct tvec_regdef *rd)
3759   { if (rv->buf.p) free(rv->buf.p - rv->buf.off); }
3760
3761 /* --- @eq_buffer@ --- *
3762  *
3763  * Arguments:   @const union tvec_regval *rv0, *rv1@ = register values
3764  *              @const struct tvec_regdef *rd@ = register definition
3765  *
3766  * Returns:     Nonzero if the values are equal, zero if unequal
3767  *
3768  * Use:         Compare register values for equality.
3769  *
3770  *              Buffer values are equal if and only if their sizes and
3771  *              alignment parameters are equal; their contents are
3772  *              %%\emph{not}%% compared.
3773  */
3774
3775 static int eq_buffer(const union tvec_regval *rv0,
3776                      const union tvec_regval *rv1,
3777                      const struct tvec_regdef *rd)
3778 {
3779   return (rv0->buf.sz == rv1->buf.sz &&
3780           rv0->buf.a == rv1->buf.a &&
3781           rv0->buf.m == rv1->buf.m);
3782 }
3783
3784 /* --- @copy_buffer@ --- *
3785  *
3786  * Arguments:   @union tvec_regval *rvd@ = destination register value
3787  *              @const union tvec_regval *rvs@ = source register value
3788  *              @const struct tvec_regdef *rd@ = register definition
3789  *
3790  * Returns:     ---
3791  *
3792  * Use:         Copy a register value.
3793  */
3794
3795 static void copy_buffer(union tvec_regval *rvd, const union tvec_regval *rvs,
3796                         const struct tvec_regdef *rd)
3797 {
3798   if (rvd->buf.p) { free(rvd->buf.p); rvd->buf.p = 0; }
3799   rvd->buf.sz = rvs->buf.sz;
3800   rvd->buf.a = rvs->buf.a;
3801   rvd->buf.m = rvs->buf.m;
3802   rvd->buf.off = 0;
3803 }
3804
3805 /* --- @tobuf_buffer@ --- *
3806  *
3807  * Arguments:   @buf *b@ = buffer
3808  *              @const union tvec_regval *rv@ = register value
3809  *              @const struct tvec_regdef *rd@ = register definition
3810  *
3811  * Returns:     Zero on success, %$-1$% on failure.
3812  *
3813  * Use:         Serialize a register value to a buffer.
3814  *
3815  *              Buffer values are serialized as their lengths, residues, and
3816  *              moduli, as unsigned integers.
3817  */
3818
3819 static int tobuf_buffer(buf *b, const union tvec_regval *rv,
3820                          const struct tvec_regdef *rd)
3821 {
3822   return (unsigned_to_buf(b, rv->buf.sz) ||
3823           unsigned_to_buf(b, rv->buf.a) ||
3824           unsigned_to_buf(b, rv->buf.m));
3825 }
3826
3827 /* --- @frombuf_buffer@ --- *
3828  *
3829  * Arguments:   @buf *b@ = buffer
3830  *              @union tvec_regval *rv@ = register value
3831  *              @const struct tvec_regdef *rd@ = register definition
3832  *
3833  * Returns:     Zero on success, %$-1$% on failure.
3834  *
3835  * Use:         Deserialize a register value from a buffer.
3836  *
3837  *              Buffer values are serialized as their lengths, residues, and
3838  *              moduli, as unsigned integers.  Only these parameters are
3839  *              restored; the buffer itself is not allocated here.
3840  */
3841
3842 static int frombuf_buffer(buf *b, union tvec_regval *rv,
3843                           const struct tvec_regdef *rd)
3844 {
3845   unsigned long sz, a, m;
3846
3847   if (unsigned_from_buf(b, &sz)) return (-1);
3848   if (unsigned_from_buf(b, &a)) return (-1);
3849   if (unsigned_from_buf(b, &m)) return (-1);
3850   if (sz > (size_t)-1 || a > (size_t)-1 || m > (size_t)-1)
3851     { buf_break(b); return (-1); }
3852   rv->buf.sz = sz; rv->buf.a = a; rv->buf.m = m;
3853   return (0);
3854 }
3855
3856 /* --- @parse_buffer@ --- *
3857  *
3858  * Arguments:   @union tvec_regval *rv@ = register value
3859  *              @const struct tvec_regdef *rd@ = register definition
3860  *              @struct tvec_state *tv@ = test-vector state
3861  *
3862  * Returns:     Zero on success, %$-1$% on error.
3863  *
3864  * Use:         Parse a register value from an input file.
3865  *
3866  *              The input format for a buffer value is a size, followed by an
3867  *              optional `%|@|%' and an alignment quantum and a further
3868  *              optional `%|+|%' and an alignment offset.  The size, quantum,
3869  *              and offset are syntactically sizes.
3870  *
3871  *              The buffer is not allocated.
3872  */
3873
3874 static int parse_buffer(union tvec_regval *rv,
3875                         const struct tvec_regdef *rd,
3876                         struct tvec_state *tv)
3877 {
3878   unsigned long sz, a = 0, m = 0;
3879   int ch, rc;
3880
3881   if (parse_szint(tv, &sz, "@;", "buffer length")) { rc = -1; goto end; }
3882   if (check_unsigned_range(sz, &tvrange_size, tv, "buffer length"))
3883     { rc = -1; goto end; }
3884   if (check_string_length(sz, rd->arg.p, tv)) { rc = -1; goto end; }
3885
3886   if (tvec_nexttoken(tv)) goto done;
3887   ch = getc(tv->fp);
3888   if (ch != '@') { rc = tvec_syntax(tv, ch, "`@'"); goto end; }
3889
3890   if (parse_szint(tv, &m, "+;", "alignment quantum")) { rc = -1; goto end; }
3891   if (check_unsigned_range(a, &tvrange_size, tv, "alignment quantum"))
3892     { rc = -1; goto end; }
3893   if (m == 1) m = 0;
3894
3895   if (tvec_nexttoken(tv)) goto done;
3896   ch = getc(tv->fp);
3897   if (ch != '+') { rc = tvec_syntax(tv, ch, "`+'"); goto end; }
3898
3899   if (parse_szint(tv, &a, ";", "alignment offset")) { rc = -1; goto end; }
3900   if (check_unsigned_range(m, &tvrange_size, tv, "alignment offset"))
3901     { rc = -1; goto end; }
3902   if (a >= m) {
3903     rc = tvec_error(tv, "alignment offset %lu >= quantum %lu",
3904                     (unsigned long)a, (unsigned long)m);
3905     goto end;
3906   }
3907
3908 done:
3909   rv->buf.sz = sz; rv->buf.a = a; rv->buf.m = m;
3910   rc = 0;
3911 end:
3912   return (rc);
3913 }
3914
3915 /* --- @dump_buffer@ --- *
3916  *
3917  * Arguments:   @const union tvec_regval *rv@ = register value
3918  *              @const struct tvec_regdef *rd@ = register definition
3919  *              @unsigned style@ = output style (@TVSF_...@)
3920  *              @const struct gprintf_ops *gops@, @void *go@ = format output
3921  *
3922  * Returns:     ---
3923  *
3924  * Use:         Dump a register value to the format output.
3925  *
3926  *              Buffer values are dumped as their size, with the alignment
3927  *              quantum and alignment offset if these are non-default.
3928  */
3929
3930 static void dump_buffer(const union tvec_regval *rv,
3931                         const struct tvec_regdef *rd,
3932                         unsigned style,
3933                         const struct gprintf_ops *gops, void *go)
3934 {
3935   if (style&TVSF_RAW) gprintf(gops, go, "buffer:");
3936   format_size(gops, go, rv->buf.sz, style);
3937   if (rv->buf.m) {
3938     gprintf(gops, go, style&(TVSF_COMPACT | TVSF_RAW) ? "@" : " @ ");
3939     format_size(gops, go, rv->buf.m, style);
3940     if (rv->buf.a) {
3941       gprintf(gops, go, style&(TVSF_COMPACT | TVSF_RAW) ? "+" : " + ");
3942       format_size(gops, go, rv->buf.a, style);
3943     }
3944   }
3945   if (!(style&(TVSF_COMPACT | TVSF_RAW))) {
3946     gprintf(gops, go, " ; = %lu", (unsigned long)rv->buf.sz);
3947     if (rv->buf.m) {
3948       gprintf(gops, go, " @ %lu", (unsigned long)rv->buf.m);
3949       if (rv->buf.a) gprintf(gops, go, " + %lu", (unsigned long)rv->buf.a);
3950     }
3951     gprintf(gops, go, " = "); format_unsigned_hex(gops, go, rv->buf.sz);
3952     if (rv->buf.m) {
3953       gprintf(gops, go, " @ "); format_unsigned_hex(gops, go, rv->buf.m);
3954       if (rv->buf.a) {
3955         gprintf(gops, go, " + ");
3956         format_unsigned_hex(gops, go, rv->buf.a);
3957       }
3958     }
3959   }
3960 }
3961
3962 /* Buffer type definition. */
3963 const struct tvec_regty tvty_buffer = {
3964   init_buffer, release_buffer, eq_buffer, copy_buffer,
3965   tobuf_buffer, frombuf_buffer,
3966   parse_buffer, dump_buffer
3967 };
3968
3969 /* --- @tvec_initbuffer@ --- *
3970  *
3971  * Arguments:   @union tvec_regval *rv@ = register value
3972  *              @const union tvec_regval *ref@ = source buffer
3973  *              @size_t sz@ = size to allocate
3974  *
3975  * Returns:     ---
3976  *
3977  * Use:         Initialize the alignment parameters in @rv@ to match @ref@,
3978  *              and the size to @sz@.
3979  */
3980
3981 void tvec_initbuffer(union tvec_regval *rv,
3982                      const union tvec_regval *ref, size_t sz)
3983   { rv->buf.sz = sz; rv->buf.a = ref->buf.a; rv->buf.m = ref->buf.m; }
3984
3985 /* --- @tvec_allocbuffer@ --- *
3986  *
3987  * Arguments:   @union tvec_regval *rv@ = register value
3988  *
3989  * Returns:     ---
3990  *
3991  * Use:         Allocate @rv->buf.sz@ bytes to the buffer and fill the space
3992  *              with a distinctive pattern.
3993  */
3994
3995 void tvec_allocbuffer(union tvec_regval *rv)
3996 {
3997   unsigned char *p;
3998   size_t m = rv->buf.m, a = rv->buf.a, off;
3999
4000   if (rv->buf.p) free(rv->buf.p - rv->buf.off);
4001
4002   if (m < 2) {
4003     p = x_alloc(&arena_stdlib, rv->buf.sz); off = 0;
4004   } else {
4005     p = x_alloc(&arena_stdlib, rv->buf.sz + m - 1);
4006     if (!(m&(m - 1))) off = (a - (size_t)p)&(m - 1);
4007     else off = (a + m - (size_t)p%m)%m;
4008   }
4009   rv->buf.p = p + off; rv->buf.off = off;
4010   memset(rv->buf.p, '?', rv->buf.sz);
4011 }
4012
4013 /*----- That's all, folks -------------------------------------------------*/