chiark - git - mdw - mLib/blob - test/tvec-types.c

   1 /* -*-c-*-
   2  *
   3  * Types for the test-vector framework
   4  *
   5  * (c) 2023 Straylight/Edgeware
   6  */
   7
   8 /*----- Licensing notice --------------------------------------------------*
   9  *
  10  * This file is part of the mLib utilities library.
  11  *
  12  * mLib is free software: you can redistribute it and/or modify it under
  13  * the terms of the GNU Library General Public License as published by
  14  * the Free Software Foundation; either version 2 of the License, or (at
  15  * your option) any later version.
  16  *
  17  * mLib is distributed in the hope that it will be useful, but WITHOUT
  18  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  19  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
  20  * License for more details.
  21  *
  22  * You should have received a copy of the GNU Library General Public
  23  * License along with mLib.  If not, write to the Free Software
  24  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  25  * USA.
  26  */
  27
  28 /*----- Header files ------------------------------------------------------*/
  29
  30 #include <assert.h>
  31 #include <ctype.h>
  32 #include <errno.h>
  33 #include <float.h>
  34 #include <limits.h>
  35 #include <math.h>
  36 #include <stdio.h>
  37 #include <string.h>
  38
  39 #include "buf.h"
  40 #include "codec.h"
  41 #  include "base32.h"
  42 #  include "base64.h"
  43 #  include "hex.h"
  44 #include "dstr.h"
  45 #include "maths.h"
  46 #include "tvec.h"
  47
  48 /*----- Preliminary utilities ---------------------------------------------*/
  49
  50 /* --- @trivial_release@ --- *
  51  *
  52  * Arguments:   @union tvec_regval *rv@ = a register value
  53  *              @const struct tvec_regdef *rd@ = the register definition
  54  *
  55  * Returns:     ---
  56  *
  57  * Use:         Does nothing.  Used for register values which don't retain
  58  *              resources.
  59  */
  60
  61 static void trivial_release(union tvec_regval *rv,
  62                             const struct tvec_regdef *rd)
  63   { ; }
  64
  65 /*----- Integer utilities -------------------------------------------------*/
  66
  67 /* --- @unsigned_to_buf@, @signed_to_buf@ --- *
  68  *
  69  * Arguments:   @buf *b@ = buffer to write on
  70  *              @unsigned long u@ or @long i@ = integer to write
  71  *
  72  * Returns:     Zero on success, @-1@ on failure.
  73  *
  74  * Use:         Write @u@ or @i@ to the buffer, in big-endian
  75  *              (two's-complement, if signed) format.
  76  */
  77
  78 static int unsigned_to_buf(buf *b, unsigned long u)
  79   { kludge64 k; ASSIGN64(k, u); return (buf_putk64l(b, k)); }
  80
  81 static int signed_to_buf(buf *b, long i)
  82 {
  83   kludge64 k;
  84   unsigned long u;
  85
  86   u = i;
  87   if (i >= 0) ASSIGN64(k, u);
  88   else { ASSIGN64(k, ~u); CPL64(k, k); }
  89   return (buf_putk64l(b, k));
  90 }
  91
  92 /* --- @unsigned_from_buf@, @signed_from_buf@ --- *
  93  *
  94  * Arguments:   @buf *b@ = buffer to read from
  95  *              @unsigned long *u_out@ or @long *i_out@ = where to put the
  96  *                      result
  97  *
  98  * Returns:     Zero on success, @-1@ on failure.
  99  *
 100  * Use:         Read an integer, in big-endian (two's-complement, if signed)
 101  *              format, from the buffer.
 102  */
 103
 104 static int unsigned_from_buf(buf *b, unsigned long *u_out)
 105 {
 106   kludge64 k, ulmax;
 107
 108   ASSIGN64(ulmax, ULONG_MAX);
 109   if (buf_getk64l(b, &k)) return (-1);
 110   if (CMP64(k, >, ulmax)) return (-1);
 111   *u_out = GET64(unsigned long, k); return (0);
 112 }
 113
 114 /* --- @hex_width@ --- *
 115  *
 116  * Arguments:   @unsigned long u@ = an integer
 117  *
 118  * Returns:     A suitable number of digits to use in order to display @u@ in
 119  *              hex.  Currently, we select a power of two sufficient to show
 120  *              the value, but at least 2.
 121  */
 122
 123 static int hex_width(unsigned long u)
 124 {
 125   int wd;
 126   unsigned long t;
 127
 128   for (t = u >> 4, wd = 4; t >>= wd, wd *= 2, t; );
 129   return (wd/4);
 130 }
 131
 132 /* --- @format_unsigned_hex@, @format_signed_hex@ --- *
 133  *
 134  * Arguments:   @const struct gprintf_ops *gops@ = print operations
 135  *              @void *go@ = print destination
 136  *              @unsigned long u@ or @long i@ = integer to print
 137  *
 138  * Returns:     ---
 139  *
 140  * Use:         Print an unsigned or signed integer in hexadecimal.
 141  */
 142
 143 static void format_unsigned_hex(const struct gprintf_ops *gops, void *go,
 144                                 unsigned long u)
 145   { gprintf(gops, go, "0x%0*lx", hex_width(u), u); }
 146
 147 static void format_signed_hex(const struct gprintf_ops *gops, void *go,
 148                               long i)
 149 {
 150   unsigned long u = i >= 0 ? i : -(unsigned long)i;
 151   gprintf(gops, go, "%s0x%0*lx", i < 0 ? "-" : "", hex_width(u), u);
 152 }
 153
 154 static int signed_from_buf(buf *b, long *i_out)
 155 {
 156   kludge64 k, lmax, not_lmin;
 157
 158   ASSIGN64(lmax, LONG_MAX); ASSIGN64(not_lmin, ~(unsigned long)LONG_MIN);
 159   if (buf_getk64l(b, &k)) return (-1);
 160   if (CMP64(k, <=, lmax)) *i_out = (long)GET64(unsigned long, k);
 161   else {
 162     CPL64(k, k);
 163     if (CMP64(k, <=, not_lmin)) *i_out = -(long)GET64(unsigned long, k) - 1;
 164     else return (-1);
 165   }
 166   return (0);
 167 }
 168
 169 /* --- @check_unsigned_range@, @check_signed_range@ --- *
 170  *
 171  * Arguments:   @unsigned long u@ or @long i@ = an integer
 172  *              @const struct tvec_urange *ur@ or
 173  *                      @const struct tvec_irange *ir@ = range specification,
 174  *                      or null
 175  *              @struct tvec_state *tv@ = test vector state
 176  *
 177  * Returns:     Zero on success, or @-1@ on error.
 178  *
 179  * Use:         Check that the integer is within bounds.  If not, report a
 180  *              suitable error and return a failure indication.
 181  */
 182
 183 static int check_signed_range(long i,
 184                               const struct tvec_irange *ir,
 185                               struct tvec_state *tv)
 186 {
 187   if (ir && (ir->min > i || i > ir->max)) {
 188     tvec_error(tv, "integer %ld out of range (must be in [%ld .. %ld])",
 189                i, ir->min, ir->max);
 190     return (-1);
 191   }
 192   return (0);
 193 }
 194
 195 static int check_unsigned_range(unsigned long u,
 196                                 const struct tvec_urange *ur,
 197                                 struct tvec_state *tv)
 198 {
 199   if (ur && (ur->min > u || u > ur->max)) {
 200     tvec_error(tv, "integer %lu out of range (must be in [%lu .. %lu])",
 201                u, ur->min, ur->max);
 202     return (-1);
 203   }
 204   return (0);
 205 }
 206
 207 /* --- @chtodig@ --- *
 208  *
 209  * Arguments:   @int ch@ = a character
 210  *
 211  * Returns:     The numeric value of the character as a digit, or @-1@ if
 212  *              it's not a digit.  Letters count as extended digits starting
 213  *              with value 10; case is not significant.
 214  */
 215
 216 static int chtodig(int ch)
 217 {
 218   if ('0' <= ch && ch <= '9') return (ch - '0');
 219   else if ('a' <= ch && ch <= 'z') return (ch - 'a' + 10);
 220   else if ('A' <= ch && ch <= 'Z') return (ch - 'A' + 10);
 221   else return (-1);
 222 }
 223
 224 /* --- @parse_unsigned_integer@, @parse_signed_integer@ --- *
 225  *
 226  * Arguments:   @unsigned long *u_out@, @long *i_out@ = where to put the
 227  *                      result
 228  *              @const char **q_out@ = where to put the end position
 229  *              @const char *p@ = pointer to the string to parse
 230  *
 231  * Returns:     Zero on success, @-1@ on error.
 232  *
 233  * Use:         Parse an integer from a string in the test-vector format.
 234  *              This is mostly an extension of the traditional C @strtoul@
 235  *              format: supported inputs include:
 236  *
 237  *                * NNN -- a decimal number (even if it starts with `0');
 238  *                * 0xNNN -- hexadecimal;
 239  *                * 0oNNN -- octal;
 240  *                * 0bNNN -- binary;
 241  *                * NNrNNN -- base NN.
 242  *
 243  *              Furthermore, single underscores are permitted internally as
 244  *              an insignificant digit separator.
 245  */
 246
 247 static int parse_unsigned_integer(unsigned long *u_out, const char **q_out,
 248                                   const char *p)
 249 {
 250   unsigned long u;
 251   int ch, d, r;
 252   const char *q;
 253   unsigned f = 0;
 254 #define f_implicit 1u                   /* implicitly reading base 10 */
 255 #define f_digit 2u                      /* read a real digit */
 256 #define f_uscore 4u                     /* found an underscore */
 257
 258   /* Initial setup
 259    *
 260    * This will deal with the traditional `0[box]...' prefixes.  We'll leave
 261    * our new `NNr...' syntax for later.
 262    */
 263   if (p[0] != '0' || !p[1]) {
 264     d = chtodig(*p); if (0 > d || d >= 10) return (-1);
 265     r = 10; u = d; p++; f |= f_implicit | f_digit;
 266   } else {
 267     u = 0; d = chtodig(p[2]);
 268     if (d < 0) { r = 10; f |= f_implicit | f_digit; p++; }
 269     else if ((p[1] == 'x' || p[1] == 'X') && d < 16) { r = 16; p += 2; }
 270     else if ((p[1] == 'o' || p[1] == 'O') && d < 8) { r = 8; p += 2; }
 271     else if ((p[1] == 'b' || p[1] == 'B') && d < 2) { r = 2; p += 2; }
 272     else { r = 10; f |= f_digit; p++; }
 273   }
 274
 275   q = p;
 276   for (;;) {
 277     /* Work through the string a character at a time. */
 278
 279     ch = *p; switch (ch) {
 280
 281       case '_':
 282         /* An underscore is OK if we haven't just seen one. */
 283
 284         if (f&f_uscore) goto done;
 285         p++; f = (f&~f_implicit) | f_uscore;
 286         break;
 287
 288       case 'r': case 'R':
 289         /* An `r' is OK if the number so far is small enough to be a sensible
 290          * base, and we're scanning decimal implicitly.
 291          */
 292
 293         if (!(f&f_implicit) || !u || u >= 36) goto done;
 294         d = chtodig(p[1]); if (0 > d || d >= u) goto done;
 295         r = u; u = d; f = (f&~f_implicit) | f_digit; p += 2; q = p;
 296         break;
 297
 298       default:
 299         /* Otherwise we expect a valid digit and accumulate it. */
 300         d = chtodig(ch); if (d < 0 || d >= r) goto done;
 301         if (u > ULONG_MAX/r) return (-1);
 302         u *= r; if (u > ULONG_MAX - d) return (-1);
 303         u += d; f = (f&~f_uscore) | f_digit; p++; q = p;
 304         break;
 305     }
 306   }
 307
 308 done:
 309   if (!(f&f_digit)) return (-1);
 310   *u_out = u; *q_out = q; return (0);
 311
 312 #undef f_implicit
 313 #undef f_digit
 314 #undef f_uscore
 315 }
 316
 317 static int parse_signed_integer(long *i_out, const char **q_out,
 318                                 const char *p)
 319 {
 320   unsigned long u;
 321   unsigned f = 0;
 322 #define f_neg 1u
 323
 324   /* Read an initial sign. */
 325   if (*p == '+') p++;
 326   else if (*p == '-') { f |= f_neg; p++; }
 327
 328   /* Scan an unsigned number. */
 329   if (parse_unsigned_integer(&u, q_out, p)) return (-1);
 330
 331   /* Check for signed overflow and apply the sign. */
 332   if (!(f&f_neg)) {
 333     if (u > LONG_MAX) return (-1);
 334     *i_out = u;
 335   } else {
 336     if (u && u - 1 > -(LONG_MIN + 1)) return (-1);
 337     *i_out = u ? -(long)(u - 1) - 1 : 0;
 338   }
 339
 340   return (0);
 341
 342 #undef f_neg
 343 }
 344
 345 /* --- @parse_unsigned@, @parse_signed@ --- *
 346  *
 347  * Arguments:   @unsigned long *u_out@ or @long *i_out@ = where to put the
 348  *                      result
 349  *              @const char *p@ = string to parse
 350  *              @const struct tvec_urange *ur@ or
 351  *                      @const struct tvec_irange *ir@ = range specification,
 352  *                      or null
 353  *              @struct tvec_state *tv@ = test vector state
 354  *
 355  * Returns:     Zero on success, @-1@ on error.
 356  *
 357  * Use:         Parse and range-check an integer.  Unlike @parse_(un)signed_
 358  *              integer@, these functions check that there's no cruft
 359  *              following the final digit, and report errors as they find
 360  *              them rather than leaving that to the caller.
 361  */
 362
 363 static int parse_unsigned(unsigned long *u_out, const char *p,
 364                           const struct tvec_urange *ur,
 365                           struct tvec_state *tv)
 366 {
 367   unsigned long u;
 368   const char *q;
 369
 370   if (parse_unsigned_integer(&u, &q, p))
 371     return (tvec_error(tv, "invalid unsigned integer `%s'", p));
 372   if (*q) return (tvec_syntax(tv, *q, "end-of-line"));
 373   if (check_unsigned_range(u, ur, tv)) return (-1);
 374   *u_out = u; return (0);
 375 }
 376
 377 static int parse_signed(long *i_out, const char *p,
 378                         const struct tvec_irange *ir,
 379                         struct tvec_state *tv)
 380 {
 381   long i;
 382   const char *q;
 383
 384   if (parse_signed_integer(&i, &q, p))
 385     return (tvec_error(tv, "invalid signed integer `%s'", p));
 386   if (*q) return (tvec_syntax(tv, *q, "end-of-line"));
 387   if (check_signed_range(i, ir, tv)) return (-1);
 388   *i_out = i; return (0);
 389 }
 390
 391 /*----- Floating-point utilities ------------------------------------------*/
 392
 393 /* --- @eqish_floating_p@ --- *
 394  *
 395  * Arguments:   @double x, y@ = two numbers to compare
 396  *              @const struct tvec_floatinfo *fi@ = floating-point info
 397  *
 398  * Returns:     Nonzero if the comparand @y@ is sufficiently close to the
 399  *              reference @x@, or zero if it's definitely different.
 400  */
 401
 402 static int eqish_floating_p(double x, double y,
 403                             const struct tvec_floatinfo *fi)
 404 {
 405   double t;
 406
 407   if (NANP(x)) return (NANP(y)); else if (NANP(y)) return (0);
 408   if (INFP(x)) return (x == y); else if (INFP(y)) return (0);
 409
 410   switch (fi ? fi->f&TVFF_EQMASK : TVFF_EXACT) {
 411     case TVFF_EXACT:
 412       return (x == y && NEGP(x) == NEGP(y));
 413     case TVFF_ABSDELTA:
 414       t = x - y; if (t < 0) t = -t; return (t < fi->delta);
 415     case TVFF_RELDELTA:
 416       t = 1.0 - y/x; if (t < 0) t = -t; return (t < fi->delta);
 417     default:
 418       abort();
 419   }
 420 }
 421
 422 /* --- @format_floating@ --- *
 423  *
 424  * Arguments:   @const struct gprintf_ops *gops@ = print operations
 425  *              @void *go@ = print destination
 426  *              @double x@ = number to print
 427  *
 428  * Returns:     ---
 429  *
 430  * Use:         Print a floating-point number, accurately.
 431  */
 432
 433 static void format_floating(const struct gprintf_ops *gops, void *go,
 434                             double x)
 435 {
 436   int prec;
 437
 438   if (NANP(x))
 439     gprintf(gops, go, "#nan");
 440   else if (INFP(x))
 441     gprintf(gops, go, x > 0 ? "#+inf" : "#-inf");
 442   else {
 443     /* Ugh.  C doesn't provide any function for just printing a
 444      * floating-point number /correctly/, i.e., so that you can read the
 445      * result back and recover the number you first thought of.  There are
 446      * complicated algorithms published for doing this, but I really don't
 447      * want to get into that here.  So we have this.
 448      *
 449      * The sign doesn't cause significant difficulty so we're going to ignore
 450      * it for now.  So suppose we're given a number %$x = f b^e$%, in
 451      * base-%$b$% format, so %$f b^n$% and %$e$% are integers, with
 452      * %$0 \le f < 1$%.  We're going to convert it into the nearest integer
 453      * of the form %$X = F B^E$%, with similar conditions, only with the
 454      * additional requirement that %$X$% is normalized, i.e., that %$X = 0$%
 455      * or %$F \ge B^{-N}$%.
 456      *
 457      * We're rounding to the nearest such %$X$%.  If there is to be ambiguity
 458      * in the conversion, then some %$x = f b^e$% and the next smallest
 459      * representable number %$x' = x + b^{e-n}$% must both map to the same
 460      * %$X$%, which means both %$x$% and %$x'$% must be nearer to %$X$% than
 461      * any other number representable in the target system.  The nest larger
 462      * number is %$X' = X + B^{E-N}$%; the next smaller number will normally
 463      * be %$W = X - B^{E-N}$%, but if %$F = 1/B$ then the next smaller number
 464      * is actually %$X - B^{E-N-1}$%.  We ignore this latter possibility in
 465      * the pursuit of a conservative estimate (though actually it doesn't
 466      * matter).
 467      *
 468      * If both %$x$% and %$x'$% map to %$X$% then we must have
 469      * %$L = X - B^{E-N}/2 \le x$% and %$x + b^{e-n} \le R = X + B^{E-N}/2$%;
 470      * so firstly %$f b^e = x \ge L = W + B^{E-N}/2 > W = (F - B^{-N}) B^E$%,
 471      * and secondly %$b^{e-n} \le B^{E-N}$%.  Since these inequalities are in
 472      * opposite senses, we can divide, giving
 473      *
 474      *         %$f b^e/b^{e-n} > (F - B^{-N}) B^E/B^{E-N}$% ,
 475      *
 476      * whence
 477      *
 478      *         %$f b^n > (F - B^{-N}) B^N = F B^N - 1$% .
 479      *
 480      * Now %$f \le 1 - b^{-n}$%, and %$F \ge B^{-1}$%, so, for this to be
 481      * possible, it must be the case that
 482      *
 483      *         %$(1 - b^{-n}) b^n = b^n - 1 > B^{N-1} - 1$% .
 484      *
 485      * Then rearrange and take logarithms, obtaining
 486      *
 487      *         %$(N - 1) \log B < n \log b$% ,
 488      *
 489      * and so
 490      *
 491      *         %$N < n \log b/\log B + 1$% .
 492      *
 493      * Recall that this is a necessary condition for a collision to occur; we
 494      * are therefore safe whenever
 495      *
 496      *         %$N \ge n \log b/\log B + 1$% ;
 497      *
 498      * so, taking ceilings,
 499      *
 500      *         %$N \ge \lceil n \log b/\log B \rceil + 1$% .
 501      *
 502      * So that's why we have this.
 503      *
 504      * I'm going to assume that @n = DBL_MANT_DIG@ is sufficiently small that
 505      * we can calculate this without ending up on the wrong side of an
 506      * integer boundary.
 507      *
 508      * In C11, we have @DBL_DECIMAL_DIG@, which should be the same value only
 509      * as a constant.  Except that modern compilers are more than clever
 510      * enough to work out that this is a constant anyway.
 511      *
 512      * This is sometimes an overestimate: we'll print out meaningless digits
 513      * that don't represent anything we actually know about the number in
 514      * question.  To fix that, we'd need a complicated algorithm like Steele
 515      * and White's Dragon4, Gay's @dtoa@, or Burger and Dybvig's algorithm
 516      * (note that Loitsch's Grisu2 is conservative, and Grisu3 hands off to
 517      * something else in difficult situations).
 518      */
 519
 520     prec = ceil(DBL_MANT_DIG*log(FLT_RADIX)/log(10)) + 1;
 521     gprintf(gops, go, "%.*g", prec, x);
 522   }
 523 }
 524
 525 /* --- @parse_floating@ --- *
 526  *
 527  * Arguments:   @double *x_out@ = where to put the result
 528  *              @const char **q_out@ = where to leave end pointer, or null
 529  *              @const char *p@ = string to parse
 530  *              @const struct tvec_floatinfo *fi@ = floating-point info
 531  *              @struct tvec_state *tv@ = test vector state
 532  *
 533  * Returns:     Zero on success, @-1@ on error.
 534  *
 535  * Use:         Parse a floating-point number from a string.  Reports any
 536  *              necessary errors.  If @q_out@ is not null then trailing
 537  *              material is permitted and a pointer to it is left in
 538  *              @*q_out@; this will be null if there is no trailing material.
 539  */
 540
 541 static int parse_floating(double *x_out, const char **q_out, const char *p,
 542                           const struct tvec_floatinfo *fi,
 543                           struct tvec_state *tv)
 544 {
 545   const char *pp; char *q;
 546   dstr d = DSTR_INIT;
 547   double x;
 548   int olderr, rc;
 549
 550   if (q_out) *q_out = 0;
 551
 552   /* Check for special tokens. */
 553   if (STRCMP(p, ==, "#nan")) {
 554 #ifdef NAN
 555     x = NAN; rc = 0;
 556 #else
 557     tvec_error(tv, "NaN not supported on this system");
 558     rc = -1; goto end;
 559 #endif
 560   }
 561
 562   else if (STRCMP(p, ==, "#inf") ||
 563            STRCMP(p, ==, "#+inf") || STRCMP(p, ==, "+#inf")) {
 564 #ifdef INFINITY
 565     x = INFINITY; rc = 0;
 566 #else
 567     tvec_error(tv, "infinity not supported on this system");
 568     rc = -1; goto end;
 569 #endif
 570   }
 571
 572   else if (STRCMP(p, ==, "#-inf") || STRCMP(p, ==, "-#inf")) {
 573 #ifdef INFINITY
 574     x = -INFINITY; rc = 0;
 575 #else
 576     tvec_error(tv, "infinity not supported on this system");
 577     rc = -1; goto end;
 578 #endif
 579   }
 580
 581   /* Check that this looks like a number, so we can exclude `strtod'
 582    * recognizing its own non-finite number tokens.
 583    */
 584   else {
 585     pp = p;
 586     if (*pp == '+' || *pp == '-') pp++;
 587     if (*pp == '.') pp++;
 588     if (!ISDIGIT(*pp)) {
 589       tvec_syntax(tv, *p ? *p : fgetc(tv->fp), "floating-point number");
 590       rc = -1; goto end;
 591     }
 592
 593     /* Parse the number using the system parser. */
 594     olderr = errno; errno = 0;
 595     x = strtod(p, &q);
 596     if (!*q) /* nothing to do */;
 597     else if (q_out) *q_out = q;
 598     else { tvec_syntax(tv, *q, "end-of-line"); rc = -1; goto end; }
 599     if (errno && (errno != ERANGE || (x > 0 ? -x : x) == HUGE_VAL)) {
 600       tvec_error(tv, "invalid floating-point number `%.*s': %s",
 601                  (int)(q - p), p, strerror(errno));
 602       rc = -1; goto end;
 603     }
 604     errno = olderr;
 605   }
 606
 607   /* Check that the number is acceptable. */
 608   if (NANP(x) && fi && !(fi->f&TVFF_NANOK)) {
 609     tvec_error(tv, "#nan not allowed here");
 610     rc = -1; goto end;
 611   }
 612
 613   if (fi && ((!(fi->f&TVFF_NOMIN) && x < fi->min) ||
 614              (!(fi->f&TVFF_NOMAX) && x > fi->max))) {
 615     dstr_puts(&d, "floating-point number ");
 616     format_floating(&dstr_printops, &d, x);
 617     dstr_puts(&d, " out of range (must be in ");
 618     if (fi->f&TVFF_NOMIN)
 619       dstr_puts(&d, "(#-inf");
 620     else
 621       { dstr_putc(&d, '['); format_floating(&dstr_printops, &d, fi->min); }
 622     dstr_puts(&d, " .. ");
 623     if (fi->f&TVFF_NOMAX)
 624       dstr_puts(&d, "#+inf)");
 625     else
 626       { format_floating(&dstr_printops, &d, fi->max); dstr_putc(&d, ']'); }
 627     dstr_putc(&d, ')'); dstr_putz(&d);
 628     tvec_error(tv, "%s", d.buf); rc = -1; goto end;
 629   }
 630
 631   /* All done. */
 632   *x_out = x; rc = 0;
 633 end:
 634   dstr_destroy(&d);
 635   return (rc);
 636 }
 637
 638 /*----- String utilities --------------------------------------------------*/
 639
/* Special character name table.
 *
 * Each entry maps a `#name' to a character code.  A character may have
 * several names; the flags pick out which one to use when formatting.  The
 * order of the entries matters, because @find_charname@ returns the first
 * entry which matches.  The table is terminated by an entry with a null
 * name.
 */
static const struct chartab {
  const char *name;                     /* character name */
  int ch;                               /* character value */
  unsigned f;                           /* flags: */
#define CTF_PREFER 1u                   /*   preferred name */
#define CTF_SHORT 2u                    /*   short name (compact style) */
} chartab[] = {
  { "#eof",             EOF,    CTF_PREFER | CTF_SHORT },
  { "#nul",             '\0',   CTF_PREFER },
  { "#bell",            '\a',   CTF_PREFER },
  { "#ding",            '\a',   0 },
  { "#bel",             '\a',   CTF_SHORT },
  { "#backspace",       '\b',   CTF_PREFER },
  { "#bs",              '\b',   CTF_SHORT },
  { "#escape",          '\x1b', CTF_PREFER },
  { "#esc",             '\x1b', CTF_SHORT },
  { "#formfeed",        '\f',   CTF_PREFER },
  { "#ff",              '\f',   CTF_SHORT },
  { "#newline",         '\n',   CTF_PREFER },
  { "#linefeed",        '\n',   0 },
  { "#lf",              '\n',   CTF_SHORT },
  { "#nl",              '\n',   0 },
  { "#return",          '\r',   CTF_PREFER },
  { "#carriage-return", '\r',   0 },
  { "#cr",              '\r',   CTF_SHORT },
  { "#tab",             '\t',   CTF_PREFER | CTF_SHORT },
  { "#horizontal-tab",  '\t',   0 },
  { "#ht",              '\t',   0 },
  { "#vertical-tab",    '\v',   CTF_PREFER },
  { "#vt",              '\v',   CTF_SHORT },
  /* NOTE(review): space has no @CTF_PREFER@ name, unlike the other
   * characters here -- presumably deliberate, since it prints fine as a
   * quoted character; confirm.
   */
  { "#space",           ' ',    0 },
  { "#spc",             ' ',    CTF_SHORT },
  { "#delete",          '\x7f', CTF_PREFER },
  { "#del",             '\x7f', CTF_SHORT },
  { 0,                  0,      0 }     /* end marker */
};
 677
 678 /* --- @find_charname@ --- *
 679  *
 680  * Arguments:   @int ch@ = character to match
 681  *              @unsigned f@ = flags (@CTF_...@) to match
 682  *
 683  * Returns:     The name of the character, or null if no match is found.
 684  *
 685  * Use:         Looks up a name for a character.  Specifically, it returns
 686  *              the first entry in the @chartab@ table which matches @ch@ and
 687  *              which has one of the flags @f@ set.
 688  */
 689
 690 static const char *find_charname(int ch, unsigned f)
 691 {
 692   const struct chartab *ct;
 693
 694   for (ct = chartab; ct->name; ct++)
 695     if (ct->ch == ch && (ct->f&f)) return (ct->name);
 696   return (0);
 697 }
 698
 699 /* --- @read_charname@ --- *
 700  *
 701  * Arguments:   @int *ch_out@ = where to put the character
 702  *              @const char *p@ = character name
 703  *              @unsigned f@ = flags (@RCF_...@)
 704  *
 705  * Returns:     Zero if a match was found, @-1@ if not.
 706  *
 707  * Use:         Looks up a character by name.  If @RCF_EOFOK@ is set in @f@,
 708  *              then the @EOF@ marker can be matched; otherwise it can't.
 709  */
 710
 711 #define RCF_EOFOK 1u
 712 static int read_charname(int *ch_out, const char *p, unsigned f)
 713 {
 714   const struct chartab *ct;
 715
 716   for (ct = chartab; ct->name; ct++)
 717     if (STRCMP(p, ==, ct->name) && ((f&RCF_EOFOK) || ct->ch >= 0))
 718       { *ch_out = ct->ch; return (0); }
 719   return (-1);
 720 }
 721
 722 /* --- @format_charesc@ --- *
 723  *
 724  * Arguments:   @const struct gprintf_ops *gops@ = print operations
 725  *              @void *go@ = print destination
 726  *              @int ch@ = character to format
 727  *              @unsigned f@ = flags (@FCF_...@)
 728  *
 729  * Returns:     ---
 730  *
 731  * Use:         Format a character as an escape sequence, possibly as part of
 732  *              a larger string.  If @FCF_BRACE@ is set in @f@, then put
 733  *              braces around a `\x...'  code, so that it's suitable for use
 734  *              in a longer string.
 735  */
 736
 737 #define FCF_BRACE 1u
 738 static void format_charesc(const struct gprintf_ops *gops, void *go,
 739                            int ch, unsigned f)
 740 {
 741   switch (ch) {
 742     case '\a': gprintf(gops, go, "\\a"); break;
 743     case '\b': gprintf(gops, go, "\\b"); break;
 744     case '\x1b': gprintf(gops, go, "\\e"); break;
 745     case '\f': gprintf(gops, go, "\\f"); break;
 746     case '\r': gprintf(gops, go, "\\r"); break;
 747     case '\n': gprintf(gops, go, "\\n"); break;
 748     case '\t': gprintf(gops, go, "\\t"); break;
 749     case '\v': gprintf(gops, go, "\\v"); break;
 750     case '\\': gprintf(gops, go, "\\\\"); break;
 751     case '\'': gprintf(gops, go, "\\'"); break;
 752     case '\0':
 753       if (f&FCF_BRACE) gprintf(gops, go, "\\{0}");
 754       else gprintf(gops, go, "\\0");
 755       break;
 756     default:
 757       if (f&FCF_BRACE)
 758         gprintf(gops, go, "\\x{%0*x}", hex_width(UCHAR_MAX), ch);
 759       else
 760         gprintf(gops, go, "\\x%0*x", hex_width(UCHAR_MAX), ch);
 761       break;
 762   }
 763 }
 764
 765 /* --- @format_char@ --- *
 766  *
 767  * Arguments:   @const struct gprintf_ops *gops@ = print operations
 768  *              @void *go@ = print destination
 769  *              @int ch@ = character to format
 770  *
 771  * Returns:     ---
 772  *
 773  * Use:         Format a single character.
 774  */
 775
 776 static void format_char(const struct gprintf_ops *gops, void *go, int ch)
 777 {
 778   switch (ch) {
 779     case '\\': case '\'': escape:
 780       gprintf(gops, go, "'");
 781       format_charesc(gops, go, ch, 0);
 782       gprintf(gops, go, "'");
 783       break;
 784     default:
 785       if (!isprint(ch)) goto escape;
 786       gprintf(gops, go, "'%c'", ch);
 787       break;
 788   }
 789 }
 790
 791 /* --- @maybe_format_unsigned_char@, @maybe_format_signed_char@ --- *
 792  *
 793  * Arguments:   @const struct gprintf_ops *gops@ = print operations
 794  *              @void *go@ = print destination
 795  *              @unsigned long u@ or @long i@ = an integer
 796  *
 797  * Returns:     ---
 798  *
 799  * Use:         Format a (signed or unsigned) integer as a character, if it's
 800  *              in range, printing something like `= 'q''.  It's assumed that
 801  *              a comment marker has already been output.
 802  */
 803
 804 static void maybe_format_unsigned_char
 805   (const struct gprintf_ops *gops, void *go, unsigned long u)
 806 {
 807   const char *p;
 808
 809   p = find_charname(u, CTF_PREFER);
 810   if (p) gprintf(gops, go, " = %s", p);
 811   if (u < UCHAR_MAX)
 812     { gprintf(gops, go, " = "); format_char(gops, go, u); }
 813 }
 814
 815 static void maybe_format_signed_char
 816   (const struct gprintf_ops *gops, void *go, long i)
 817 {
 818   const char *p;
 819
 820   p = find_charname(i, CTF_PREFER);
 821   if (p) gprintf(gops, go, " = %s", p);
 822   if (0 <= i && i < UCHAR_MAX)
 823     { gprintf(gops, go, " = "); format_char(gops, go, i); }
 824 }
 825
 826 /* --- @read_charesc@ --- *
 827  *
 828  * Arguments:   @int *ch_out@ = where to put the result
 829  *              @struct tvec_state *tv@ = test vector state
 830  *
 831  * Returns:     Zero on success, @-1@ on error.
 832  *
 833  * Use:         Parse and convert an escape sequence from @tv@'s input
 834  *              stream, assuming that the initial `\' has already been read.
 835  *              Reports errors as appropriate.
 836  */
 837
 838 static int read_charesc(int *ch_out, struct tvec_state *tv)
 839 {
 840   int ch, i, esc;
 841   unsigned f = 0;
 842 #define f_brace 1u
 843
 844   ch = getc(tv->fp);
 845   switch (ch) {
 846
 847     /* Things we shouldn't find. */
 848     case EOF: case '\n': return (tvec_syntax(tv, ch, "string escape"));
 849
 850     /* Single-character escapes. */
 851     case '\'': *ch_out = '\''; break;
 852     case '\\': *ch_out = '\\'; break;
 853     case '"': *ch_out = '"'; break;
 854     case 'a': *ch_out = '\a'; break;
 855     case 'b': *ch_out = '\b'; break;
 856     case 'e': *ch_out = '\x1b'; break;
 857     case 'f': *ch_out = '\f'; break;
 858     case 'n': *ch_out = '\n'; break;
 859     case 'r': *ch_out = '\r'; break;
 860     case 't': *ch_out = '\t'; break;
 861     case 'v': *ch_out = '\v'; break;
 862
 863     /* Hex escapes, with and without braces. */
 864     case 'x':
 865       ch = getc(tv->fp);
 866       if (ch == '{') { f |= f_brace; ch = getc(tv->fp); }
 867       else f &= ~f_brace;
 868       esc = chtodig(ch);
 869       if (esc < 0 || esc >= 16) return (tvec_syntax(tv, ch, "hex digit"));
 870       for (;;) {
 871         ch = getc(tv->fp); i = chtodig(ch); if (i < 0 || i >= 16) break;
 872         esc = 16*esc + i;
 873         if (esc > UCHAR_MAX)
 874           return (tvec_error(tv,
 875                              "character code %d out of range", esc));
 876       }
 877       if (!(f&f_brace)) ungetc(ch, tv->fp);
 878       else if (ch != '}') return (tvec_syntax(tv, ch, "`}'"));
 879       *ch_out = esc;
 880       break;
 881
 882     /* Other things, primarily octal escapes. */
 883     case '{':
 884       f |= f_brace; ch = getc(tv->fp);
 885       /* fall through */
 886     default:
 887       if ('0' <= ch && ch < '8') {
 888         i = 1; esc = ch - '0';
 889         for (;;) {
 890           ch = getc(tv->fp);
 891           if ('0' > ch || ch >= '8') { ungetc(ch, tv->fp); break; }
 892           esc = 8*esc + ch - '0';
 893           i++; if (i >= 3) break;
 894         }
 895         if (f&f_brace) {
 896           ch = getc(tv->fp);
 897           if (ch != '}') return (tvec_syntax(tv, ch, "`}'"));
 898         }
 899         if (esc > UCHAR_MAX)
 900           return (tvec_error(tv,
 901                              "character code %d out of range", esc));
 902         *ch_out = esc; break;
 903       } else
 904         return (tvec_syntax(tv, ch, "string escape"));
 905   }
 906
 907   /* Done. */
 908   return (0);
 909
 910 #undef f_brace
 911 }
 912
 913 /* --- @read_quoted_string@ --- *
 914  *
 915  * Arguments:   @dstr *d@ = string to write to
 916  *              @int quote@ = initial quote, `'' or `"'
 917  *              @struct tvec_state *tv@ = test vector state
 918  *
 919  * Returns:     Zero on success, @-1@ on error.
 920  *
 921  * Use:         Read the rest of a quoted string into @d@, reporting errors
 922  *              as appropriate.
 923  *
 924  *              A single-quoted string is entirely literal.  A double-quoted
 925  *              string may contain C-like escapes.
 926  */
 927
 928 static int read_quoted_string(dstr *d, int quote, struct tvec_state *tv)
 929 {
 930   int ch;
 931
 932   for (;;) {
 933     ch = getc(tv->fp);
 934     switch (ch) {
 935       case EOF: case '\n':
 936         return (tvec_syntax(tv, ch, "`%c'", quote));
 937       case '\\':
 938         if (quote == '\'') goto ordinary;
 939         ch = getc(tv->fp); if (ch == '\n') { tv->lno++; break; }
 940         ungetc(ch, tv->fp); if (read_charesc(&ch, tv)) return (-1);
 941         goto ordinary;
 942       default:
 943         if (ch == quote) goto end;
 944       ordinary:
 945         DPUTC(d, ch);
 946         break;
 947     }
 948   }
 949
 950 end:
 951   DPUTZ(d);
 952   return (0);
 953 }
 954
 955 /* --- @collect_bare@ --- *
 956  *
 957  * Arguments:   @dstr *d@ = string to write to
 958  *              @struct tvec_state *tv@ = test vector state
 959  *
 960  * Returns:     Zero on success, @-1@ on error.
 961  *
 962  * Use:         Read barewords and the whitespace between them.  Stop when we
 963  *              encounter something which can't start a bareword.
 964  */
 965
 966 static int collect_bare(dstr *d, struct tvec_state *tv)
 967 {
 968   size_t pos = d->len;
 969   enum { WORD, SPACE, ESCAPE }; unsigned s = WORD;
 970   int ch, rc;
 971
 972   for (;;) {
 973     ch = getc(tv->fp);
 974     switch (ch) {
 975       case EOF:
 976         tvec_syntax(tv, ch, "bareword");
 977         rc = -1; goto end;
 978       case '\n':
 979         if (s == ESCAPE) { tv->lno++; goto addch; }
 980         if (s == WORD) pos = d->len;
 981         ungetc(ch, tv->fp); if (tvec_nexttoken(tv)) { rc = -1; goto end; }
 982         DPUTC(d, ' '); s = SPACE;
 983         break;
 984       case '"': case '\'': case '!': case '#': case ')': case '}': case ']':
 985         if (s == SPACE) { ungetc(ch, tv->fp); goto done; }
 986         goto addch;
 987       case '\\':
 988         s = ESCAPE;
 989         break;
 990       default:
 991         if (s != ESCAPE && isspace(ch)) {
 992           if (s == WORD) pos = d->len;
 993           DPUTC(d, ch); s = SPACE;
 994           break;
 995         }
 996       addch:
 997         DPUTC(d, ch); s = WORD;
 998     }
 999   }
1000
1001 done:
1002   if (s == SPACE) d->len = pos;
1003   DPUTZ(d); rc = 0;
1004 end:
1005   return (rc);
1006 }
1007
1008 /* --- @set_up_encoding@ --- *
1009  *
1010  * Arguments:   @const codec_class **ccl_out@ = where to put the class
1011  *              @unsigned *f_out@ = where to put the flags
1012  *              @unsigned code@ = the coding scheme to use (@TVEC_...@)
1013  *
1014  * Returns:     ---
1015  *
1016  * Use:         Helper for @read_compound_string@ below.
1017  *
1018  *              Return the appropriate codec class and flags for @code@.
1019  *              Leaves @*ccl_out@ null if the coding scheme doesn't have a
1020  *              backing codec class (e.g., @TVCODE_BARE@).
1021  */
1022
1023 enum { TVCODE_BARE, TVCODE_HEX, TVCODE_BASE64, TVCODE_BASE32 };
1024 static void set_up_encoding(const codec_class **ccl_out, unsigned *f_out,
1025                             unsigned code)
1026 {
1027   switch (code) {
1028     case TVCODE_BARE:
1029       *ccl_out = 0; *f_out = 0;
1030       break;
1031     case TVCODE_HEX:
1032       *ccl_out = &hex_class; *f_out = CDCF_IGNCASE;
1033       break;
1034     case TVCODE_BASE32:
1035       *ccl_out = &base32_class; *f_out = CDCF_IGNCASE | CDCF_IGNEQPAD;
1036       break;
1037     case TVCODE_BASE64:
1038       *ccl_out = &base64_class; *f_out = CDCF_IGNEQPAD;
1039       break;
1040     default:
1041       abort();
1042   }
1043 }
1044
1045 /* --- @flush_codec@ --- *
1046  *
1047  * Arguments:   @codec *cdc@ = a codec, or null
1048  *              @dstr *d@ = output string
1049  *              @struct tvec_state *tv@ = test vector state
1050  *
1051  * Returns:     Zero on success, @-1@ on error.
1052  *
1053  * Use:         Helper for @read_compound_string@ below.
1054  *
1055  *              Flush out any final buffered material from @cdc@, and check
1056  *              that it's in a good state.  Frees the codec on success.  Does
1057  *              nothing if @cdc@ is null.
1058  */
1059
1060 static int flush_codec(codec *cdc, dstr *d, struct tvec_state *tv)
1061 {
1062   int err;
1063
1064   if (cdc) {
1065     err = cdc->ops->code(cdc, 0, 0, d);
1066     if (err)
1067       return (tvec_error(tv, "invalid %s sequence end: %s",
1068                          cdc->ops->c->name, codec_strerror(err)));
1069     cdc->ops->destroy(cdc);
1070   }
1071   return (0);
1072 }
1073
1074 /* --- @read_compound_string@ --- *
1075  *
1076  * Arguments:   @void **p_inout@ = address of output buffer pointer
1077  *              @size_t *sz_inout@ = address of buffer size
1078  *              @unsigned code@ = initial interpretation of barewords
1079  *              @unsigned f@ = other flags (@RCSF_...@)
1080  *              @struct tvec_state *tv@ = test vector state
1081  *
1082  * Returns:     Zero on success, @-1@ on error.
1083  *
1084  * Use:         Parse a compound string, i.e., a sequence of stringish pieces
1085  *              which might be quoted strings, character names, or barewords
1086  *              to be decoded accoding to @code@, interspersed with
1087  *              additional directives.
1088  *
1089  *              If the initial buffer pointer is non-null and sufficiently
1090  *              large, then it will be reused; otherwise, it is freed and a
1091  *              fresh, sufficiently large buffer is allocated and returned.
1092  */
1093
1094 #define RCSF_NESTED 1u
1095 static int read_compound_string(void **p_inout, size_t *sz_inout,
1096                                 unsigned code, unsigned f,
1097                                 struct tvec_state *tv)
1098 {
1099   const codec_class *ccl; unsigned cdf;
1100   codec *cdc;
1101   dstr d = DSTR_INIT, w = DSTR_INIT;
1102   char *p;
1103   const char *q;
1104   void *pp = 0; size_t sz;
1105   unsigned long n;
1106   int ch, err, rc;
1107
1108   set_up_encoding(&ccl, &cdf, code); cdc = 0;
1109
1110   if (tvec_nexttoken(tv)) return (tvec_syntax(tv, fgetc(tv->fp), "string"));
1111   do {
1112     ch = getc(tv->fp);
1113     switch (ch) {
1114
1115       case ')': case ']': case '}':
1116         /* Close brackets.  Leave these for recursive caller if there is one,
1117          * or just complain.
1118          */
1119
1120         if (!(f&RCSF_NESTED))
1121           { rc = tvec_syntax(tv, ch, "string"); goto end; }
1122         ungetc(ch, tv->fp); goto done;
1123
1124       case '"': case '\'':
1125         /* Quotes.  Read a quoted string. */
1126
1127         if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1128         cdc = 0;
1129         if (read_quoted_string(&d, ch, tv)) { rc = -1; goto end; }
1130         break;
1131
1132       case '#':
1133         /* A named character. */
1134
1135         ungetc(ch, tv->fp);
1136         if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1137         cdc = 0;
1138         DRESET(&w); tvec_readword(tv, &w, ";", "character name");
1139         if (read_charname(&ch, w.buf, RCF_EOFOK)) {
1140           rc = tvec_error(tv, "unknown character name `%s'", d.buf);
1141           goto end;
1142         }
1143         DPUTC(&d, ch); break;
1144
1145       case '!':
1146         /* A magic keyword. */
1147
1148         if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1149         cdc = 0;
1150         ungetc(ch, tv->fp);
1151         DRESET(&w); tvec_readword(tv, &w, ";", "`!'-keyword");
1152
1153         /* Change bareword coding system. */
1154         if (STRCMP(w.buf, ==, "!bare"))
1155           { code = TVCODE_BARE; set_up_encoding(&ccl, &cdf, code); }
1156         else if (STRCMP(w.buf, ==, "!hex"))
1157           { code = TVCODE_HEX; set_up_encoding(&ccl, &cdf, code); }
1158         else if (STRCMP(w.buf, ==, "!base32"))
1159           { code = TVCODE_BASE32; set_up_encoding(&ccl, &cdf, code); }
1160         else if (STRCMP(w.buf, ==, "!base64"))
1161           { code = TVCODE_BASE64; set_up_encoding(&ccl, &cdf, code); }
1162
1163         /* Repeated substrings. */
1164         else if (STRCMP(w.buf, ==, "!repeat")) {
1165           if (tvec_nexttoken(tv)) {
1166             rc = tvec_syntax(tv, fgetc(tv->fp), "repeat count");
1167             goto end;
1168           }
1169           DRESET(&w);
1170           if (tvec_readword(tv, &w, ";{", "repeat count"))
1171             { rc = -1; goto end;  }
1172           if (parse_unsigned_integer(&n, &q, w.buf)) {
1173             rc = tvec_error(tv, "invalid repeat count `%s'", w.buf);
1174             goto end;
1175           }
1176           if (*q) { rc = tvec_syntax(tv, *q, "`{'"); goto end; }
1177           if (tvec_nexttoken(tv))
1178             { rc = tvec_syntax(tv, fgetc(tv->fp), "`{'"); goto end; }
1179           ch = getc(tv->fp); if (ch != '{')
1180             { rc = tvec_syntax(tv, ch, "`{'"); goto end; }
1181           sz = 0;
1182           if (read_compound_string(&pp, &sz, code, f | RCSF_NESTED, tv))
1183             { rc = -1; goto end; }
1184           ch = getc(tv->fp); if (ch != '}')
1185             { rc = tvec_syntax(tv, ch, "`}'"); goto end; }
1186           if (sz) {
1187             if (n > (size_t)-1/sz)
1188               { rc = tvec_error(tv, "repeat size out of range"); goto end; }
1189             dstr_ensure(&d, n*sz);
1190             if (sz == 1)
1191               { memset(d.buf + d.len, *(unsigned char *)pp, n); d.len += n; }
1192             else
1193               for (; n--; d.len += sz) memcpy(d.buf + d.len, pp, sz);
1194           }
1195           xfree(pp); pp = 0;
1196         }
1197
1198         /* Anything else is an error. */
1199         else {
1200           tvec_error(tv, "unknown string keyword `%s'", w.buf);
1201           rc = -1; goto end;
1202         }
1203         break;
1204
1205       default:
1206         /* A bareword.  Process it according to the current coding system. */
1207
1208         switch (code) {
1209           case TVCODE_BARE:
1210             ungetc(ch, tv->fp);
1211             if (collect_bare(&d, tv)) goto done;
1212             break;
1213           default:
1214             assert(ccl);
1215             ungetc(ch, tv->fp); DRESET(&w);
1216             if (tvec_readword(tv, &w, ";", "%s-encoded fragment", ccl->name))
1217               { rc = -1; goto end; }
1218             if (!cdc) cdc = ccl->decoder(cdf);
1219             err = cdc->ops->code(cdc, w.buf, w.len, &d);
1220             if (err) {
1221               tvec_error(tv, "invalid %s fragment `%s': %s",
1222                          ccl->name, w.buf, codec_strerror(err));
1223               rc = -1; goto end;
1224             }
1225             break;
1226         }
1227         break;
1228     }
1229   } while (!tvec_nexttoken(tv));
1230
1231 done:
1232   /* Wrap things up. */
1233   if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1234   cdc = 0;
1235   if (*sz_inout <= d.len)
1236     { xfree(*p_inout); *p_inout = xmalloc(d.len + 1); }
1237   p = *p_inout; memcpy(p, d.buf, d.len); p[d.len] = 0; *sz_inout = d.len;
1238   rc = 0;
1239
1240 end:
1241   /* Clean up any debris. */
1242   if (cdc) cdc->ops->destroy(cdc);
1243   if (pp) xfree(pp);
1244   dstr_destroy(&d); dstr_destroy(&w);
1245   return (rc);
1246 }
1247
1248 /*----- Signed and unsigned integer types ---------------------------------*/
1249
1250 /* --- @init_int@, @init_uint@ --- *
1251  *
1252  * Arguments:   @union tvec_regval *rv@ = register value
1253  *              @const struct tvec_regdef *rd@ = register definition
1254  *
1255  * Returns:     ---
1256  *
1257  * Use:         Initialize a register value.
1258  *
1259  *              Integer values are initialized to zero.
1260  */
1261
1262 static void init_int(union tvec_regval *rv, const struct tvec_regdef *rd)
1263   { rv->i = 0; }
1264
1265 static void init_uint(union tvec_regval *rv, const struct tvec_regdef *rd)
1266   { rv->u = 0; }
1267
1268 /* --- @eq_int@, @eq_uint@ --- *
1269  *
1270  * Arguments:   @const union tvec_regval *rv0, *rv1@ = register values
1271  *              @const struct tvec_regdef *rd@ = register definition
1272  *
1273  * Returns:     Nonzero if the values are equal, zero if unequal
1274  *
1275  * Use:         Compare register values for equality.
1276  */
1277
1278 static int eq_int(const union tvec_regval *rv0, const union tvec_regval *rv1,
1279                   const struct tvec_regdef *rd)
1280   { return (rv0->i == rv1->i); }
1281
1282 static int eq_uint(const union tvec_regval *rv0,
1283                    const union tvec_regval *rv1,
1284                    const struct tvec_regdef *rd)
1285   { return (rv0->u == rv1->u); }
1286
1287 /* --- @tobuf_int@, @tobuf_uint@ --- *
1288  *
1289  * Arguments:   @buf *b@ = buffer
1290  *              @const union tvec_regval *rv@ = register value
1291  *              @const struct tvec_regdef *rd@ = register definition
1292  *
1293  * Returns:     Zero on success, %$-1$% on failure.
1294  *
1295  * Use:         Serialize a register value to a buffer.
1296  *
1297  *              Integer values are serialized as little-endian 64-bit signed
1298  *              or unsigned integers.
1299  */
1300
1301 static int tobuf_int(buf *b, const union tvec_regval *rv,
1302                      const struct tvec_regdef *rd)
1303   { return (signed_to_buf(b, rv->i)); }
1304
1305 static int tobuf_uint(buf *b, const union tvec_regval *rv,
1306                        const struct tvec_regdef *rd)
1307   { return (unsigned_to_buf(b, rv->u)); }
1308
1309 /* --- @frombuf_int@, @frombuf_uint@ --- *
1310  *
1311  * Arguments:   @buf *b@ = buffer
1312  *              @union tvec_regval *rv@ = register value
1313  *              @const struct tvec_regdef *rd@ = register definition
1314  *
1315  * Returns:     Zero on success, %$-1$% on failure.
1316  *
1317  * Use:         Deserialize a register value from a buffer.
1318  *
1319  *              Integer values are serialized as 64-bit signed or unsigned
1320  *              integers.
1321  */
1322
1323 static int frombuf_int(buf *b, union tvec_regval *rv,
1324                        const struct tvec_regdef *rd)
1325   { return (signed_from_buf(b, &rv->i)); }
1326
1327 static int frombuf_uint(buf *b, union tvec_regval *rv,
1328                         const struct tvec_regdef *rd)
1329   { return (unsigned_from_buf(b, &rv->u)); }
1330
1331 /* --- @parse_int@, @parse_uint@ --- *
1332  *
1333  * Arguments:   @union tvec_regval *rv@ = register value
1334  *              @const struct tvec_regdef *rd@ = register definition
1335  *              @struct tvec_state *tv@ = test-vector state
1336  *
1337  * Returns:     Zero on success, %$-1$% on error.
1338  *
1339  * Use:         Parse a register value from an input file.
1340  *
1341  *              Integers may be input in decimal, hex, binary, or octal,
1342  *              following approximately usual conventions.
1343  *
1344  *                * Signed integers may be preceded with a `+' or `-' sign.
1345  *
1346  *                * Decimal integers are just a sequence of decimal digits
1347  *                  `0' ... `9'.
1348  *
1349  *                * Octal integers are a sequence of digits `0' ... `7',
1350  *                  preceded by `0o' or `0O'.
1351  *
1352  *                * Hexadecimal integers are a sequence of digits `0'
1353  *                  ... `9', `a' ... `f', or `A' ... `F', preceded by `0x' or
1354  *                  `0X'.
1355  *
1356  *                * Radix-B integers are a sequence of digits `0' ... `9',
1357  *                  `a' ... `f', or `A' ... `F', each with value less than B,
1358  *                  preceded by `Br' or `BR', where 0 < B < 36 is expressed
1359  *                  in decimal without any leading `0' or internal
1360  *                  underscores `_'.
1361  *
1362  *                * A digit sequence may contain internal underscore `_'
1363  *                  separators, but not before or after all of the digits;
1364  *                  and two consecutive `_' characters are not permitted.
1365  */
1366
1367 static int parse_int(union tvec_regval *rv, const struct tvec_regdef *rd,
1368                      struct tvec_state *tv)
1369 {
1370   dstr d = DSTR_INIT;
1371   int rc;
1372
1373   if (tvec_readword(tv, &d, ";", "signed integer")) { rc = -1; goto end; }
1374   if (parse_signed(&rv->i, d.buf, rd->arg.p, tv)) { rc = -1; goto end; }
1375   if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
1376   rc = 0;
1377 end:
1378   dstr_destroy(&d);
1379   return (rc);
1380 }
1381
1382 static int parse_uint(union tvec_regval *rv, const struct tvec_regdef *rd,
1383                       struct tvec_state *tv)
1384 {
1385   dstr d = DSTR_INIT;
1386   int rc;
1387
1388   if (tvec_readword(tv, &d, ";", "unsigned integer")) { rc = -1; goto end; }
1389   if (parse_unsigned(&rv->u, d.buf, rd->arg.p, tv)) { rc = -1; goto end; }
1390   if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
1391   rc = 0;
1392 end:
1393   dstr_destroy(&d);
1394   return (rc);
1395 }
1396
1397 /* --- @dump_int@, @dump_uint@ --- *
1398  *
1399  * Arguments:   @const union tvec_regval *rv@ = register value
1400  *              @const struct tvec_regdef *rd@ = register definition
1401  *              @unsigned style@ = output style (@TVSF_...@)
1402  *              @const struct gprintf_ops *gops@, @void *gp@ = format output
1403  *
1404  * Returns:     ---
1405  *
1406  * Use:         Dump a register value to the format output.
1407  *
1408  *              Integer values are dumped in decimal and, unless compact
1409  *              output is requested, hex, and maybe a character, as a
1410  *              comment.
1411  */
1412
1413 static void dump_int(const union tvec_regval *rv,
1414                      const struct tvec_regdef *rd,
1415                      unsigned style,
1416                      const struct gprintf_ops *gops, void *go)
1417 {
1418
1419   gprintf(gops, go, "%ld", rv->i);
1420   if (!(style&TVSF_COMPACT)) {
1421     gprintf(gops, go, " ; = ");
1422     format_signed_hex(gops, go, rv->i);
1423     maybe_format_signed_char(gops, go, rv->i);
1424   }
1425 }
1426
1427 static void dump_uint(const union tvec_regval *rv,
1428                       const struct tvec_regdef *rd,
1429                       unsigned style,
1430                       const struct gprintf_ops *gops, void *go)
1431 {
1432   gprintf(gops, go, "%lu", rv->u);
1433   if (!(style&TVSF_COMPACT)) {
1434     gprintf(gops, go, " ; = ");
1435     format_unsigned_hex(gops, go, rv->u);
1436     maybe_format_unsigned_char(gops, go, rv->u);
1437   }
1438 }
1439
1440 /* Integer type definitions. */
1441 const struct tvec_regty tvty_int = {
1442   init_int, trivial_release, eq_int,
1443   tobuf_int, frombuf_int,
1444   parse_int, dump_int
1445 };
1446 const struct tvec_regty tvty_uint = {
1447   init_uint, trivial_release, eq_uint,
1448   tobuf_uint, frombuf_uint,
1449   parse_uint, dump_uint
1450 };
1451
1452 /* Predefined integer ranges. */
1453 const struct tvec_irange
1454   tvrange_schar = { SCHAR_MIN, SCHAR_MAX },
1455   tvrange_short = { SHRT_MIN, SHRT_MAX },
1456   tvrange_int = { INT_MIN, INT_MAX },
1457   tvrange_long = { LONG_MIN, LONG_MAX },
1458   tvrange_sbyte = { -128, 127 },
1459   tvrange_i16 = { -32768, +32767 },
1460   tvrange_i32 = { -2147483648, 2147483647 };
1461 const struct tvec_urange
1462   tvrange_uchar = { 0, UCHAR_MAX },
1463   tvrange_ushort = { 0, USHRT_MAX },
1464   tvrange_uint = { 0, UINT_MAX },
1465   tvrange_ulong = { 0, ULONG_MAX },
1466   tvrange_size = { 0, (size_t)-1 },
1467   tvrange_byte = { 0, 255 },
1468   tvrange_u16 = { 0, 65535 },
1469   tvrange_u32 = { 0, 4294967296 };
1470
1471 /* --- @tvec_claimeq_int@ --- *
1472  *
1473  * Arguments:   @struct tvec_state *tv@ = test-vector state
1474  *              @long i0, i1@ = two signed integers
1475  *              @const char *file@, @unsigned @lno@ = calling file and line
1476  *              @const char *expr@ = the expression to quote on failure
1477  *
1478  * Returns:     Nonzero if @i0@ and @i1@ are equal, otherwise zero.
1479  *
1480  * Use:         Check that values of @i0@ and @i1@ are equal.  As for
1481  *              @tvec_claim@ above, a test case is automatically begun and
1482  *              ended if none is already underway.  If the values are
1483  *              unequal, then @tvec_fail@ is called, quoting @expr@, and the
1484  *              mismatched values are dumped: @i0@ is printed as the output
1485  *              value and @i1@ is printed as the input reference.
1486  */
1487
1488 int tvec_claimeq_int(struct tvec_state *tv, long i0, long i1,
1489                      const char *file, unsigned lno, const char *expr)
1490 {
1491   tv->out[0].v.i = i0; tv->in[0].v.i = i1;
1492   return (tvec_claimeq(tv, &tvty_int, 0, file, lno, expr));
1493 }
1494
1495 /* --- @tvec_claimeq_uint@ --- *
1496  *
1497  * Arguments:   @struct tvec_state *tv@ = test-vector state
1498  *              @unsigned long u0, u1@ = two unsigned integers
1499  *              @const char *file@, @unsigned @lno@ = calling file and line
1500  *              @const char *expr@ = the expression to quote on failure
1501  *
1502  * Returns:     Nonzero if @u0@ and @u1@ are equal, otherwise zero.
1503  *
1504  * Use:         Check that values of @u0@ and @u1@ are equal.  As for
1505  *              @tvec_claim@ above, a test case is automatically begun and
1506  *              ended if none is already underway.  If the values are
1507  *              unequal, then @tvec_fail@ is called, quoting @expr@, and the
1508  *              mismatched values are dumped: @u0@ is printed as the output
1509  *              value and @u1@ is printed as the input reference.
1510  */
1511
1512 int tvec_claimeq_uint(struct tvec_state *tv,
1513                       unsigned long u0, unsigned long u1,
1514                       const char *file, unsigned lno, const char *expr)
1515 {
1516   tv->out[0].v.u = u0; tv->in[0].v.u = u1;
1517   return (tvec_claimeq(tv, &tvty_uint, 0, file, lno, expr));
1518 }
1519
1520 /*----- Floating-point type -----------------------------------------------*/
1521
1522 /* --- @int_float@ --- *
1523  *
1524  * Arguments:   @union tvec_regval *rv@ = register value
1525  *              @const struct tvec_regdef *rd@ = register definition
1526  *
1527  * Returns:     ---
1528  *
1529  * Use:         Initialize a register value.
1530  *
1531  *              Floating-point values are initialized to zero.
1532  */
1533
1534 static void init_float(union tvec_regval *rv, const struct tvec_regdef *rd)
1535   { rv->f = 0.0; }
1536
1537 /* --- @eq_float@ --- *
1538  *
1539  * Arguments:   @const union tvec_regval *rv0, *rv1@ = register values
1540  *              @const struct tvec_regdef *rd@ = register definition
1541  *
1542  * Returns:     Nonzero if the values are equal, zero if unequal
1543  *
1544  * Use:         Compare register values for equality.
1545  *
1546  *              Floating-point values may be considered equal if their
1547  *              absolute or relative difference is sufficiently small, as
1548  *              described in the register definition.
1549  */
1550
1551 static int eq_float(const union tvec_regval *rv0,
1552                     const union tvec_regval *rv1,
1553                     const struct tvec_regdef *rd)
1554   { return (eqish_floating_p(rv0->f, rv1->f, rd->arg.p)); }
1555
1556 /* --- @tobuf_float@ --- *
1557  *
1558  * Arguments:   @buf *b@ = buffer
1559  *              @const union tvec_regval *rv@ = register value
1560  *              @const struct tvec_regdef *rd@ = register definition
1561  *
1562  * Returns:     Zero on success, %$-1$% on failure.
1563  *
1564  * Use:         Serialize a register value to a buffer.
1565  *
1566  *              Floating-point values are serialized as little-endian
1567  *              IEEE 754 Binary64.
1568  */
1569
1570 static int tobuf_float(buf *b, const union tvec_regval *rv,
1571                      const struct tvec_regdef *rd)
1572   { return (buf_putf64l(b, rv->f)); }
1573
1574 /* --- @frombuf_float@ --- *
1575  *
1576  * Arguments:   @buf *b@ = buffer
1577  *              @union tvec_regval *rv@ = register value
1578  *              @const struct tvec_regdef *rd@ = register definition
1579  *
1580  * Returns:     Zero on success, %$-1$% on failure.
1581  *
1582  * Use:         Deserialize a register value from a buffer.
1583  *
1584  *              Floating-point values are serialized as little-endian
1585  *              IEEE 754 Binary64.
1586  */
1587
1588 static int frombuf_float(buf *b, union tvec_regval *rv,
1589                        const struct tvec_regdef *rd)
1590   { return (buf_getf64l(b, &rv->f)); }
1591
1592 /* --- @parse_float@ --- *
1593  *
1594  * Arguments:   @union tvec_regval *rv@ = register value
1595  *              @const struct tvec_regdef *rd@ = register definition
1596  *              @struct tvec_state *tv@ = test-vector state
1597  *
1598  * Returns:     Zero on success, %$-1$% on error.
1599  *
1600  * Use:         Parse a register value from an input file.
1601  *
1602  *              Floating-point values are either NaN (%|#nan|%, if supported
1603  *              by the platform); positive or negative infinity (%|#inf|%,
1604  *              %|+#inf|%, or %|#+inf|% (preferring the last), and %|-#inf|%
1605  *              or %|#-inf|% (preferring the latter), if supported by the
1606  *              platform); or a number in strtod(3) syntax.
1607  */
1608
1609 static int parse_float(union tvec_regval *rv, const struct tvec_regdef *rd,
1610                        struct tvec_state *tv)
1611 {
1612   dstr d = DSTR_INIT;
1613   int rc;
1614
1615   if (tvec_readword(tv, &d, ";", "floating-point number"))
1616     { rc = -1; goto end; }
1617   if (parse_floating(&rv->f, 0, d.buf, rd->arg.p, tv))
1618     { rc = -1; goto end; }
1619   if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
1620   rc = 0;
1621 end:
1622   dstr_destroy(&d);
1623   return (rc);
1624 }
1625
1626 /* --- @dump_float@ --- *
1627  *
1628  * Arguments:   @const union tvec_regval *rv@ = register value
1629  *              @const struct tvec_regdef *rd@ = register definition
1630  *              @unsigned style@ = output style (@TVSF_...@)
1631  *              @const struct gprintf_ops *gops@, @void *gp@ = format output
1632  *
1633  * Returns:     ---
1634  *
1635  * Use:         Dump a register value to the format output.
1636  *
1637  *              Floating-point values are dumped in decimal or as a special
1638  *              token beginning with `%|#|%'.  Some effort is taken to ensure
1639  *              that the output is sufficient to uniquely identify the
1640  *              original value, but, honestly, C makes this really hard.
1641  */
1642
1643 static void dump_float(const union tvec_regval *rv,
1644                        const struct tvec_regdef *rd,
1645                        unsigned style,
1646                        const struct gprintf_ops *gops, void *go)
1647   { format_floating(gops, go, rv->f); }
1648
1649 /* Floating-point type definition. */
1650 const struct tvec_regty tvty_float = {
1651   init_float, trivial_release, eq_float,
1652   tobuf_float, frombuf_float,
1653   parse_float, dump_float
1654 };
1655
1656 /* Predefined floating-point ranges. */
1657 const struct tvec_floatinfo
1658   tvflt_finite = { TVFF_EXACT, -DBL_MAX, DBL_MAX, 0.0 },
1659   tvflt_nonneg = { TVFF_EXACT, 0, DBL_MAX, 0.0 };
1660
1661 /* --- @tvec_claimeqish_float@ --- *
1662  *
1663  * Arguments:   @struct tvec_state *tv@ = test-vector state
1664  *              @double f0, f1@ = two floating-point numbers
1665  *              @unsigned f@ = flags (@TVFF_...@)
1666  *              @double delta@ = maximum tolerable difference
1667  *              @const char *file@, @unsigned @lno@ = calling file and line
1668  *              @const char *expr@ = the expression to quote on failure
1669  *
1670  * Returns:     Nonzero if @f0@ and @u1@ are sufficiently close, otherwise
1671  *              zero.
1672  *
1673  * Use:         Check that values of @f0@ and @f1@ are sufficiently close.
1674  *              As for @tvec_claim@ above, a test case is automatically begun
1675  *              and ended if none is already underway.  If the values are
1676  *              too far apart, then @tvec_fail@ is called, quoting @expr@,
1677  *              and the mismatched values are dumped: @f0@ is printed as the
1678  *              output value and @f1@ is printed as the input reference.
1679  *
1680  *              The details for the comparison are as follows.
1681  *
1682  *                * A NaN value matches any other NaN, and nothing else.
1683  *
1684  *                * An infinity matches another infinity of the same sign,
1685  *                  and nothing else.
1686  *
1687  *                * If @f&TVFF_EQMASK@ is @TVFF_EXACT@, then any
1688  *                  representable number matches only itself: in particular,
1689  *                  positive and negative zero are considered distinct.
1690  *                  (This allows tests to check that they land on the correct
1691  *                  side of branch cuts, for example.)
1692  *
1693  *                * If @f&TVFF_EQMASK@ is @TVFF_ABSDELTA@, then %$x$% matches
1694  *                  %$y$% when %$|x - y| < \delta$%.
1695  *
1696  *                * If @f&TVFF_EQMASK@ is @TVFF_RELDELTA@, then %$x$% matches
1697  *                  %$y$% when %$|1 - y/x| < \delta$%.  (Note that this
1698  *                  criterion is asymmetric FIXME
1699  */
1700
1701 int tvec_claimeqish_float(struct tvec_state *tv,
1702                           double f0, double f1, unsigned f, double delta,
1703                           const char *file, unsigned lno,
1704                           const char *expr)
1705 {
1706   struct tvec_floatinfo fi;
1707   union tvec_misc arg;
1708
1709   fi.f = f; fi.min = fi.max = 0.0; fi.delta = delta; arg.p = &fi;
1710   tv->out[0].v.f = f0; tv->in[0].v.f = f1;
1711   return (tvec_claimeq(tv, &tvty_float, &arg, file, lno, expr));
1712 }
1713
1714 /* --- @tvec_claimeq_float@ --- *
1715  *
1716  * Arguments:   @struct tvec_state *tv@ = test-vector state
1717  *              @double f0, f1@ = two floating-point numbers
1718  *              @const char *file@, @unsigned @lno@ = calling file and line
1719  *              @const char *expr@ = the expression to quote on failure
1720  *
1721  * Returns:     Nonzero if @f0@ and @u1@ are identical, otherwise zero.
1722  *
1723  * Use:         Check that values of @f0@ and @f1@ are identical.  The
1724  *              function is exactly equivalent to @tvec_claimeqish_float@
1725  *              with @f == TVFF_EXACT@.
1726  */
1727
1728 int tvec_claimeq_float(struct tvec_state *tv,
1729                        double f0, double f1,
1730                        const char *file, unsigned lno,
1731                        const char *expr)
1732 {
1733   return (tvec_claimeqish_float(tv, f0, f1, TVFF_EXACT, 0.0,
1734                                 file, lno, expr));
1735 }
1736
1737 /*----- Durations ---------------------------------------------------------*/
1738
/* Durations are held as floating-point counts of seconds.  The duration
 * register type reuses the floating-point initialization, release,
 * comparison, and serialization functions; only parsing and dumping are
 * its own.
 */

/* Table of duration units.  Each entry pairs a unit spelling with the
 * number of seconds one such unit represents; the table ends with an entry
 * whose @unit@ is null.  @DUF_PREFER@ presumably marks the spellings
 * favoured for output -- confirm against @dump_duration@.
 */
static const struct duration_unit {
  const char *unit;                     /* spelling of the unit */
  double scale;                         /* seconds per unit */
  unsigned f;                           /* flags: see below */
#define DUF_PREFER 1u                   /*   preferred unit */
} duration_units[] = {

  /* Decimal multiples of the second. */
  { "Ys",   1e24,       0 },
  { "Zs",   1e21,       0 },
  { "Es",   1e18,       0 },
  { "Ps",   1e15,       0 },
  { "Ts",   1e12,       0 },
  { "Gs",   1e9,        0 },
  { "Ms",   1e6,        0 },
  { "ks",   1e3,        0 },
  { "hs",   1e2,        0 },
  { "das",  1e1,        0 },

  /* Calendar and clock units. */
  { "yr",   31557600.0, DUF_PREFER },
  { "y",    31557600.0, 0 },
  { "day",  86400.0,    DUF_PREFER },
  { "dy",   86400.0,    0 },
  { "d",    86400.0,    0 },
  { "hr",   3600.0,     DUF_PREFER },
  { "hour", 3600.0,     0 },
  { "h",    3600.0,     0 },
  { "min",  60.0,       DUF_PREFER },
  { "m",    60.0,       0 },

  /* The second itself. */
  { "s",    1.0,        DUF_PREFER },
  { "sec",  1.0,        0 },

  /* Decimal fractions of the second. */
  { "ds",   1e-1,       0 },
  { "cs",   1e-2,       0 },
  { "ms",   1e-3,       DUF_PREFER },
  { "µs",   1e-6,       DUF_PREFER },
  { "ns",   1e-9,       DUF_PREFER },
  { "ps",   1e-12,      DUF_PREFER },
  { "fs",   1e-15,      DUF_PREFER },
  { "as",   1e-18,      DUF_PREFER },
  { "zs",   1e-21,      DUF_PREFER },
  { "ys",   1e-24,      DUF_PREFER },

  /* End marker. */
  { 0,      0.0,        0 }
};
1788
1789 /* --- @parse_duration@ --- *
1790  *
1791  * Arguments:   @union tvec_regval *rv@ = register value
1792  *              @const struct tvec_regdef *rd@ = register definition
1793  *              @struct tvec_state *tv@ = test-vector state
1794  *
1795  * Returns:     Zero on success, %$-1$% on error.
1796  *
1797  * Use:         Parse a register value from an input file.
1798  *
1799  *              Duration values are finite nonnegative floating-point
1800  *              numbers in @strtod@ syntax, optionally followed by a unit .
1801  */
1802
1803 static int parse_duration(union tvec_regval *rv,
1804                           const struct tvec_regdef *rd,
1805                           struct tvec_state *tv)
1806 {
1807   const struct duration_unit *u;
1808   const char *q;
1809   dstr d = DSTR_INIT; size_t pos;
1810   double t;
1811   int rc;
1812
1813   if (tvec_readword(tv, &d, ";", "duration")) { rc = -1; goto end; }
1814   if (parse_floating(&t, &q, d.buf,
1815                      rd->arg.p ? rd->arg.p : &tvflt_nonneg, tv))
1816     { rc = -1; goto end; }
1817
1818   if (!q) {
1819     tvec_skipspc(tv); pos = d.len;
1820     if (!tvec_readword(tv, &d, ";", 0)) q = d.buf + pos + 1;
1821   }
1822
1823   if (q) {
1824     for (u = duration_units; u->unit; u++)
1825       if (STRCMP(q, ==, u->unit)) { t *= u->scale; goto found_unit; }
1826     rc = tvec_syntax(tv, *q, "end-of-line"); goto end;
1827   found_unit:;
1828   }
1829
1830   if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
1831   rv->f = t; rc = 0;
1832 end:
1833   dstr_destroy(&d);
1834   return (rc);
1835 }
1836
1837 /* --- @dump_duration@ --- *
1838  *
1839  * Arguments:   @const union tvec_regval *rv@ = register value
1840  *              @const struct tvec_regdef *rd@ = register definition
1841  *              @unsigned style@ = output style (@TVSF_...@)
1842  *              @const struct gprintf_ops *gops@, @void *gp@ = format output
1843  *
1844  * Returns:     ---
1845  *
1846  * Use:         Dump a register value to the format output.
1847  *
1848  *              Durations are dumped as a human-palatable scaled value with
1849  *              unit, and, if compact style is not requested, as a raw number
1850  *              of seconds at full precision as a comment.
1851  */
1852
1853 static void dump_duration(const union tvec_regval *rv,
1854                           const struct tvec_regdef *rd,
1855                           unsigned style,
1856                           const struct gprintf_ops *gops, void *go)
1857 {
1858   const struct duration_unit *u;
1859   double t = rv->f;
1860
1861   if (!t) u = 0;
1862   else {
1863     for (u = duration_units; u->scale > t && u[1].unit; u++);
1864     t /= u->scale;
1865   }
1866
1867   gprintf(gops, go, "%.4g %s", t, u ? u->unit : "s");
1868   if (!(style&TVSF_COMPACT)) {
1869     gprintf(gops, go, "; = ");
1870     format_floating(gops, go, rv->f);
1871     gprintf(gops, go, " s");
1872   }
1873 }
1874
1875 /* Duration type definition. */
1876 const struct tvec_regty tvty_duration = {
1877   init_float, trivial_release, eq_float,
1878   tobuf_float, frombuf_float,
1879   parse_duration, dump_duration
1880 };
1881
1882 /*----- Enumerations ------------------------------------------------------*/
1883
1884 /* --- @init_tenum@ --- *
1885  *
1886  * Arguments:   @union tvec_regval *rv@ = register value
1887  *              @const struct tvec_regdef *rd@ = register definition
1888  *
1889  * Returns:     ---
1890  *
1891  * Use:         Initialize a register value.
1892  *
1893  *              Integer and floating-point enumeration values are initialized
1894  *              as their underlying representations.  Pointer enumerations
1895  *              are initialized to %|#nil|%.
1896  */
1897
1898 #define init_ienum init_int
1899 #define init_uenum init_uint
1900 #define init_fenum init_float
1901
1902 static void init_penum(union tvec_regval *rv, const struct tvec_regdef *rd)
1903   { rv->p = 0; }
1904
1905 /* --- @eq_tenum@ --- *
1906  *
1907  * Arguments:   @const union tvec_regval *rv0, *rv1@ = register values
1908  *              @const struct tvec_regdef *rd@ = register definition
1909  *
1910  * Returns:     Nonzero if the values are equal, zero if unequal
1911  *
1912  * Use:         Compare register values for equality.
1913  *
1914  *              Integer and floating-point enumeration values are compared as
1915  *              their underlying representations; in particular, floating-
1916  *              point enumerations may compare equal if their absolute or
1917  *              relative difference is sufficiently small.  Pointer
1918  *              enumerations are compared as pointers.
1919  */
1920
1921 #define eq_ienum eq_int
1922 #define eq_uenum eq_uint
1923
1924 static int eq_fenum(const union tvec_regval *rv0,
1925                     const union tvec_regval *rv1,
1926                     const struct tvec_regdef *rd)
1927 {
1928   const struct tvec_fenuminfo *ei = rd->arg.p;
1929   return (eqish_floating_p(rv0->f, rv1->f, ei->fi));
1930 }
1931
1932 static int eq_penum(const union tvec_regval *rv0,
1933                     const union tvec_regval *rv1,
1934                     const struct tvec_regdef *rd)
1935   { return (rv0->p == rv1->p); }
1936
1937 /* --- @tobuf_tenum@ --- *
1938  *
1939  * Arguments:   @buf *b@ = buffer
1940  *              @const union tvec_regval *rv@ = register value
1941  *              @const struct tvec_regdef *rd@ = register definition
1942  *
1943  * Returns:     Zero on success, %$-1$% on failure.
1944  *
1945  * Use:         Serialize a register value to a buffer.
1946  *
1947  *              Integer and floating-point enumeration values are serialized
1948  *              as their underlying representations.  Pointer enumerations
1949  *              are serialized as the signed integer index into the
1950  *              association table; %|#nil|% serializes as %$-1$%, and
1951  *              unrecognized pointers cause failure.
1952  */
1953
1954 #define tobuf_ienum tobuf_int
1955 #define tobuf_uenum tobuf_uint
1956 #define tobuf_fenum tobuf_float
1957
1958 static int tobuf_penum(buf *b, const union tvec_regval *rv,
1959                        const struct tvec_regdef *rd)
1960 {
1961   const struct tvec_penuminfo *pei = rd->arg.p;
1962   const struct tvec_passoc *pa;
1963   long i;
1964
1965   for (pa = pei->av, i = 0; pa->tag; pa++, i++)
1966     if (pa->p == rv->p) goto found;
1967   if (!rv->p) i = -1;
1968   else return (-1);
1969 found:
1970   return (signed_to_buf(b, i));
1971 }
1972
1973 /* --- @frombuf_tenum@ --- *
1974  *
1975  * Arguments:   @buf *b@ = buffer
1976  *              @union tvec_regval *rv@ = register value
1977  *              @const struct tvec_regdef *rd@ = register definition
1978  *
1979  * Returns:     Zero on success, %$-1$% on failure.
1980  *
1981  * Use:         Deserialize a register value from a buffer.
1982  *
1983  *              Integer and floating-point enumeration values are serialized
1984  *              as their underlying representations.  Pointer enumerations
1985  *              are serialized as the signed integer index into the
1986  *              association table; %|#nil|% serializes as %$-1$%; out-of-
1987  *              range indices cause failure.
1988  */
1989
1990 #define frombuf_ienum frombuf_int
1991 #define frombuf_uenum frombuf_uint
1992 #define frombuf_fenum frombuf_float
1993 static int frombuf_penum(buf *b, union tvec_regval *rv,
1994                         const struct tvec_regdef *rd)
1995 {
1996   const struct tvec_penuminfo *pei = rd->arg.p;
1997   const struct tvec_passoc *pa;
1998   long i, n;
1999
2000   for (pa = pei->av, n = 0; pa->tag; pa++, n++);
2001   if (signed_from_buf(b, &i)) return (-1);
2002   if (0 <= i && i < n) rv->p = (/*unconst*/ void *)pei->av[i].p;
2003   else if (i == -1) rv->p = 0;
2004   else return (-1);
2005   return (0);
2006 }
2007
2008 /* --- @parse_tenum@ --- *
2009  *
2010  * Arguments:   @union tvec_regval *rv@ = register value
2011  *              @const struct tvec_regdef *rd@ = register definition
2012  *              @struct tvec_state *tv@ = test-vector state
2013  *
2014  * Returns:     Zero on success, %$-1$% on error.
2015  *
2016  * Use:         Parse a register value from an input file.
2017  *
2018  *              An enumerated value may be given by name or as a literal
2019  *              value.  For enumerations based on numeric types, the literal
2020  *              values can be written in the same syntax as the underlying
2021  *              values.  For enumerations based on pointers, the only
2022  *              permitted literal is %|#nil|%, which denotes a null pointer.
2023  */
2024
2025 #define DEFPARSE_ENUM(tag_, ty, slot)                                   \
2026   static int parse_##slot##enum(union tvec_regval *rv,                  \
2027                                 const struct tvec_regdef *rd,           \
2028                                 struct tvec_state *tv)                  \
2029   {                                                                     \
2030     const struct tvec_##slot##enuminfo *ei = rd->arg.p;                 \
2031     const struct tvec_##slot##assoc *a;                                 \
2032     dstr d = DSTR_INIT;                                                 \
2033     int rc;                                                             \
2034                                                                         \
2035     if (tvec_readword(tv, &d, ";", "enumeration tag or " LITSTR_##tag_)) \
2036       { rc = -1; goto end; }                                            \
2037     for (a = ei->av; a->tag; a++)                                       \
2038       if (STRCMP(a->tag, ==, d.buf)) { FOUND_##tag_ goto done; }        \
2039     MISSING_##tag_                                                      \
2040     done:                                                               \
2041     if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }                  \
2042     rc = 0;                                                             \
2043   end:                                                                  \
2044     dstr_destroy(&d);                                                   \
2045     return (rc);                                                        \
2046   }
2047
2048 #define LITSTR_INT      "literal signed integer"
2049 #define FOUND_INT       rv->i = a->i;
2050 #define MISSING_INT     if (parse_signed(&rv->i, d.buf, ei->ir, tv))    \
2051                           { rc = -1; goto end; }
2052
2053 #define LITSTR_UINT     "literal unsigned integer"
2054 #define FOUND_UINT      rv->u = a->u;
2055 #define MISSING_UINT    if (parse_unsigned(&rv->u, d.buf, ei->ur, tv))  \
2056                           { rc = -1; goto end; }
2057
2058 #define LITSTR_FLT      "literal floating-point number, "               \
2059                           "`#-inf', `#+inf', or `#nan'"
2060 #define FOUND_FLT       rv->f = a->f;
2061 #define MISSING_FLT     if (parse_floating(&rv->f, 0, d.buf, ei->fi, tv)) \
2062                           { rc = -1; goto end; }
2063
2064 #define LITSTR_PTR      "`#nil'"
2065 #define FOUND_PTR       rv->p = (/*unconst*/ void *)a->p;
2066 #define MISSING_PTR     if (STRCMP(d.buf, ==, "#nil"))                  \
2067                           rv->p = 0;                                    \
2068                         else {                                          \
2069                           tvec_error(tv, "unknown `%s' value `%s'",     \
2070                                      ei->name, d.buf);                  \
2071                           rc = -1; goto end;                            \
2072                         }
2073
2074 TVEC_MISCSLOTS(DEFPARSE_ENUM)
2075
2076 #undef LITSTR_INT
2077 #undef FOUND_INT
2078 #undef MISSING_INT
2079
2080 #undef LITSTR_UINT
2081 #undef FOUND_UINT
2082 #undef MISSING_UINT
2083
2084 #undef LITSTR_FLT
2085 #undef FOUND_FLT
2086 #undef MISSING_FLT
2087
2088 #undef LITSTR_PTR
2089 #undef FOUND_PTR
2090 #undef MISSING_PTR
2091
2092 #undef DEFPARSE_ENUM
2093
2094 /* --- @dump_tenum@ --- *
2095  *
2096  * Arguments:   @const union tvec_regval *rv@ = register value
2097  *              @const struct tvec_regdef *rd@ = register definition
2098  *              @unsigned style@ = output style (@TVSF_...@)
2099  *              @const struct gprintf_ops *gops@, @void *gp@ = format output
2100  *
2101  * Returns:     ---
2102  *
2103  * Use:         Dump a register value to the format output.
2104  *
2105  *              Enumeration values are dumped as their symbolic names, if
2106  *              possible, with the underlying values provided as a comment
2107  *              unless compact output is requested, as for the underlying
2108  *              representation.  A null pointer is printed as %|#nil|%;
2109  *              non-null pointers are printed as %|#<TYPE PTR>|%, with the
2110  *              enumeration TYPE and the raw pointer PTR printed with the
2111  *              system's %|%p|% format specifier.
2112  */
2113
2114
2115 #define DEFDUMP_ENUM(tag_, ty, slot)                                    \
2116   static void dump_##slot##enum(const union tvec_regval *rv,            \
2117                                 const struct tvec_regdef *rd,           \
2118                                 unsigned style,                         \
2119                                 const struct gprintf_ops *gops, void *go) \
2120   {                                                                     \
2121     const struct tvec_##slot##enuminfo *ei = rd->arg.p;                 \
2122     const struct tvec_##slot##assoc *a;                                 \
2123                                                                         \
2124     for (a = ei->av; a->tag; a++)                                       \
2125       if (rv->slot == a->slot) {                                        \
2126         gprintf(gops, go, "%s", a->tag);                                \
2127         if (style&TVSF_COMPACT) return;                                 \
2128         gprintf(gops, go, " ; = "); break;                              \
2129       }                                                                 \
2130                                                                         \
2131     PRINTRAW_##tag_                                                     \
2132   }
2133
2134 #define MAYBE_PRINT_EXTRA                                               \
2135         if (style&TVSF_COMPACT) /* nothing to do */;                    \
2136         else if (!a->tag) { gprintf(gops, go, " ; = "); goto _extra; }  \
2137         else if (1) { gprintf(gops, go, " = "); goto _extra; }          \
2138         else _extra:
2139
2140 #define PRINTRAW_INT    gprintf(gops, go, "%ld", rv->i);                \
2141                         MAYBE_PRINT_EXTRA {                             \
2142                           format_signed_hex(gops, go, rv->i);           \
2143                           maybe_format_signed_char(gops, go, rv->i);    \
2144                         }
2145
2146 #define PRINTRAW_UINT   gprintf(gops, go, "%lu", rv->u);                \
2147                         MAYBE_PRINT_EXTRA {                             \
2148                           format_unsigned_hex(gops, go, rv->u);         \
2149                           maybe_format_unsigned_char(gops, go, rv->u);  \
2150                         }
2151
2152 #define PRINTRAW_FLT    format_floating(gops, go, rv->f);
2153
2154 #define PRINTRAW_PTR    if (!rv->p) gprintf(gops, go, "#nil");          \
2155                         else gprintf(gops, go, "#<%s %p>", ei->name, rv->p);
2156
2157 TVEC_MISCSLOTS(DEFDUMP_ENUM)
2158
2159 #undef PRINTRAW_INT
2160 #undef PRINTRAW_UINT
2161 #undef PRINTRAW_FLT
2162 #undef PRINTRAW_PTR
2163
2164 #undef MAYBE_PRINT_EXTRA
2165 #undef DEFDUMP_ENUM
2166
2167 /* Enumeration type definitions. */
2168 #define DEFTY_ENUM(tag, ty, slot)                                       \
2169   const struct tvec_regty tvty_##slot##enum = {                         \
2170     init_##slot##enum, trivial_release, eq_##slot##enum,                \
2171     tobuf_##slot##enum, frombuf_##slot##enum,                           \
2172     parse_##slot##enum, dump_##slot##enum                               \
2173   };
2174 TVEC_MISCSLOTS(DEFTY_ENUM)
2175 #undef DEFTY_ENUM
2176
2177 /* Predefined enumeration types. */
2178 static const struct tvec_iassoc bool_assoc[] = {
2179   { "nil",              0 },
2180   { "false",            0 },
2181   { "f",                0 },
2182   { "no",               0 },
2183   { "n",                0 },
2184   { "off",              0 },
2185
2186   { "t",                1 },
2187   { "true",             1 },
2188   { "yes",              1 },
2189   { "y",                1 },
2190   { "on",               1 },
2191
2192   TVEC_ENDENUM
2193 };
2194
2195 const struct tvec_ienuminfo tvenum_bool =
2196   { "bool", bool_assoc, &tvrange_int };
2197
2198 static const struct tvec_iassoc cmp_assoc[] = {
2199   { "<",                -1 },
2200   { "less",             -1 },
2201   { "lt",               -1 },
2202
2203   { "=",                 0 },
2204   { "equal",             0 },
2205   { "eq",                0 },
2206
2207   { ">",                +1 },
2208   { "greater",          +1 },
2209   { "gt",               +1 },
2210
2211   TVEC_ENDENUM
2212 };
2213
2214 const struct tvec_ienuminfo tvenum_cmp =
2215   { "cmp", cmp_assoc, &tvrange_int };
2216
2217 /* --- @tvec_claimeq_tenum@ --- *
2218  *
2219  * Arguments:   @struct tvec_state *tv@ = test-vector state
2220  *              @const struct tvec_typeenuminfo *ei@ = enumeration type info
2221  *              @ty t0, t1@ = two values
2222  *              @const char *file@, @unsigned @lno@ = calling file and line
2223  *              @const char *expr@ = the expression to quote on failure
2224  *
2225  * Returns:     Nonzero if @t0@ and @t1@ are equal, otherwise zero.
2226  *
2227  * Use:         Check that values of @t0@ and @t1@ are equal.  As for
2228  *              @tvec_claim@ above, a test case is automatically begun and
2229  *              ended if none is already underway.  If the values are
2230  *              unequal, then @tvec_fail@ is called, quoting @expr@, and the
2231  *              mismatched values are dumped: @t0@ is printed as the output
2232  *              value and @t1@ is printed as the input reference.
2233  */
2234
2235 #define DEFCLAIM(tag, ty, slot)                                         \
2236         int tvec_claimeq_##slot##enum                                   \
2237           (struct tvec_state *tv,                                       \
2238            const struct tvec_##slot##enuminfo *ei, ty e0, ty e1,        \
2239            const char *file, unsigned lno, const char *expr)            \
2240         {                                                               \
2241           union tvec_misc arg;                                          \
2242                                                                         \
2243           arg.p = ei;                                                   \
2244           tv->out[0].v.slot = GET_##tag(e0);                            \
2245           tv->in[0].v.slot = GET_##tag(e1);                             \
2246           return (tvec_claimeq(tv, &tvty_##slot##enum, &arg,            \
2247                                file, lno, expr));                       \
2248         }
2249 #define GET_INT(e) (e)
2250 #define GET_UINT(e) (e)
2251 #define GET_FLT(e) (e)
2252 #define GET_PTR(e) ((/*unconst*/ void *)(e))
2253 TVEC_MISCSLOTS(DEFCLAIM)
2254 #undef DEFCLAIM
2255 #undef GET_INT
2256 #undef GET_UINT
2257 #undef GET_FLT
2258 #undef GET_PTR
2259
2260 /*----- Flag types --------------------------------------------------------*/
2261
2262 /* Flag types are initialized, compared, and serialized as unsigned
2263  * integers.
2264  */
2265
2266 /* --- @parse_flags@ --- *
2267  *
2268  * Arguments:   @union tvec_regval *rv@ = register value
2269  *              @const struct tvec_regdef *rd@ = register definition
2270  *              @struct tvec_state *tv@ = test-vector state
2271  *
2272  * Returns:     Zero on success, %$-1$% on error.
2273  *
2274  * Use:         Parse a register value from an input file.
2275  *
2276  *              The input syntax is a sequence of items separated by `|'
2277  *              signs.  Each item may be the symbolic name of a field value,
2278  *              or a literal unsigned integer.  The masks associated with the
2279  *              given symbolic names must be disjoint.  The resulting
2280  *              numerical value is simply the bitwise OR of the given values.
2281  */
2282
2283 static int parse_flags(union tvec_regval *rv, const struct tvec_regdef *rd,
2284                        struct tvec_state *tv)
2285 {
2286   const struct tvec_flaginfo *fi = rd->arg.p;
2287   const struct tvec_flag *f;
2288   unsigned long m = 0, v = 0, t;
2289   dstr d = DSTR_INIT;
2290   int ch, rc;
2291
2292   for (;;) {
2293
2294     /* Read the next item. */
2295     DRESET(&d);
2296     if (tvec_readword(tv, &d, "|;", "flag name or integer"))
2297       { rc = -1; goto end; }
2298
2299     /* Try to find a matching entry in the table. */
2300     for (f = fi->fv; f->tag; f++)
2301       if (STRCMP(f->tag, ==, d.buf)) {
2302         if (m&f->m)
2303           { tvec_error(tv, "colliding flag setting"); rc = -1; goto end; }
2304         else
2305           { m |= f->m; v |= f->v; goto next; }
2306       }
2307
2308     /* Otherwise, try to parse it as a raw integer. */
2309     if (parse_unsigned(&t, d.buf, fi->range, tv))
2310       { rc = -1; goto end; }
2311     v |= t;
2312
2313   next:
2314     /* Advance to the next token.  If it's a separator then consume it, and
2315      * go round again.  Otherwise we stop here.
2316      */
2317     if (tvec_nexttoken(tv)) break;
2318     ch = getc(tv->fp);
2319       if (ch != '|') { tvec_syntax(tv, ch, "`|'"); rc = -1; goto end; }
2320       if (tvec_nexttoken(tv))
2321       { tvec_syntax(tv, '\n', "flag name or integer"); rc = -1; goto end; }
2322   }
2323
2324   /* Done. */
2325   rv->u = v; rc = 0;
2326 end:
2327   dstr_destroy(&d);
2328   return (rc);
2329 }
2330
2331 /* --- @dump_flags@ --- *
2332  *
2333  * Arguments:   @const union tvec_regval *rv@ = register value
2334  *              @const struct tvec_regdef *rd@ = register definition
2335  *              @unsigned style@ = output style (@TVSF_...@)
2336  *              @const struct gprintf_ops *gops@, @void *gp@ = format output
2337  *
2338  * Returns:     ---
2339  *
2340  * Use:         Dump a register value to the format output.
2341  *
2342  *              The table of symbolic names and their associated values and
2343  *              masks is repeatedly scanned, in order, to find disjoint
2344  *              matches -- i.e., entries whose value matches the target value
2345  *              in the bit positions indicated by the mask, and whose mask
2346  *              doesn't overlap with any previously found matches; the names
2347  *              are then output, separated by `|'.  Any remaining nonzero
2348  *              bits not covered by any of the matching masks are output as a
2349  *              single literal integer, in hex.
2350  *
2351  *              Unless compact output is requested, or no symbolic names were
2352  *              found, the raw numeric value is also printed in hex, as a
2353  *              comment.
2354  */
2355
2356 static void dump_flags(const union tvec_regval *rv,
2357                        const struct tvec_regdef *rd,
2358                        unsigned style,
2359                        const struct gprintf_ops *gops, void *go)
2360 {
2361   const struct tvec_flaginfo *fi = rd->arg.p;
2362   const struct tvec_flag *f;
2363   unsigned long m = ~0ul, v = rv->u;
2364   const char *sep;
2365
2366   for (f = fi->fv, sep = ""; f->tag; f++)
2367     if ((m&f->m) && (v&f->m) == f->v) {
2368       gprintf(gops, go, "%s%s", sep, f->tag); m &= ~f->m;
2369       sep = style&TVSF_COMPACT ? "|" : " | ";
2370     }
2371
2372   if (v&m) gprintf(gops, go, "%s0x%0*lx", sep, hex_width(v), v&m);
2373
2374   if (m != ~0ul && !(style&TVSF_COMPACT))
2375     gprintf(gops, go, " ; = 0x%0*lx", hex_width(rv->u), rv->u);
2376 }
2377
2378 /* Flags type definition. */
2379 const struct tvec_regty tvty_flags = {
2380   init_uint, trivial_release, eq_uint,
2381   tobuf_uint, frombuf_uint,
2382   parse_flags, dump_flags
2383 };
2384
2385 /* --- @tvec_claimeq_flags@ --- *
2386  *
2387  * Arguments:   @struct tvec_state *tv@ = test-vector state
2388  *              @const struct tvec_flaginfo *fi@ = flags type info
2389  *              @unsigned long f0, f1@ = two values
2390  *              @const char *file@, @unsigned @lno@ = calling file and line
2391  *              @const char *expr@ = the expression to quote on failure
2392  *
2393  * Returns:     Nonzero if @f0@ and @f1@ are equal, otherwise zero.
2394  *
2395  * Use:         Check that values of @f0@ and @f1@ are equal.  As for
2396  *              @tvec_claim@ above, a test case is automatically begun and
2397  *              ended if none is already underway.  If the values are
2398  *              unequal, then @tvec_fail@ is called, quoting @expr@, and the
2399  *              mismatched values are dumped: @f0@ is printed as the output
2400  *              value and @f1@ is printed as the input reference.
2401  */
2402
2403 int tvec_claimeq_flags(struct tvec_state *tv,
2404                        const struct tvec_flaginfo *fi,
2405                        unsigned long f0, unsigned long f1,
2406                        const char *file, unsigned lno, const char *expr)
2407 {
2408   union tvec_misc arg;
2409
2410   arg.p = fi; tv->out[0].v.u = f0; tv->in[0].v.u = f1;
2411   return (tvec_claimeq(tv, &tvty_flags, &arg, file, lno, expr));
2412 }
2413
2414 /*----- Characters --------------------------------------------------------*/
2415
2416 /* Character values are initialized and compared as signed integers. */
2417
2418 /* --- @tobuf_char@ --- *
2419  *
2420  * Arguments:   @buf *b@ = buffer
2421  *              @const union tvec_regval *rv@ = register value
2422  *              @const struct tvec_regdef *rd@ = register definition
2423  *
2424  * Returns:     Zero on success, %$-1$% on failure.
2425  *
2426  * Use:         Serialize a register value to a buffer.
2427  *
2428  *              Character values are serialized as little-endian 32-bit
2429  *              unsigned integers, with %|EOF|% serialized as all-bits-set.
2430  */
2431
2432 static int tobuf_char(buf *b, const union tvec_regval *rv,
2433                       const struct tvec_regdef *rd)
2434 {
2435   uint32 u;
2436
2437   if (0 <= rv->i && rv->i <= UCHAR_MAX) u = rv->i;
2438   else if (rv->i == EOF) u = MASK32;
2439   else return (-1);
2440   return (buf_putu32l(b, u));
2441 }
2442
2443 /* --- @frombuf_char@ --- *
2444  *
2445  * Arguments:   @buf *b@ = buffer
2446  *              @union tvec_regval *rv@ = register value
2447  *              @const struct tvec_regdef *rd@ = register definition
2448  *
2449  * Returns:     Zero on success, %$-1$% on failure.
2450  *
2451  * Use:         Deserialize a register value from a buffer.
2452  *
2453  *              Character values are serialized as little-endian 32-bit
2454  *              unsigned integers, with %|EOF|% serialized as all-bits-set.
2455  */
2456
2457 static int frombuf_char(buf *b, union tvec_regval *rv,
2458                         const struct tvec_regdef *rd)
2459 {
2460   uint32 u;
2461
2462   if (buf_getu32l(b, &u)) return (-1);
2463   if (0 <= u && u <= UCHAR_MAX) rv->i = u;
2464   else if (u == MASK32) rv->i = EOF;
2465   else return (-1);
2466   return (0);
2467 }
2468
2469 /* --- @parse_char@ --- *
2470  *
2471  * Arguments:   @union tvec_regval *rv@ = register value
2472  *              @const struct tvec_regdef *rd@ = register definition
2473  *              @struct tvec_state *tv@ = test-vector state
2474  *
2475  * Returns:     Zero on success, %$-1$% on error.
2476  *
2477  * Use:         Parse a register value from an input file.
2478  *
2479  *              A character value can be given by symbolic name, with a
2480  *              leading `%|#|%'; or a character or `%|\|%'-escape sequence,
2481  *              optionally in single quotes.
2482  *
2483  *              The following escape sequences and character names are
2484  *              recognized.
2485  *
2486  *              * `%|#eof|%' is the special end-of-file marker.
2487  *
2488  *              * `%|#nul|%' is the NUL character, sometimes used to
2489  *                terminate strings.
2490  *
2491  *              * `%|bell|%', `%|bel|%', `%|ding|%', or `%|\a|%' is the BEL
2492  *                character used to ring the terminal bell (or do some other
2493  *                thing to attract the user's attention).
2494  *
2495  *              * %|#backspace|%, %|#bs|%, or %|\b|% is the backspace
2496  *                character, used to move the cursor backwords by one cell.
2497  *
2498  *              * %|#escape|% %|#esc|%, or%|\e|% is the escape character,
2499  *                used to introduce special terminal commands.
2500  *
2501  *              * %|#formfeed|%, %|#ff|%, or %|\f|% is the formfeed
2502  *                character, used to separate pages of text.
2503  *
2504  *              * %|#newline|%, %|#linefeed|%, %|#lf|%, %|#nl|%, or %|\n|% is
2505  *                the newline character, used to terminate lines of text or
2506  *                advance the cursor to the next line (perhaps without
2507  *                returning it to the start of the line).
2508  *
2509  *              * %|#return|%, %|#carriage-return|%, %|#cr|%, or %|\r|% is
2510  *                the carriage-return character, used to return the cursor to
2511  *                the start of the line.
2512  *
2513  *              * %|#tab|%, %|#horizontal-tab|%, %|#ht|%, or %|\t|% is the
2514  *                tab character, used to advance the cursor to the next tab
2515  *                stop on the current line.
2516  *
2517  *              * %|#vertical-tab|%, %|#vt|%, %|\v|% is the vertical tab
2518  *                character.
2519  *
2520  *              * %|#space|%, %|#spc|% is the space character.
2521  *
2522  *              * %|#delete|%, %|#del|% is the delete character, used to
2523  *                erase the most recent character.
2524  *
2525  *              * %|\'|% is the single-quote character.
2526  *
2527  *              * %|\\|% is the backslash character.
2528  *
2529  *              * %|\"|% is the double-quote character.
2530  *
2531  *              * %|\NNN|% or %|\{NNN}|% is the character with code NNN in
2532  *                octal.  The NNN may be up to three digits long.
2533  *
2534  *              * %|\xNN|% or %|\x{NN}|% is the character with code NNN in
2535  *                hexadecimal.
2536  */
2537
2538 static int parse_char(union tvec_regval *rv, const struct tvec_regdef *rd,
2539                       struct tvec_state *tv)
2540 {
2541   dstr d = DSTR_INIT;
2542   int ch, rc;
2543   unsigned f = 0;
2544 #define f_quote 1u
2545
2546   /* Inspect the character to see what we're up against. */
2547   ch = getc(tv->fp);
2548
2549   if (ch == '#') {
2550     /* It looks like a special token.  Push the `%|#|%' back and fetch the
2551      * whole word.  If there's just the `%|#|%' after all, then treat it as
2552      * literal.
2553      */
2554
2555     ungetc(ch, tv->fp);
2556     if (tvec_readword(tv, &d, ";", "character name")) { rc = -1; goto end; }
2557     if (STRCMP(d.buf, !=, "#")) {
2558       if (read_charname(&ch, d.buf, RCF_EOFOK)) {
2559         rc = tvec_error(tv, "unknown character name `%s'", d.buf);
2560         goto end;
2561       }
2562       if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
2563       rv->i = ch; rc = 0; goto end;
2564     }
2565   }
2566
2567   /* If this is a single quote then we expect to see a matching one later,
2568    * and we should process backslash escapes.  Get the next character and see
2569    * what happens.
2570    */
2571   if (ch == '\'') { f |= f_quote; ch = getc(tv->fp); }
2572
2573   /* Main character dispatch. */
2574   switch (ch) {
2575
2576     case ';':
2577       /* Unquoted, semicolon begins a comment. */
2578       if (!(f&f_quote)) { rc = tvec_syntax(tv, ch, "character"); goto end; }
2579       else goto plain;
2580
2581     case '\n':
2582       /* A newline.  If we saw a single quote, then treat that as literal.
2583        * Otherwise this is an error.
2584        */
2585       if (!(f&f_quote)) goto nochar;
2586       else { f &= ~f_quote; ungetc(ch, tv->fp); ch = '\''; goto plain; }
2587
2588     case EOF:
2589       /* End-of-file.  Similar to newline, but with slightly different
2590        * effects on the parse state.
2591        */
2592       if (!(f&f_quote)) goto nochar;
2593       else { f &= ~f_quote; ch = '\''; goto plain; }
2594
2595     case '\'': nochar:
2596       /* A single quote.  This must be the second of a pair, and there should
2597        * have been a character or escape sequence between them.
2598        */
2599       rc = tvec_syntax(tv, ch, "character"); goto end;
2600
2601     case '\\':
2602       /* A backslash.  Read a character escape. */
2603       if (read_charesc(&ch, tv)) return (-1);
2604
2605     default: plain:
2606       /* Anything else.  Treat as literal. */
2607       rv->i = ch; break;
2608   }
2609
2610   /* If we saw an opening quote, then expect the closing quote. */
2611   if (f&f_quote) {
2612     ch = getc(tv->fp);
2613     if (ch != '\'') { rc = tvec_syntax(tv, ch, "`''"); goto end; }
2614   }
2615
2616   /* Done. */
2617   if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
2618   rc = 0;
2619 end:
2620   dstr_destroy(&d);
2621   return (rc);
2622
2623 #undef f_quote
2624 }
2625
2626 /* --- @dump_char@ --- *
2627  *
2628  * Arguments:   @const union tvec_regval *rv@ = register value
2629  *              @const struct tvec_regdef *rd@ = register definition
2630  *              @unsigned style@ = output style (@TVSF_...@)
2631  *              @const struct gprintf_ops *gops@, @void *gp@ = format output
2632  *
2633  * Returns:     ---
2634  *
2635  * Use:         Dump a register value to the format output.
2636  *
2637  *              Character values are dumped as their symbolic names, if any,
2638  *              or as a character or escape sequence within single quotes
2639  *              (which may be omitted in compact style).  If compact output
2640  *              is not requested, then the single-quoted representation (for
2641  *              characters dumped as symbolic names) and integer code in
2642  *              decimal and hex are printed as a comment.
2643  */
2644
2645 static void dump_char(const union tvec_regval *rv,
2646                       const struct tvec_regdef *rd,
2647                       unsigned style,
2648                       const struct gprintf_ops *gops, void *go)
2649 {
2650   const char *p;
2651   unsigned f = 0;
2652 #define f_semi 1u
2653
2654   /* Print a character name if we can find one. */
2655   p = find_charname(rv->i, (style&TVSF_COMPACT) ? CTF_SHORT : CTF_PREFER);
2656   if (p) {
2657     gprintf(gops, go, "%s", p);
2658     if (style&TVSF_COMPACT) return;
2659     else { gprintf(gops, go, " ;"); f |= f_semi; }
2660   }
2661
2662   /* If the character isn't @EOF@ then print it as a single-quoted thing.
2663    * In compact style, see if we can omit the quotes.
2664    */
2665   if (rv->i >= 0) {
2666     if (f&f_semi) gprintf(gops, go, " = ");
2667     switch (rv->i) {
2668       case ' ': case '\\': case '\'': quote:
2669         format_char(gops, go, rv->i);
2670         break;
2671       default:
2672         if (!(style&TVSF_COMPACT) || !isprint(rv->i)) goto quote;
2673         gprintf(gops, go, "%c", (int)rv->i);
2674         return;
2675     }
2676   }
2677
2678   /* And the character code as an integer. */
2679   if (!(style&TVSF_COMPACT)) {
2680     if (!(f&f_semi)) gprintf(gops, go, " ;");
2681     gprintf(gops, go, " = %ld = ", rv->i);
2682     format_signed_hex(gops, go, rv->i);
2683   }
2684
2685 #undef f_semi
2686 }
2687
2688 /* Character type definition. */
2689 const struct tvec_regty tvty_char = {
2690   init_int, trivial_release, eq_int,
2691   tobuf_char, frombuf_char,
2692   parse_char, dump_char
2693 };
2694
2695 /* --- @tvec_claimeq_char@ --- *
2696  *
2697  * Arguments:   @struct tvec_state *tv@ = test-vector state
2698  *              @int ch0, ch1@ = two character codes
2699  *              @const char *file@, @unsigned @lno@ = calling file and line
2700  *              @const char *expr@ = the expression to quote on failure
2701  *
2702  * Returns:     Nonzero if @ch0@ and @ch1@ are equal, otherwise zero.
2703  *
2704  * Use:         Check that values of @ch0@ and @ch1@ are equal.  As for
2705  *              @tvec_claim@ above, a test case is automatically begun and
2706  *              ended if none is already underway.  If the values are
2707  *              unequal, then @tvec_fail@ is called, quoting @expr@, and the
2708  *              mismatched values are dumped: @ch0@ is printed as the output
2709  *              value and @ch1@ is printed as the input reference.
2710  */
2711
2712 int tvec_claimeq_char(struct tvec_state *tv, int c0, int c1,
2713                       const char *file, unsigned lno, const char *expr)
2714 {
2715   tv->out[0].v.i = c0; tv->in[0].v.i = c1;
2716   return (tvec_claimeq(tv, &tvty_char, 0, file, lno, expr));
2717 }
2718
2719 /*----- Text and byte strings ---------------------------------------------*/
2720
2721 /* --- @init_text@, @init_bytes@ --- *
2722  *
2723  * Arguments:   @union tvec_regval *rv@ = register value
2724  *              @const struct tvec_regdef *rd@ = register definition
2725  *
2726  * Returns:     ---
2727  *
2728  * Use:         Initialize a register value.
2729  *
2730  *              Text and binary string values are initialized with a null
2731  *              pointer and zero length.
2732  */
2733
2734 static void init_text(union tvec_regval *rv, const struct tvec_regdef *rd)
2735   { rv->text.p = 0; rv->text.sz = 0; }
2736
2737 static void init_bytes(union tvec_regval *rv, const struct tvec_regdef *rd)
2738   { rv->bytes.p = 0; rv->bytes.sz = 0; }
2739
2740 /* --- @release_string@, @release_bytes@ --- *
2741  *
2742  * Arguments:   @const union tvec_regval *rv@ = register value
2743  *              @const struct tvec_regdef *rd@ = register definition
2744  *
2745  * Returns:     ---
2746  *
2747  * Use:         Release resources held by a register value.
2748  *
2749  *              Text and binary string buffers are freed.
2750  */
2751
2752 static void release_text(union tvec_regval *rv,
2753                          const struct tvec_regdef *rd)
2754   { xfree(rv->text.p); }
2755
2756 static void release_bytes(union tvec_regval *rv,
2757                           const struct tvec_regdef *rd)
2758   { xfree(rv->bytes.p); }
2759
2760 /* --- @eq_text@, @eq_bytes@ --- *
2761  *
2762  * Arguments:   @const union tvec_regval *rv0, *rv1@ = register values
2763  *              @const struct tvec_regdef *rd@ = register definition
2764  *
2765  * Returns:     Nonzero if the values are equal, zero if unequal
2766  *
2767  * Use:         Compare register values for equality.
2768  */
2769
2770 static int eq_text(const union tvec_regval *rv0,
2771                    const union tvec_regval *rv1,
2772                    const struct tvec_regdef *rd)
2773 {
2774   return (rv0->text.sz == rv1->text.sz &&
2775           (!rv0->text.sz ||
2776            MEMCMP(rv0->text.p, ==, rv1->text.p, rv1->text.sz)));
2777 }
2778
2779 static int eq_bytes(const union tvec_regval *rv0,
2780                     const union tvec_regval *rv1,
2781                     const struct tvec_regdef *rd)
2782 {
2783   return (rv0->bytes.sz == rv1->bytes.sz &&
2784           (!rv0->bytes.sz ||
2785            MEMCMP(rv0->bytes.p, ==, rv1->bytes.p, rv1->bytes.sz)));
2786 }
2787
2788 /* --- @tobuf_text@, @tobuf_bytes@ --- *
2789  *
2790  * Arguments:   @buf *b@ = buffer
2791  *              @const union tvec_regval *rv@ = register value
2792  *              @const struct tvec_regdef *rd@ = register definition
2793  *
2794  * Returns:     Zero on success, %$-1$% on failure.
2795  *
2796  * Use:         Serialize a register value to a buffer.
2797  *
2798  *              Text and binary string values are serialized as a little-
2799  *              endian 64-bit length %$n$% in bytes followed by %$n$% bytes
2800  *              of string data.
2801  */
2802
2803 static int tobuf_text(buf *b, const union tvec_regval *rv,
2804                       const struct tvec_regdef *rd)
2805   { return (buf_putmem64l(b, rv->text.p, rv->text.sz)); }
2806
2807 static int tobuf_bytes(buf *b, const union tvec_regval *rv,
2808                        const struct tvec_regdef *rd)
2809   { return (buf_putmem64l(b, rv->bytes.p, rv->bytes.sz)); }
2810
2811 /* --- @frombuf_text@, @frombuf_bytes@ --- *
2812  *
2813  * Arguments:   @buf *b@ = buffer
2814  *              @union tvec_regval *rv@ = register value
2815  *              @const struct tvec_regdef *rd@ = register definition
2816  *
2817  * Returns:     Zero on success, %$-1$% on failure.
2818  *
2819  * Use:         Deserialize a register value from a buffer.
2820  *
2821  *              Text and binary string values are serialized as a little-
2822  *              endian 64-bit length %$n$% in bytes followed by %$n$% bytes
2823  *              of string data.
2824  */
2825
2826 static int frombuf_text(buf *b, union tvec_regval *rv,
2827                         const struct tvec_regdef *rd)
2828 {
2829   const void *p;
2830   size_t sz;
2831
2832   p = buf_getmem64l(b, &sz); if (!p) return (-1);
2833   tvec_alloctext(rv, sz); memcpy(rv->text.p, p, sz); rv->text.p[sz] = 0;
2834   return (0);
2835 }
2836
2837 static int frombuf_bytes(buf *b, union tvec_regval *rv,
2838                          const struct tvec_regdef *rd)
2839 {
2840   const void *p;
2841   size_t sz;
2842
2843   p = buf_getmem64l(b, &sz); if (!p) return (-1);
2844   tvec_allocbytes(rv, sz); memcpy(rv->bytes.p, p, sz);
2845   return (0);
2846 }
2847
2848 /* --- @check_string_length@ --- *
2849  *
2850  * Arguments:   @size_t sz@ = found string length
2851  *              @const struct tvec_urange *ur@ = acceptable range
2852  *              @struct tvec_state *tv@ = test-vector state
2853  *
2854  * Returns:     Zero on success, %$-1$% on error.
2855  *
2856  * Use:         Checks that @sz@ is within the bounds described by @ur@,
2857  *              reporting an error if not.
2858  */
2859
2860 static int check_string_length(size_t sz, const struct tvec_urange *ur,
2861                                struct tvec_state *tv)
2862 {
2863   if (ur && (ur->min > sz || sz > ur->max))
2864     return (tvec_error(tv,
2865                        "invalid string length %lu; must be in [%lu .. %lu]",
2866                        (unsigned long)sz, ur->min, ur->max));
2867   return (0);
2868 }
2869
2870 /* --- @parse_text@, @parse_bytes@ --- *
2871  *
2872  * Arguments:   @union tvec_regval *rv@ = register value
2873  *              @const struct tvec_regdef *rd@ = register definition
2874  *              @struct tvec_state *tv@ = test-vector state
2875  *
2876  * Returns:     Zero on success, %$-1$% on error.
2877  *
2878  * Use:         Parse a register value from an input file.
2879  *
2880  *              The input format for both kinds of strings is basically the
2881  *              same: a `compound string', consisting of
2882  *
2883  *                * single-quoted strings, which are interpreted entirely
2884  *                  literally, but can't contain single quotes or newlines;
2885  *
2886  *                * double-quoted strings, in which `%|\|%'-escapes are
2887  *                  interpreted as for characters;
2888  *
2889  *                * character names, marked by an initial `%|#|%' sign;
2890  *
2891  *                * special tokens marked by an initial `%|!|%' sign; or
2892  *
2893  *                * barewords interpreted according to the current coding
2894  *                  scheme.
2895  *
2896  *              The special tokens are
2897  *
2898  *                * `%|!bare|%', which causes subsequent sequences of
2899  *                  barewords to be treated as plain text;
2900  *
2901  *                * `%|!hex|%', `%|!base32|%', `%|!base64|%', which cause
2902  *                  subsequent barewords to be decoded in the requested
2903  *                  manner.
2904  *
2905  *                * `%|!repeat|% %$n$% %|{|% %%\textit{string}%% %|}|%',
2906  *                  which includes %$n$% copies of the (compound) string.
2907  *
2908  *              The only difference between text and binary strings is that
2909  *              the initial coding scheme is %|bare|% for text strings and
2910  *              %|hex|% for binary strings.
2911  */
2912
2913 static int parse_text(union tvec_regval *rv, const struct tvec_regdef *rd,
2914                       struct tvec_state *tv)
2915 {
2916   void *p = rv->text.p;
2917
2918   if (read_compound_string(&p, &rv->text.sz, TVCODE_BARE, 0, tv))
2919     return (-1);
2920   rv->text.p = p;
2921   if (check_string_length(rv->text.sz, rd->arg.p, tv)) return (-1);
2922   return (0);
2923 }
2924
2925 static int parse_bytes(union tvec_regval *rv, const struct tvec_regdef *rd,
2926                        struct tvec_state *tv)
2927 {
2928   void *p = rv->bytes.p;
2929
2930   if (read_compound_string(&p, &rv->bytes.sz, TVCODE_HEX, 0, tv))
2931     return (-1);
2932   rv->bytes.p = p;
2933   if (check_string_length(rv->bytes.sz, rd->arg.p, tv)) return (-1);
2934   return (0);
2935 }
2936
2937 /* --- @dump_text@, @dump_bytes@ --- *
2938  *
2939  * Arguments:   @const union tvec_regval *rv@ = register value
2940  *              @const struct tvec_regdef *rd@ = register definition
2941  *              @unsigned style@ = output style (@TVSF_...@)
2942  *              @const struct gprintf_ops *gops@, @void *gp@ = format output
2943  *
2944  * Returns:     ---
2945  *
2946  * Use:         Dump a register value to the format output.
2947  *
2948  *              Text string values are dumped as plain text, in double quotes
2949  *              if necessary, and using backslash escape sequences for
2950  *              nonprintable characters.  Unless compact output is requested,
2951  *              strings consisting of multiple lines are dumped with each
2952  *              line of the string on a separate output line.
2953  *
2954  *              Binary string values are dumped in hexadecimal.  In compact
2955  *              style, the output simply consists of a single block of hex
2956  *              digits.  Otherwise, the dump is a display consisting of
2957  *              groups of hex digits, with comments showing the offset (if
2958  *              the string is long enough) and the corresponding plain text.
2959  *
2960  *              Empty strings are dumped as %|""|%.
2961  */
2962
2963 static void dump_text(const union tvec_regval *rv,
2964                       const struct tvec_regdef *rd,
2965                       unsigned style,
2966                       const struct gprintf_ops *gops, void *go)
2967 {
2968   const unsigned char *p, *q, *l;
2969   unsigned f = 0;
2970 #define f_nonword 1u
2971 #define f_newline 2u
2972
2973   if (!rv->text.sz) { gprintf(gops, go, "\"\""); return; }
2974
2975   p = (const unsigned char *)rv->text.p; l = p + rv->text.sz;
2976   switch (*p) {
2977     case '!': case '#': case ';': case '"': case '\'':
2978     case '(': case '{': case '[': case ']': case '}': case ')':
2979       f |= f_nonword; break;
2980   }
2981   for (q = p; q < l; q++)
2982     if (*q == '\n' && q != l - 1) f |= f_newline;
2983     else if (!*q || !isgraph(*q) || *q == '\\') f |= f_nonword;
2984   if (f&f_newline) { gprintf(gops, go, "\n\t"); goto quote; }
2985   else if (f&f_nonword) goto quote;
2986
2987   gops->putm(go, (const char *)p, rv->text.sz);
2988   return;
2989
2990 quote:
2991   gprintf(gops, go, "\"");
2992   for (q = p; q < l; q++)
2993     if (!isprint(*q) || *q == '"') {
2994       if (p < q) gops->putm(go, (const char *)p, q - p);
2995       if (*q != '\n' || (style&TVSF_COMPACT))
2996         format_charesc(gops, go, *q, FCF_BRACE);
2997       else {
2998         if (q + 1 == l) { gprintf(gops, go, "\\n\""); return; }
2999         else gprintf(gops, go, "\\n\"\n\t\"");
3000       }
3001       p = q + 1;
3002     }
3003   if (p < q) gops->putm(go, (const char *)p, q - p);
3004   gprintf(gops, go, "\"");
3005
3006 #undef f_nonword
3007 #undef f_newline
3008 }
3009
3010 static void dump_bytes(const union tvec_regval *rv,
3011                        const struct tvec_regdef *rd,
3012                        unsigned style,
3013                        const struct gprintf_ops *gops, void *go)
3014 {
3015   const unsigned char *p = rv->bytes.p, *l = p + rv->bytes.sz;
3016   size_t off, sz = rv->bytes.sz;
3017   unsigned i, n;
3018   int wd;
3019
3020   if (!sz) {
3021     gprintf(gops, go, style&TVSF_COMPACT ? "\"\"" : "\"\" ; empty");
3022     return;
3023   }
3024
3025   if (style&TVSF_COMPACT) {
3026     while (p < l) gprintf(gops, go, "%02x", *p++);
3027     return;
3028   }
3029
3030   if (sz > 16) gprintf(gops, go, "\n\t");
3031
3032   off = 0; wd = hex_width(sz);
3033   while (p < l) {
3034     if (l - p < 16) n = l - p;
3035     else n = 16;
3036
3037     for (i = 0; i < n; i++) {
3038       if (i < n) gprintf(gops, go, "%02x", p[i]);
3039       else gprintf(gops, go, "  ");
3040       if (i < n - 1 && i%4 == 3) gprintf(gops, go, " ");
3041     }
3042     gprintf(gops, go, " ; ");
3043     if (sz > 16) gprintf(gops, go, "[%0*lx] ", wd, (unsigned long)off);
3044     for (i = 0; i < n; i++)
3045       gprintf(gops, go, "%c", isprint(p[i]) ? p[i] : '.');
3046     p += n; off += n;
3047     if (p < l) gprintf(gops, go, "\n\t");
3048   }
3049 }
3050
3051 /* Text and byte string type definitions. */
3052 const struct tvec_regty tvty_text = {
3053   init_text, release_text, eq_text,
3054   tobuf_text, frombuf_text,
3055   parse_text, dump_text
3056 };
3057 const struct tvec_regty tvty_bytes = {
3058   init_bytes, release_bytes, eq_bytes,
3059   tobuf_bytes, frombuf_bytes,
3060   parse_bytes, dump_bytes
3061 };
3062
3063 /* --- @tvec_claimeq_text@ --- *
3064  *
3065  * Arguments:   @struct tvec_state *tv@ = test-vector state
3066  *              @const char *p0@, @size_t sz0@ = first string with length
3067  *              @const char *p1@, @size_t sz1@ = second string with length
3068  *              @const char *file@, @unsigned @lno@ = calling file and line
3069  *              @const char *expr@ = the expression to quote on failure
3070  *
3071  * Returns:     Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
3072  *              zero.
3073  *
3074  * Use:         Check that strings at @p0@ and @p1@ are equal.  As for
3075  *              @tvec_claim@ above, a test case is automatically begun and
3076  *              ended if none is already underway.  If the values are
3077  *              unequal, then @tvec_fail@ is called, quoting @expr@, and the
3078  *              mismatched values are dumped: @p0@ is printed as the output
3079  *              value and @p1@ is printed as the input reference.
3080  */
3081
3082 int tvec_claimeq_text(struct tvec_state *tv,
3083                       const char *p0, size_t sz0,
3084                       const char *p1, size_t sz1,
3085                       const char *file, unsigned lno, const char *expr)
3086 {
3087   tv->out[0].v.text.p = (/*unconst*/ char *)p0; tv->out[0].v.text.sz = sz0;
3088   tv->in[0].v.text.p =(/*unconst*/ char *) p1; tv->in[0].v.text.sz = sz1;
3089   return (tvec_claimeq(tv, &tvty_text, 0, file, lno, expr));
3090 }
3091
3092 /* --- @tvec_claimeq_textz@ --- *
3093  *
3094  * Arguments:   @struct tvec_state *tv@ = test-vector state
3095  *              @const char *p0, *p1@ = two strings to compare
3096  *              @const char *file@, @unsigned @lno@ = calling file and line
3097  *              @const char *expr@ = the expression to quote on failure
3098  *
3099  * Returns:     Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
3100  *              zero.
3101  *
3102  * Use:         Check that strings at @p0@ and @p1@ are equal, as for
3103  *              @tvec_claimeq_string@, except that the strings are assumed
3104  *              null-terminated, so their lengths don't need to be supplied
3105  *              explicitly.
3106  */
3107
3108 int tvec_claimeq_textz(struct tvec_state *tv,
3109                        const char *p0, const char *p1,
3110                        const char *file, unsigned lno, const char *expr)
3111 {
3112   tv->out[0].v.text.p = (/*unconst*/ char *)p0;
3113     tv->out[0].v.text.sz = strlen(p0);
3114   tv->in[0].v.text.p = (/*unconst*/ char *)p1;
3115     tv->in[0].v.text.sz = strlen(p1);
3116   return (tvec_claimeq(tv, &tvty_text, 0, file, lno, expr));
3117 }
3118
3119 /* --- @tvec_claimeq_bytes@ --- *
3120  *
3121  * Arguments:   @struct tvec_state *tv@ = test-vector state
3122  *              @const void *p0@, @size_t sz0@ = first string with length
3123  *              @const void *p1@, @size_t sz1@ = second string with length
3124  *              @const char *file@, @unsigned @lno@ = calling file and line
3125  *              @const char *expr@ = the expression to quote on failure
3126  *
3127  * Returns:     Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
3128  *              zero.
3129  *
3130  * Use:         Check that binary strings at @p0@ and @p1@ are equal.  As for
3131  *              @tvec_claim@ above, a test case is automatically begun and
3132  *              ended if none is already underway.  If the values are
3133  *              unequal, then @tvec_fail@ is called, quoting @expr@, and the
3134  *              mismatched values are dumped: @p0@ is printed as the output
3135  *              value and @p1@ is printed as the input reference.
3136  */
3137
3138 int tvec_claimeq_bytes(struct tvec_state *tv,
3139                        const void *p0, size_t sz0,
3140                        const void *p1, size_t sz1,
3141                        const char *file, unsigned lno, const char *expr)
3142 {
3143   tv->out[0].v.bytes.p = (/*unconst*/ void *)p0;
3144     tv->out[0].v.bytes.sz = sz0;
3145   tv->in[0].v.bytes.p = (/*unconst*/ void *)p1;
3146     tv->in[0].v.bytes.sz = sz1;
3147   return (tvec_claimeq(tv, &tvty_bytes, 0, file, lno, expr));
3148 }
3149
3150 /* --- @tvec_alloctext@, @tvec_allocbytes@ --- *
3151  *
3152  * Arguments:   @union tvec_regval *rv@ = register value
3153  *              @size_t sz@ = required size
3154  *
3155  * Returns:     ---
3156  *
3157  * Use:         Allocated space in a text or binary string register.  If the
3158  *              current register size is sufficient, its buffer is left
3159  *              alone; otherwise, the old buffer, if any, is freed and a
3160  *              fresh buffer allocated.  These functions are not intended to
3161  *              be used to adjust a buffer repeatedly, e.g., while building
3162  *              output incrementally: (a) they will perform badly, and (b)
3163  *              the old buffer contents are simply discarded if reallocation
3164  *              is necessary.  Instead, use a @dbuf@ or @dstr@.
3165  *
3166  *              The @tvec_alloctext@ function sneakily allocates an extra
3167  *              byte for a terminating zero.  The @tvec_allocbytes@ function
3168  *              doesn't do this.
3169  */
3170
3171 void tvec_alloctext(union tvec_regval *rv, size_t sz)
3172 {
3173   if (rv->text.sz <= sz) { xfree(rv->text.p); rv->text.p = xmalloc(sz + 1); }
3174   rv->text.sz = sz;
3175 }
3176
3177 void tvec_allocbytes(union tvec_regval *rv, size_t sz)
3178 {
3179   if (rv->bytes.sz < sz) { xfree(rv->bytes.p); rv->bytes.p = xmalloc(sz); }
3180   rv->bytes.sz = sz;
3181 }
3182
3183 /*----- Buffer type -------------------------------------------------------*/
3184
3185 /* Buffers are initialized and released as binary strings. */
3186
3187 /* --- @eq_buffer@ --- *
3188  *
3189  * Arguments:   @const union tvec_regval *rv0, *rv1@ = register values
3190  *              @const struct tvec_regdef *rd@ = register definition
3191  *
3192  * Returns:     Nonzero if the values are equal, zero if unequal
3193  *
3194  * Use:         Compare register values for equality.
3195  *
3196  *              Buffer values are equal if and only if their sizes are equal;
3197  *              their contents are %%\emph{not}%% compared.
3198  */
3199
3200 static int eq_buffer(const union tvec_regval *rv0,
3201                      const union tvec_regval *rv1,
3202                      const struct tvec_regdef *rd)
3203   { return (rv0->bytes.sz == rv1->bytes.sz); }
3204
3205 /* --- @tobuf_buffer@ --- *
3206  *
3207  * Arguments:   @buf *b@ = buffer
3208  *              @const union tvec_regval *rv@ = register value
3209  *              @const struct tvec_regdef *rd@ = register definition
3210  *
3211  * Returns:     Zero on success, %$-1$% on failure.
3212  *
3213  * Use:         Serialize a register value to a buffer.
3214  *
3215  *              Buffer values are serialized as just their lengths, as
3216  *              unsigned integers.
3217  */
3218
3219 static int tobuf_buffer(buf *b, const union tvec_regval *rv,
3220                          const struct tvec_regdef *rd)
3221   { return (unsigned_to_buf(b, rv->bytes.sz)); }
3222
3223 /* --- @allocate_buffer@ --- *
3224  *
3225  * Arguments:   @union tvec_regval *rv@ = register value
3226  *              @size_t sz@ = size to allocate
3227  *
3228  * Returns:     ---
3229  *
3230  * Use:         Allocate @sz@ bytes to the buffer and fill the space with a
3231  *              distinctive pattern.
3232  */
3233
3234 static void allocate_buffer(union tvec_regval *rv, size_t sz)
3235   { tvec_allocbytes(rv, sz); memset(rv->bytes.p, '?', sz); }
3236
3237 /* --- @frombuf_buffer@ --- *
3238  *
3239  * Arguments:   @buf *b@ = buffer
3240  *              @union tvec_regval *rv@ = register value
3241  *              @const struct tvec_regdef *rd@ = register definition
3242  *
3243  * Returns:     Zero on success, %$-1$% on failure.
3244  *
3245  * Use:         Deserialize a register value from a buffer.
3246  *
3247  *              Buffer values are serialized as just their lengths, as
3248  *              unsigned integers.  The buffer is allocated on
3249  *              deserialization and filled with a distinctive pattern.
3250  */
3251
3252 static int frombuf_buffer(buf *b, union tvec_regval *rv,
3253                           const struct tvec_regdef *rd)
3254 {
3255   unsigned long u;
3256
3257   if (unsigned_from_buf(b, &u)) return (-1);
3258   if (u > (size_t)-1) return (-1);
3259   allocate_buffer(rv, u);
3260   return (0);
3261 }
3262
3263 /* --- @parse_buffer@ --- *
3264  *
3265  * Arguments:   @union tvec_regval *rv@ = register value
3266  *              @const struct tvec_regdef *rd@ = register definition
3267  *              @struct tvec_state *tv@ = test-vector state
3268  *
3269  * Returns:     Zero on success, %$-1$% on error.
3270  *
3271  * Use:         Parse a register value from an input file.
3272  *
3273  *              The input format for a buffer value consists of an unsigned
3274  *              integer followed by an optional unit specifier consisting of
3275  *              an SI unit prefix and (optionally) the letter `B'.  Unit
3276  *              prefixes denote %%\emph{binary}%% multipliers, not decimal.
3277  *
3278  *              The buffer is allocated and filled with a distinctive
3279  *              pattern.
3280  */
3281
3282 static const char units[] = "kMGTPEZY";
3283
3284 static int parse_buffer(union tvec_regval *rv,
3285                         const struct tvec_regdef *rd,
3286                         struct tvec_state *tv)
3287 {
3288   dstr d = DSTR_INIT;
3289   const char *q, *unit;
3290   size_t pos;
3291   unsigned long u, t;
3292   int rc;
3293   unsigned f = 0;
3294 #define f_range 1u
3295
3296   if (tvec_readword(tv, &d, ";", "buffer length")) { rc = -1; goto end; }
3297   if (parse_unsigned_integer(&u, &q, d.buf)) goto bad;
3298   if (!*q) {
3299     tvec_skipspc(tv); pos = d.len;
3300     if (!tvec_readword(tv, &d, ";", 0)) pos++;
3301     q = d.buf + pos;
3302   }
3303
3304   if (u > (size_t)-1) goto rangerr;
3305   for (t = u, unit = units; *unit; unit++) {
3306     if (t > (size_t)-1/1024) f |= f_range;
3307     else t *= 1024;
3308     if (*q == *unit) {
3309       if (f&f_range) goto rangerr;
3310       u = t; q++; break;
3311     }
3312   }
3313   if (*q == 'B') q++;
3314   if (*q) goto bad;
3315   if (check_string_length(u, rd->arg.p, tv)) { rc = -1; goto end; }
3316
3317   if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
3318   allocate_buffer(rv, u);
3319   rc = 0;
3320 end:
3321   DDESTROY(&d); return (rc);
3322
3323 bad:
3324   tvec_error(tv, "invalid buffer length `%s'", d.buf);
3325   rc = -1; goto end;
3326
3327 rangerr:
3328   tvec_error(tv, "buffer length `%s' out of range", d.buf);
3329   rc = -1; goto end;
3330
3331 #undef f_range
3332 }
3333
3334 /* --- @dump_buffer@ --- *
3335  *
3336  * Arguments:   @const union tvec_regval *rv@ = register value
3337  *              @const struct tvec_regdef *rd@ = register definition
3338  *              @unsigned style@ = output style (@TVSF_...@)
3339  *              @const struct gprintf_ops *gops@, @void *gp@ = format output
3340  *
3341  * Returns:     ---
3342  *
3343  * Use:         Dump a register value to the format output.
3344  *
3345  *              Buffer values are dumped as their size with an appropriate
3346  *              unit specifier.  A unit prefix is only used if the size is an
3347  *              exact multiple of the relevant power of two.
3348  */
3349
3350 static void dump_buffer(const union tvec_regval *rv,
3351                         const struct tvec_regdef *rd,
3352                         unsigned style,
3353                         const struct gprintf_ops *gops, void *go)
3354 {
3355   const char *unit;
3356   unsigned long u = rv->bytes.sz;
3357
3358   if (!u || u%1024)
3359     gprintf(gops, go, "%lu B", u);
3360   else {
3361     for (unit = units, u /= 1024; !(u%1024) && unit[1]; u /= 1024, unit++);
3362     gprintf(gops, go, "%lu %cB", u, *unit);
3363   }
3364 }
3365
3366 /* Buffer type definition. */
3367 const struct tvec_regty tvty_buffer = {
3368   init_bytes, release_bytes, eq_buffer,
3369   tobuf_buffer, frombuf_buffer,
3370   parse_buffer, dump_buffer
3371 };
3372
3373 /*----- That's all, folks -------------------------------------------------*/