chiark - git - mdw - mLib/blob - utils/gprintf.c

   1 /* -*-c-*-
   2  *
   3  * Generalized string formatting
   4  *
   5  * (c) 2023 Straylight/Edgeware
   6  */
   7
   8 /*----- Licensing notice --------------------------------------------------*
   9  *
  10  * This file is part of the mLib utilities library.
  11  *
  12  * mLib is free software: you can redistribute it and/or modify it under
  13  * the terms of the GNU Library General Public License as published by
  14  * the Free Software Foundation; either version 2 of the License, or (at
  15  * your option) any later version.
  16  *
  17  * mLib is distributed in the hope that it will be useful, but WITHOUT
  18  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  19  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
  20  * License for more details.
  21  *
  22  * You should have received a copy of the GNU Library General Public
  23  * License along with mLib.  If not, write to the Free Software
  24  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  25  * USA.
  26  */
  27
  28 /*----- Header files ------------------------------------------------------*/
  29
  30 #include "config.h"
  31
  32 #include <assert.h>
  33 #include <ctype.h>
  34 #include <limits.h>
  35 #include <stdarg.h>
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <string.h>
  39
  40 #ifdef HAVE_FLOAT_H
  41 #  include <float.h>
  42 #endif
  43
  44 #ifdef HAVE_STDINT_H
  45 #  include <stdint.h>
  46 #endif
  47
  48 #include "darray.h"
  49 #include "dstr.h"
  50 #include "gprintf.h"
  51 #include "macros.h"
  52
  53 /*----- Tunable constants -------------------------------------------------*/
  54
/* For each format specifier, at least @STEP@ bytes are ensured before
 * writing the formatted result.
 *
 * Specifically, @vgprintf@ uses @STEP@ as the smallest output size it will
 * request from the @nputf@ operation for a conversion, and also as the
 * amount of room it reserves in its scratch format string before appending
 * a width or precision.
 */

#define STEP 64                         /* Buffer size for @vgprintf@ */
  60
  61 /*----- Preliminary definitions -------------------------------------------*/
  62
/* Conditional-inclusion helpers.  Each @IF_...(x)@ macro expands to its
 * argument when the corresponding feature test succeeds, and to nothing
 * otherwise, so that optional table entries and @case@ labels can be
 * written inline.
 */

/* @IF_FLOAT@: active when @HAVE_FLOAT_H@ is defined. */
#ifdef HAVE_FLOAT_H
#  define IF_FLOAT(x) x
#else
#  define IF_FLOAT(x)
#endif

/* @IF_LONGLONG@: active when @<limits.h>@ advertises a @long long@ limit. */
#if defined(LLONG_MAX) || defined(LONG_LONG_MAX)
#  define IF_LONGLONG(x) x
#else
#  define IF_LONGLONG(x)
#endif

/* @IF_INTMAX@: active when @INTMAX_MAX@ is defined. */
#ifdef INTMAX_MAX
#  define IF_INTMAX(x) x
#else
#  define IF_INTMAX(x)
#endif
  80
/* Tables of argument classes, written as `X-macros'.  Each entry has the
 * form @_(code, ty)@: @code@ is the suffix used to build the @fmt_...@
 * enumeration constant and the name of the matching member of the
 * @struct fmtarg@ union, and @ty@ is the C type used both to store the
 * argument and to fetch it with @va_arg@.
 *
 * @OUTPUT_FMTTYPES@ lists the classes which are passed on to the @nputf@
 * output operation.
 */

#define OUTPUT_FMTTYPES(_)                                              \
  _(i, unsigned int)                                                    \
  _(li, unsigned long)                                                  \
  IF_LONGLONG( _(lli, unsigned long long) )                             \
  _(zi, size_t)                                                         \
  _(ti, ptrdiff_t)                                                      \
  IF_INTMAX( _(ji, uintmax_t) )                                         \
  _(s, char *)                                                          \
  _(p, void *)                                                          \
  _(f, double)                                                          \
  _(Lf, long double)

/* @PERCENT_N_FMTTYPES@ lists the pointer classes used by `%n' conversions,
 * one per supported length modifier; @vgprintf@ stores the running total
 * through these rather than producing output.
 */

#define PERCENT_N_FMTTYPES(_)                                           \
  _(n, int *)                                                           \
  _(hhn, char *)                                                        \
  _(hn, short *)                                                        \
  _(ln, long *)                                                         \
  _(zn, size_t *)                                                       \
  _(tn, ptrdiff_t *)                                                    \
  IF_LONGLONG( _(lln, long long *) )                                    \
  IF_INTMAX( _(jn, intmax_t *) )

/* @FMTTYPES@ is simply the union of the two tables above. */

#define FMTTYPES(_)                                                     \
  OUTPUT_FMTTYPES(_)                                                    \
  PERCENT_N_FMTTYPES(_)
 106
/* Argument class codes: one @fmt_<code>@ constant for each entry in
 * @FMTTYPES@.  @fmt_unset@ (zero) marks an argument slot or format
 * specifier which has no argument class assigned; @fmt__limit@ is one more
 * than the largest real code.
 */

enum {
  fmt_unset = 0,
#define CODE(code, ty) fmt_##code,
  FMTTYPES(CODE)
#undef CODE
  fmt__limit
};
 114
/* One collected argument: @fmt@ is its argument class (a @fmt_...@ code),
 * and @u@ holds the value in the member named after that class, as fetched
 * from the caller's argument list.
 */

struct fmtarg {
  int fmt;                              /* Argument class code */
  union {
#define MEMB(code, ty) ty code;
    FMTTYPES(MEMB)
#undef MEMB
  } u;                                  /* The value, keyed by @fmt@ */
};

/* @fmtarg_v@ is a dynamic array of @struct fmtarg@, indexed by (zero-based)
 * argument position.
 */

DA_DECL(fmtarg_v, struct fmtarg);
 125
/* Length-modifier codes.  These occupy the low bits of a specifier's flags
 * word, selected by the @f_len@ mask; @len_std@ (zero) means that no length
 * modifier was given.
 */

enum {
  len_std = 0,                          /* No length modifier */
  len_hh,                               /* `hh' */
  len_h,                                /* `h' */
  len_l,                                /* `l' */
  len_ll,                               /* `ll' */
  len_z,                                /* `z' */
  len_t,                                /* `t' */
  len_j,                                /* `j' */
  len_L                                 /* `L' */
};
 137
/* Bits in a format specifier's flags word. */

#define f_len           0x000fu         /* Mask for the @len_...@ code */
#define f_wd            0x0010u         /* A field width was given */
#define f_wdarg         0x0020u         /* Width comes from an argument */
#define f_prec          0x0040u         /* A precision was given */
#define f_precarg       0x0080u         /* Precision comes from an argument */
#define f_plus          0x0100u         /* `+' flag */
#define f_minus         0x0200u         /* `-' flag */
#define f_sharp         0x0400u         /* `#' flag */
#define f_zero          0x0800u         /* `0' flag */
#define f_posarg        0x1000u         /* Explicit `N$' argument position */
 148
/* One compiled piece of the format string: a run of literal text followed
 * by (at most) one formatting directive.
 */

struct fmtspec {
  const char *p;                        /* Start of the literal text */
  size_t n;                             /* Length of the literal text */
  unsigned f;                           /* Flags (@f_...@) and length code */
  int fmt, ch;                          /* Argument class; conversion char
                                         * (zero at end of string, `%' for
                                         * a literal percent sign) */
  int wd, prec;                         /* Width and precision: literal
                                         * values, or argument indices if
                                         * @f_wdarg@/@f_precarg@ is set */
  int arg;                              /* Index of the main argument */
};

/* @fmtspec_v@ is a dynamic array of @struct fmtspec@, in format-string
 * order.
 */

DA_DECL(fmtspec_v, struct fmtspec);
 159
 160 /*----- Main code ---------------------------------------------------------*/
 161
 162 /* --- @vgprintf@ --- *
 163  *
 164  * Arguments:   @const struct gprintf_ops *ops@ = output operations
 165  *              @void *out@ = context for output operations
 166  *              @const char *p@ = pointer to @printf@-style format string
 167  *              @va_list *ap@ = argument handle
 168  *
 169  * Returns:     The number of characters written to the string.
 170  *
 171  * Use:         As for @gprintf@, but takes a reified argument tail.
 172  */
 173
 174 static void set_arg(fmtarg_v *av, size_t i, int fmt)
 175 {
 176   size_t j, n;
 177
 178   n = DA_LEN(av);
 179   if (i >= n) {
 180     DA_ENSURE(av, i + 1 - n);
 181     for (j = n; j <= i; j++) DA(av)[j].fmt = fmt_unset;
 182     DA_UNSAFE_EXTEND(av, i + 1 - n);
 183   }
 184
 185   if (DA(av)[i].fmt == fmt_unset) DA(av)[i].fmt = fmt;
 186   else assert(DA(av)[i].fmt == fmt);
 187 }
 188
 189 int vgprintf(const struct gprintf_ops *ops, void *out,
 190              const char *p, va_list *ap)
 191 {
 192   size_t sz, mx, n;
 193   dstr dd = DSTR_INIT;
 194   fmtspec_v sv = DA_INIT;
 195   fmtarg_v av = DA_INIT;
 196   struct fmtarg *fa, *fal;
 197   struct fmtspec *fs, *fsl;
 198   unsigned f;
 199   int i, anext, tot = 0;
 200   int wd, prec;
 201
 202   /* --- Initial pass through the input, parsing format specifiers --- *
 203    *
 204    * We essentially compile the format string into a vector of @fmtspec@
 205    * objects, each of which represents a chunk of literal text followed by a
 206    * (possibly imaginary, in the case of the final one) formatting directive.
 207    * Output then simply consists of interpreting these specifiers in order.
 208    */
 209
 210   anext = 0;
 211
 212   while (*p) {
 213     f = 0;
 214     DA_ENSURE(&sv, 1);
 215     fs = &DA(&sv)[DA_LEN(&sv)];
 216     DA_UNSAFE_EXTEND(&sv, 1);
 217
 218     /* --- Find the end of this literal portion --- */
 219
 220     fs->p = p;
 221     while (*p && *p != '%') p++;
 222     fs->n = p - fs->p;
 223
 224     /* --- Some simple cases --- *
 225      *
 226      * We might have reached the end of the string, or maybe a `%%' escape.
 227      */
 228
 229     if (!*p) { fs->fmt = fmt_unset; fs->ch = 0; break; }
 230     p++;
 231     if (*p == '%') { fs->fmt = fmt_unset; fs->ch = '%'; p++; continue; }
 232
 233     /* --- Pick up initial flags --- */
 234
 235   flags:
 236     for (;;) {
 237       switch (*p) {
 238         case '+': f |= f_plus; break;
 239         case '-': f |= f_minus; break;
 240         case '#': f |= f_sharp; break;
 241         case '0': f |= f_zero; break;
 242         default: goto done_flags;
 243       }
 244       p++;
 245     }
 246
 247     /* --- Pick up the field width --- */
 248
 249   done_flags:
 250     i = 0;
 251     while (ISDIGIT(*p)) i = 10*i + *p++ - '0';
 252
 253     /* --- Snag: this might have been an argument position indicator --- */
 254
 255     if (i && *p == '$' && (!f || f == f_zero)) {
 256       f |= f_posarg;
 257       fs->arg = i - 1;
 258       p++;
 259       goto flags;
 260     }
 261
 262     /* --- Set the field width --- *
 263      *
 264      * If @i@ is nonzero here then we have a numeric field width.  Otherwise
 265      * it might be `*', maybe with an explicit argument number.
 266      */
 267
 268     if (i) {
 269       f |= f_wd;
 270       fs->wd = i;
 271     } else if (*p == '*') {
 272       p++;
 273       if (!ISDIGIT(*p))
 274         i = anext++;
 275       else {
 276         i = *p++ - '0';
 277         while (ISDIGIT(*p)) i = 10*i + *p++ - '0';
 278         assert(*p == '$'); p++;
 279         assert(i > 0); i--;
 280       }
 281       f |= f_wd | f_wdarg;
 282       set_arg(&av, i, fmt_i); fs->wd = i;
 283     }
 284
 285     /* --- Maybe we have a precision spec --- */
 286
 287     if (*p == '.') {
 288       p++;
 289       f |= f_prec;
 290       if (ISDIGIT(*p)) {
 291         i = *p++ - '0';
 292         while (ISDIGIT(*p)) i = 10*i + *p++ - '0';
 293         fs->prec = i;
 294       } else if (*p != '*')
 295         fs->prec = 0;
 296       else {
 297         p++;
 298         if (!ISDIGIT(*p))
 299           i = anext++;
 300         else {
 301           i = *p++ - '0';
 302           while (ISDIGIT(*p)) i = 10*i + *p++ - '0';
 303           assert(*p == '$'); p++;
 304           assert(i > 0); i--;
 305         }
 306         f |= f_precarg;
 307         set_arg(&av, i, fmt_i); fs->prec = i;
 308       }
 309     }
 310
 311     /* --- Maybe some length flags --- */
 312
 313     switch (*p) {
 314       case 'h':
 315         p++;
 316         if (*p == 'h') { f |= len_hh; p++; } else f |= len_h;
 317         break;
 318       case 'l':
 319         p++;
 320         IF_LONGLONG( if (*p == 'l') { f |= len_ll; p++; } else ) f |= len_l;
 321         break;
 322       case 'L': f |= len_L; p++; break;
 323       case 'z': f |= len_z; p++; break;
 324       case 't': f |= len_t; p++; break;
 325       IF_INTMAX( case 'j': f |= len_j; p++; break; )
 326     }
 327
 328     /* --- The flags are now ready --- */
 329
 330     fs->f = f;
 331
 332     /* --- At the end, an actual directive --- */
 333
 334     fs->ch = *p;
 335     switch (*p++) {
 336       case '%':
 337         fs->fmt = fmt_unset;
 338         break;
 339       case 'd': case 'i': case 'x': case 'X': case 'o': case 'u':
 340         switch (f&f_len) {
 341           case len_l: fs->fmt = fmt_li; break;
 342           case len_z: fs->fmt = fmt_zi; break;
 343           case len_t: fs->fmt = fmt_ti; break;
 344           IF_LONGLONG( case len_ll: fs->fmt = fmt_lli; break; )
 345           IF_INTMAX( case len_j: fs->fmt = fmt_ji; break; )
 346           default: fs->fmt = fmt_i;
 347         }
 348         break;
 349       case 'a': case 'A':
 350       case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
 351         fs->fmt = (f&f_len) == len_L ? fmt_Lf : fmt_f;
 352         break;
 353       case 'c':
 354         fs->fmt = fmt_i;
 355         break;
 356       case 's':
 357         fs->fmt = fmt_s;
 358         break;
 359       case 'p':
 360         fs->fmt = fmt_p;
 361         break;
 362       case 'n':
 363         switch (f&f_len) {
 364           case len_hh: fs->fmt = fmt_hhn; break;
 365           case len_h: fs->fmt = fmt_hn; break;
 366           case len_l: fs->fmt = fmt_ln; break;
 367           case len_z: fs->fmt = fmt_zn; break;
 368           case len_t: fs->fmt = fmt_tn; break;
 369           IF_LONGLONG( case len_ll: fs->fmt = fmt_lln; break; )
 370           IF_INTMAX( case len_j: fs->fmt = fmt_jn; break; )
 371           default: fs->fmt = fmt_n;
 372         }
 373         break;
 374       default:
 375         fprintf(stderr,
 376                 "FATAL dstr_vputf: unknown format specifier `%c'\n", p[-1]);
 377         abort();
 378     }
 379
 380     /* --- Finally sort out the argument --- *
 381      *
 382      * If we don't have explicit argument positions then this comes after the
 383      * width and precision; and we don't know the type code until we've
 384      * parsed the specifier, so this seems the right place to handle it.
 385      */
 386
 387     if (!(f&f_posarg)) fs->arg = anext++;
 388     set_arg(&av, fs->arg, fs->fmt);
 389   }
 390
 391   /* --- Quick pass over the argument vector to collect the arguments --- */
 392
 393   for (fa = DA(&av), fal = fa + DA_LEN(&av); fa < fal; fa++) {
 394     switch (fa->fmt) {
 395 #define CASE(code, ty) case fmt_##code: fa->u.code = va_arg(*ap, ty); break;
 396       FMTTYPES(CASE)
 397 #undef CASE
 398       default: abort();
 399     }
 400   }
 401
 402   /* --- Final pass through the format string to produce output --- */
 403
 404   fa = DA(&av);
 405   for (fs = DA(&sv), fsl = fs + DA_LEN(&sv); fs < fsl; fs++) {
 406     f = fs->f;
 407
 408     /* --- Output the literal portion --- */
 409
 410     if (fs->n) {
 411       if (ops->putm(out, fs->p, fs->n)) return (-1);
 412       tot += fs->n;
 413     }
 414
 415     /* --- And now the variable portion --- */
 416
 417     if (fs->fmt == fmt_unset) {
 418       switch (fs->ch) {
 419         case 0: break;
 420         case '%': ops->putch(out, '%'); break;
 421         default: abort();
 422       }
 423       continue;
 424     }
 425
 426     DRESET(&dd);
 427     DPUTC(&dd, '%');
 428
 429     /* --- Resolve the width and precision --- */
 430
 431     if (!(f&f_wd))
 432       wd = 0;
 433     else {
 434       wd = (fs->f&f_wdarg) ? *(int *)&fa[fs->wd].u.i : fs->wd;
 435       if (wd < 0) { wd = -wd; f |= f_minus; }
 436     }
 437
 438     if (!(f&f_prec))
 439       prec = 0;
 440     else {
 441       prec = (fs->f&f_precarg) ? *(int *)&fa[fs->prec].u.i : fs->prec;
 442       if (prec < 0) { prec = 0; f &= ~f_prec; }
 443     }
 444
 445     /* --- Write out the flags, width and precision --- */
 446
 447     if (f&f_plus) DPUTC(&dd, '+');
 448     if (f&f_minus) DPUTC(&dd, '-');
 449     if (f&f_sharp) DPUTC(&dd, '#');
 450     if (f&f_zero) DPUTC(&dd, '0');
 451
 452     if (f&f_wd) {
 453       DENSURE(&dd, STEP);
 454       dd.len += sprintf(dd.buf + dd.len, "%d", wd);
 455     }
 456
 457     if (f&f_prec) {
 458       DENSURE(&dd, STEP + 1);
 459       dd.len += sprintf(dd.buf + dd.len, ".%d", prec);
 460     }
 461
 462     /* --- Write out the length gadget --- */
 463
 464     switch (f&f_len) {
 465       case len_hh: DPUTC(&dd, 'h'); /* fall through */
 466       case len_h: DPUTC(&dd, 'h'); break;
 467       IF_LONGLONG( case len_ll: DPUTC(&dd, 'l'); /* fall through */ )
 468       case len_l: DPUTC(&dd, 'l'); break;
 469       case len_z: DPUTC(&dd, 'z'); break;
 470       case len_t: DPUTC(&dd, 't'); break;
 471       case len_L: DPUTC(&dd, 'L'); break;
 472       IF_INTMAX( case len_j: DPUTC(&dd, 'j'); break; )
 473       case len_std: break;
 474       default: abort();
 475     }
 476
 477     /* --- And finally the actually important bit --- */
 478
 479     DPUTC(&dd, fs->ch);
 480     DPUTZ(&dd);
 481
 482     /* --- Make sure we have enough space for the output --- */
 483
 484     sz = STEP;
 485     if (sz < wd) sz = wd;
 486     if (sz < prec + 16) sz = prec + 16;
 487     switch (fs->ch) {
 488       case 'a': case 'A':
 489       case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
 490 #ifdef HAVE_FLOAT_H
 491         if (fs->ch == 'f') {
 492           mx = ((fs->f&f_len) == len_L ?
 493                 LDBL_MAX_10_EXP : DBL_MAX_10_EXP) + 16;
 494           if (sz < mx) sz = mx;
 495         }
 496         break;
 497 #else
 498 #  define MSG "<no float support>"
 499         if (ops->putm(out, MSG, sizeof(MSG) - 1)) return (-1);
 500         continue;
 501 #  undef MSG
 502 #endif
 503       case 's':
 504         if (!(f&f_prec)) {
 505           n = strlen(fa[fs->arg].u.s);
 506           if (sz < n) sz = n;
 507         }
 508         break;
 509       case 'n':
 510         switch (fs->fmt) {
 511 #define CASE(code, ty)                                                  \
 512   case fmt_##code: *fa[fs->arg].u.code = tot; break;
 513           PERCENT_N_FMTTYPES(CASE)
 514 #undef CASE
 515           default: abort();
 516         }
 517         continue;
 518     }
 519
 520     /* --- Finally do the output stage --- */
 521
 522     switch (fs->fmt) {
 523 #define CASE(code, ty)                                                  \
 524         case fmt_##code:                                                \
 525           i = ops->nputf(out, sz, dd.buf, fa[fs->arg].u.code);          \
 526           break;
 527       OUTPUT_FMTTYPES(CASE)
 528 #undef CASE
 529       default: abort();
 530     }
 531     if (i < 0) return (-1);
 532     tot += i;
 533   }
 534
 535   /* --- We're done --- */
 536
 537   DDESTROY(&dd);
 538   DA_DESTROY(&av);
 539   DA_DESTROY(&sv);
 540   return (tot);
 541 }
 542
 543 /* --- @gprintf@ --- *
 544  *
 545  * Arguments:   @const struct gprintf_ops *ops@ = output operations
 546  *              @void *out@ = context for output operations
 547  *              @const char *p@ = pointer to @printf@-style format string
 548  *              @...@ = argument handle
 549  *
 550  * Returns:     The number of characters written to the string.
 551  *
 552  * Use:         Formats a @printf@-like message and writes the result using
 553  *              the given output operations.  This is the backend machinery
 554  *              for @dstr_putf@, for example.
 555  */
 556
 557 int gprintf(const struct gprintf_ops *ops, void *out, const char *p, ...)
 558 {
 559   va_list ap;
 560   int n;
 561
 562   va_start(ap, p); n = vgprintf(ops, out, p, &ap); va_end(ap);
 563   return (n);
 564 }
 565
 566 /*----- Utilities ---------------------------------------------------------*/
 567
 568 /* --- @gprintf_memputf@ --- *
 569  *
 570  * Arguments:   @char **buf_inout@ = address of output buffer pointer
 571  *              @size_t *sz_inout@ = address of buffer size
 572  *              @size_t maxsz@ = buffer size needed for this operation
 573  *              @const char *p@ = pointer to format string
 *              @va_list ap@ = captured format-arguments tail
 575  *
 576  * Returns:     The formatted length.
 577  *
 578  * Use:         Generic utility for mostly implementing the @nputf@ output
 579  *              function, if you don't have a better option.
 580  *
 581  *              On entry, @*buf_inout@ should be null or a buffer pointer,
 582  *              with @*sz_inout@ either zero or the buffer's size,
 *              respectively.  On exit, @*buf_inout@ and @*sz_inout@ will be
 584  *              updated, if necessary, to describe a sufficiently large
 585  *              buffer, and the formatted string will have been written to
 586  *              the buffer.
 587  *
 588  *              When the buffer is no longer required, free it using @xfree@.
 589  */
 590
 591 size_t gprintf_memputf(char **buf_inout, size_t *sz_inout,
 592                     size_t maxsz, const char *p, va_list ap)
 593 {
 594   char *buf = *buf_inout;
 595   size_t sz = *sz_inout;
 596   int n;
 597
 598   if (sz <= maxsz) {
 599     if (!sz) sz = 32;
 600     while (sz <= maxsz) sz *= 2;
 601     if (buf) xfree(buf);
 602     buf = xmalloc(sz); *buf_inout = buf; *sz_inout = sz;
 603   }
 604
 605 #ifdef HAVE_SNPRINTF
 606   n = vsnprintf(buf, maxsz + 1, p, ap);
 607 #else
 608   n = vsprintf(buf, p, ap);
 609 #endif
 610   assert(0 <= n && n <= maxsz);
 611   return (n);
 612 }
 613
 614 /*----- Standard printers -------------------------------------------------*/
 615
 616 static int file_putch(void *out, int ch)
 617 {
 618   FILE *fp = out;
 619
 620   if (putc(ch, fp) == EOF) return (-1);
 621   return (0);
 622 }
 623
 624 static int file_putm(void *out, const char *p, size_t sz)
 625 {
 626   FILE *fp = out;
 627
 628   if (fwrite(p, 1, sz, fp) < sz) return (-1);
 629   return (0);
 630 }
 631
 632 static int file_nputf(void *out, size_t maxsz, const char *p, ...)
 633 {
 634   FILE *fp = out;
 635   va_list ap;
 636   int n;
 637
 638   va_start(ap, p);
 639   n = vfprintf(fp, p, ap);
 640   va_end(ap); if (n < 0) return (-1);
 641   return (0);
 642 }
 643
/* Output operations which write to a stdio stream.  The output context
 * passed alongside these must be the @FILE *@ to write on.
 */
const struct gprintf_ops file_printops =
  { file_putch, file_putm, file_nputf };
 646
 647 /*----- That's all, folks -------------------------------------------------*/