+/* -*-c-*-
+ *
+ * Generalized string formatting
+ *
+ * (c) 2023 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This file is part of the mLib utilities library.
+ *
+ * mLib is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * mLib is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with mLib. If not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+/*----- Header files ------------------------------------------------------*/
+
+#include "config.h"
+
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef HAVE_FLOAT_H
+# include <float.h>
+#endif
+
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+
+#include "darray.h"
+#include "dstr.h"
+#include "gprintf.h"
+#include "macros.h"
+
+/*----- Tunable constants -------------------------------------------------*/
+
+/* For each format specifier, at least @STEP@ bytes are ensured before
+ * writing the formatted result.
+ */
+
+#define STEP 64 /* Minimum space reserved per directive in @vgprintf@ */
+
+/*----- Preliminary definitions -------------------------------------------*/
+
+/* @IF_FLOAT(x)@ expands to @x@ when @HAVE_FLOAT_H@ is defined (the same
+ * condition under which <float.h> is included above), and to nothing
+ * otherwise.
+ */
+#ifdef HAVE_FLOAT_H
+# define IF_FLOAT(x) x
+#else
+# define IF_FLOAT(x)
+#endif
+
+/* @IF_LONGLONG(x)@ expands to @x@ when one of the `long long' limit macros
+ * is defined, and to nothing otherwise.  It guards every use of
+ * `long long' below.
+ */
+#if defined(LLONG_MAX) || defined(LONG_LONG_MAX)
+# define IF_LONGLONG(x) x
+#else
+# define IF_LONGLONG(x)
+#endif
+
+/* @IF_INTMAX(x)@ expands to @x@ when @INTMAX_MAX@ is defined, and to
+ * nothing otherwise.  It guards every use of @intmax_t@ and @uintmax_t@
+ * below.
+ */
+#ifdef INTMAX_MAX
+# define IF_INTMAX(x) x
+#else
+# define IF_INTMAX(x)
+#endif
+
+/* X-macro tables of argument types.  Each entry has the form
+ * @_(code, type)@: @code@ names both the @fmt_...@ enumerator and the
+ * matching member of the union in @struct fmtarg@, and @type@ is the C type
+ * used to fetch the argument with @va_arg@.
+ *
+ * @OUTPUT_FMTTYPES@ lists the types whose values are handed on to the
+ * output operations' @nputf@ function.
+ */
+#define OUTPUT_FMTTYPES(_) \
+ _(i, unsigned int) \
+ _(li, unsigned long) \
+ IF_LONGLONG( _(lli, unsigned long long) ) \
+ _(zi, size_t) \
+ _(ti, ptrdiff_t) \
+ IF_INTMAX( _(ji, uintmax_t) ) \
+ _(s, char *) \
+ _(p, void *) \
+ _(f, double) \
+ _(Lf, long double)
+
+/* @PERCENT_N_FMTTYPES@ lists the pointer types accepted by `%n'
+ * directives, one per supported length modifier.  These receive the running
+ * character count rather than producing output.
+ */
+#define PERCENT_N_FMTTYPES(_) \
+ _(n, int *) \
+ _(hhn, char *) \
+ _(hn, short *) \
+ _(ln, long *) \
+ _(zn, size_t *) \
+ _(tn, ptrdiff_t *) \
+ IF_LONGLONG( _(lln, long long *) ) \
+ IF_INTMAX( _(jn, intmax_t *) )
+
+/* @FMTTYPES@ is the complete table: output types followed by `%n' types. */
+#define FMTTYPES(_) \
+ OUTPUT_FMTTYPES(_) \
+ PERCENT_N_FMTTYPES(_)
+
+/* Argument type codes: one @fmt_...@ enumerator for each entry in
+ * @FMTTYPES@, in table order.
+ */
+enum {
+ fmt_unset = 0, /* no type has been assigned */
+#define CODE(code, ty) fmt_##code,
+ FMTTYPES(CODE)
+#undef CODE
+ fmt__limit /* one more than the last type code */
+};
+
+/* One argument slot: @fmt@ records which member of @u@ is in use. */
+struct fmtarg {
+ int fmt; /* type code: one of the @fmt_...@ values */
+ union {
+#define MEMB(code, ty) ty code;
+ FMTTYPES(MEMB)
+#undef MEMB
+ } u; /* the value; each member is named after its type code */
+};
+
+/* @fmtarg_v@ is the vector type holding the argument slots; it is used
+ * with the @DA_...@ macros from "darray.h".
+ */
+DA_DECL(fmtarg_v, struct fmtarg);
+
+/* Length-modifier codes.  A code is stored in the low bits of a
+ * specifier's flags word, under the @f_len@ mask, so every value must fit
+ * in four bits.
+ */
+enum {
+  len_std = 0,                          /* no length modifier */
+  len_hh = 1,                           /* `hh' */
+  len_h = 2,                            /* `h' */
+  len_l = 3,                            /* `l' */
+  len_ll = 4,                           /* `ll' */
+  len_z = 5,                            /* `z' */
+  len_t = 6,                            /* `t' */
+  len_j = 7,                            /* `j' */
+  len_L = 8                             /* `L' */
+};
+
+/* Bits in a specifier's flags word.  The low four bits hold a @len_...@
+ * code; the remaining bits are independent flags.
+ */
+#define f_len 0x000fu                   /* mask for the length code */
+#define f_wd (1u << 4)                  /* a field width was given */
+#define f_wdarg (1u << 5)               /* ... and it comes from an argument */
+#define f_prec (1u << 6)                /* a precision was given */
+#define f_precarg (1u << 7)             /* ... and it comes from an argument */
+#define f_plus (1u << 8)                /* `+' flag */
+#define f_minus (1u << 9)               /* `-' flag */
+#define f_sharp (1u << 10)              /* `#' flag */
+#define f_zero (1u << 11)               /* `0' flag */
+#define f_posarg (1u << 12)             /* explicit `N$' argument position */
+
+/* One compiled piece of the format string: a run of literal text followed
+ * by at most one directive.
+ */
+struct fmtspec {
+  const char *p;                        /* start of the literal text */
+  size_t n;                             /* length of the literal text */
+  unsigned f;                           /* @f_...@ flags and length code */
+  int fmt;                              /* argument type, or @fmt_unset@ */
+  int ch;                               /* conversion character, or zero */
+  int wd;                               /* width, or index of its argument */
+  int prec;                             /* precision, or index of its arg */
+  int arg;                              /* index of the value argument */
+};
+
+/* @fmtspec_v@ is the vector type holding the compiled specifiers; it is
+ * used with the @DA_...@ macros from "darray.h".
+ */
+DA_DECL(fmtspec_v, struct fmtspec);
+
+/*----- Main code ---------------------------------------------------------*/
+
+/* --- @vgprintf@ --- *
+ *
+ * Arguments: @const struct gprintf_ops *ops@ = output operations
+ * @void *out@ = context for output operations
+ * @const char *p@ = pointer to @printf@-style format string
+ * @va_list *ap@ = argument handle
+ *
+ * Returns: The number of characters written, or @-1@ if an output
+ * operation failed.
+ *
+ * Use: As for @gprintf@, but takes a reified argument tail.
+ */
+
+/* Note that argument slot @i@ of the vector @av@ is to be fetched with
+ * type code @fmt@.  The vector is extended if it is too short, and any
+ * newly created slots are marked @fmt_unset@.  If the slot has already been
+ * given a type then it must be the same one; this is checked with
+ * @assert@.
+ */
+
+static void set_arg(fmtarg_v *av, size_t i, int fmt)
+{
+  size_t have = DA_LEN(av);
+  size_t extra, k;
+  struct fmtarg *slot;
+
+  /* --- Grow the vector so that slot @i@ exists --- */
+
+  if (have <= i) {
+    extra = i + 1 - have;
+    DA_ENSURE(av, extra);
+    for (k = have; k < have + extra; k++) DA(av)[k].fmt = fmt_unset;
+    DA_UNSAFE_EXTEND(av, extra);
+  }
+
+  /* --- Claim the slot, or check that the existing claim agrees --- */
+
+  slot = &DA(av)[i];
+  if (slot->fmt != fmt_unset) { assert(slot->fmt == fmt); return; }
+  slot->fmt = fmt;
+}
+
+/* Format the string @p@ using arguments taken from @*ap@, sending the
+ * output through the operations @ops@ to the context @out@.
+ *
+ * The work is done in three passes: the format string is parsed into a
+ * vector of @fmtspec@ records, noting the type wanted for each argument
+ * slot; the arguments are fetched from @*ap@ in slot order; and finally the
+ * records are replayed, rebuilding a single-directive format string for
+ * each one and passing it, with its argument, to @ops->nputf@.
+ *
+ * Returns the number of characters written, or @-1@ if an output operation
+ * reports failure.  Working storage is released on every return path.  A
+ * malformed format string causes an assertion failure or @abort@.
+ */
+
+int vgprintf(const struct gprintf_ops *ops, void *out,
+             const char *p, va_list *ap)
+{
+  size_t sz, mx, n;
+  dstr dd = DSTR_INIT;
+  fmtspec_v sv = DA_INIT;
+  fmtarg_v av = DA_INIT;
+  struct fmtarg *fa, *fal;
+  struct fmtspec *fs, *fsl;
+  unsigned f;
+  int i, anext, tot = 0, rc = -1;
+  int wd, prec;
+
+  /* --- Initial pass through the input, parsing format specifiers --- *
+   *
+   * We essentially compile the format string into a vector of @fmtspec@
+   * objects, each of which represents a chunk of literal text followed by a
+   * (possibly imaginary, in the case of the final one) formatting directive.
+   * Output then simply consists of interpreting these specifiers in order.
+   */
+
+  anext = 0;
+
+  while (*p) {
+    f = 0;
+    DA_ENSURE(&sv, 1);
+    fs = &DA(&sv)[DA_LEN(&sv)];
+    DA_UNSAFE_EXTEND(&sv, 1);
+
+    /* --- Give every field a defined value --- *
+     *
+     * Specifiers for plain text and `%%' never set most of these, but the
+     * output pass reads the flags word before it looks at the type.
+     */
+
+    fs->f = 0; fs->fmt = fmt_unset; fs->ch = 0;
+    fs->wd = 0; fs->prec = 0; fs->arg = 0;
+
+    /* --- Find the end of this literal portion --- */
+
+    fs->p = p;
+    while (*p && *p != '%') p++;
+    fs->n = p - fs->p;
+
+    /* --- Some simple cases --- *
+     *
+     * We might have reached the end of the string, or maybe a `%%' escape.
+     */
+
+    if (!*p) break;
+    p++;
+    if (*p == '%') { fs->ch = '%'; p++; continue; }
+
+    /* --- Pick up initial flags --- */
+
+  flags:
+    for (;;) {
+      switch (*p) {
+        case '+': f |= f_plus; break;
+        case '-': f |= f_minus; break;
+        case '#': f |= f_sharp; break;
+        case '0': f |= f_zero; break;
+        default: goto done_flags;
+      }
+      p++;
+    }
+
+    /* --- Pick up the field width --- */
+
+  done_flags:
+    i = 0;
+    while (ISDIGIT(*p)) i = 10*i + *p++ - '0';
+
+    /* --- Snag: this might have been an argument position indicator --- */
+
+    if (i && *p == '$' && (!f || f == f_zero)) {
+      f |= f_posarg;
+      fs->arg = i - 1;
+      p++;
+      goto flags;
+    }
+
+    /* --- Set the field width --- *
+     *
+     * If @i@ is nonzero here then we have a numeric field width.  Otherwise
+     * it might be `*', maybe with an explicit argument number.
+     */
+
+    if (i) {
+      f |= f_wd;
+      fs->wd = i;
+    } else if (*p == '*') {
+      p++;
+      if (!ISDIGIT(*p))
+        i = anext++;
+      else {
+        i = *p++ - '0';
+        while (ISDIGIT(*p)) i = 10*i + *p++ - '0';
+        assert(*p == '$'); p++;
+        assert(i > 0); i--;
+      }
+      f |= f_wd | f_wdarg;
+      set_arg(&av, i, fmt_i); fs->wd = i;
+    }
+
+    /* --- Maybe we have a precision spec --- */
+
+    if (*p == '.') {
+      p++;
+      f |= f_prec;
+      if (ISDIGIT(*p)) {
+        i = *p++ - '0';
+        while (ISDIGIT(*p)) i = 10*i + *p++ - '0';
+        fs->prec = i;
+      } else if (*p != '*')
+        fs->prec = 0;
+      else {
+        p++;
+        if (!ISDIGIT(*p))
+          i = anext++;
+        else {
+          i = *p++ - '0';
+          while (ISDIGIT(*p)) i = 10*i + *p++ - '0';
+          assert(*p == '$'); p++;
+          assert(i > 0); i--;
+        }
+        f |= f_precarg;
+        set_arg(&av, i, fmt_i); fs->prec = i;
+      }
+    }
+
+    /* --- Maybe some length flags --- */
+
+    switch (*p) {
+      case 'h':
+        p++;
+        if (*p == 'h') { f |= len_hh; p++; } else f |= len_h;
+        break;
+      case 'l':
+        p++;
+        IF_LONGLONG( if (*p == 'l') { f |= len_ll; p++; } else ) f |= len_l;
+        break;
+      case 'L': f |= len_L; p++; break;
+      case 'z': f |= len_z; p++; break;
+      case 't': f |= len_t; p++; break;
+      IF_INTMAX( case 'j': f |= len_j; p++; break; )
+    }
+
+    /* --- The flags are now ready --- */
+
+    fs->f = f;
+
+    /* --- At the end, an actual directive --- */
+
+    fs->ch = *p;
+    switch (*p++) {
+      case '%':
+
+        /* --- A literal `%' following flags or a width --- *
+         *
+         * This consumes no value argument, so go straight on to the next
+         * specifier rather than claiming an argument slot which would
+         * never be given a type.
+         */
+
+        fs->fmt = fmt_unset;
+        continue;
+      case 'd': case 'i': case 'x': case 'X': case 'o': case 'u':
+        switch (f&f_len) {
+          case len_l: fs->fmt = fmt_li; break;
+          case len_z: fs->fmt = fmt_zi; break;
+          case len_t: fs->fmt = fmt_ti; break;
+          IF_LONGLONG( case len_ll: fs->fmt = fmt_lli; break; )
+          IF_INTMAX( case len_j: fs->fmt = fmt_ji; break; )
+          default: fs->fmt = fmt_i;
+        }
+        break;
+      case 'a': case 'A':
+      case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
+        fs->fmt = (f&f_len) == len_L ? fmt_Lf : fmt_f;
+        break;
+      case 'c':
+        fs->fmt = fmt_i;
+        break;
+      case 's':
+        fs->fmt = fmt_s;
+        break;
+      case 'p':
+        fs->fmt = fmt_p;
+        break;
+      case 'n':
+        switch (f&f_len) {
+          case len_hh: fs->fmt = fmt_hhn; break;
+          case len_h: fs->fmt = fmt_hn; break;
+          case len_l: fs->fmt = fmt_ln; break;
+          case len_z: fs->fmt = fmt_zn; break;
+          case len_t: fs->fmt = fmt_tn; break;
+          IF_LONGLONG( case len_ll: fs->fmt = fmt_lln; break; )
+          IF_INTMAX( case len_j: fs->fmt = fmt_jn; break; )
+          default: fs->fmt = fmt_n;
+        }
+        break;
+      default:
+        fprintf(stderr,
+                "FATAL vgprintf: unknown format specifier `%c'\n", p[-1]);
+        abort();
+    }
+
+    /* --- Finally sort out the argument --- *
+     *
+     * If we don't have explicit argument positions then this comes after the
+     * width and precision; and we don't know the type code until we've
+     * parsed the specifier, so this seems the right place to handle it.
+     */
+
+    if (!(f&f_posarg)) fs->arg = anext++;
+    set_arg(&av, fs->arg, fs->fmt);
+  }
+
+  /* --- Quick pass over the argument vector to collect the arguments --- *
+   *
+   * Every slot must have been given a type by now: a gap in the argument
+   * positions leaves us unable to step over the missing argument.
+   */
+
+  for (fa = DA(&av), fal = fa + DA_LEN(&av); fa < fal; fa++) {
+    switch (fa->fmt) {
+#define CASE(code, ty) case fmt_##code: fa->u.code = va_arg(*ap, ty); break;
+      FMTTYPES(CASE)
+#undef CASE
+      default: abort();
+    }
+  }
+
+  /* --- Final pass through the format string to produce output --- */
+
+  fa = DA(&av);
+  for (fs = DA(&sv), fsl = fs + DA_LEN(&sv); fs < fsl; fs++) {
+    f = fs->f;
+
+    /* --- Output the literal portion --- */
+
+    if (fs->n) {
+      if (ops->putm(out, fs->p, fs->n)) goto end;
+      tot += fs->n;
+    }
+
+    /* --- And now the variable portion --- */
+
+    if (fs->fmt == fmt_unset) {
+      switch (fs->ch) {
+        case 0: break;
+        case '%':
+          if (ops->putch(out, '%')) goto end;
+          tot++;
+          break;
+        default: abort();
+      }
+      continue;
+    }
+
+    DRESET(&dd);
+    DPUTC(&dd, '%');
+
+    /* --- Resolve the width and precision --- *
+     *
+     * A negative width argument means left-justification; a negative
+     * precision argument means that the precision is omitted.
+     */
+
+    if (!(f&f_wd))
+      wd = 0;
+    else {
+      wd = (fs->f&f_wdarg) ? *(int *)&fa[fs->wd].u.i : fs->wd;
+      if (wd < 0) { wd = -wd; f |= f_minus; }
+    }
+
+    if (!(f&f_prec))
+      prec = 0;
+    else {
+      prec = (fs->f&f_precarg) ? *(int *)&fa[fs->prec].u.i : fs->prec;
+      if (prec < 0) { prec = 0; f &= ~f_prec; }
+    }
+
+    /* --- Write out the flags, width and precision --- */
+
+    if (f&f_plus) DPUTC(&dd, '+');
+    if (f&f_minus) DPUTC(&dd, '-');
+    if (f&f_sharp) DPUTC(&dd, '#');
+    if (f&f_zero) DPUTC(&dd, '0');
+
+    if (f&f_wd) {
+      DENSURE(&dd, STEP);
+      dd.len += sprintf(dd.buf + dd.len, "%d", wd);
+    }
+
+    if (f&f_prec) {
+      DENSURE(&dd, STEP + 1);
+      dd.len += sprintf(dd.buf + dd.len, ".%d", prec);
+    }
+
+    /* --- Write out the length gadget --- */
+
+    switch (f&f_len) {
+      case len_hh: DPUTC(&dd, 'h'); /* fall through */
+      case len_h: DPUTC(&dd, 'h'); break;
+      IF_LONGLONG( case len_ll: DPUTC(&dd, 'l'); /* fall through */ )
+      case len_l: DPUTC(&dd, 'l'); break;
+      case len_z: DPUTC(&dd, 'z'); break;
+      case len_t: DPUTC(&dd, 't'); break;
+      case len_L: DPUTC(&dd, 'L'); break;
+      IF_INTMAX( case len_j: DPUTC(&dd, 'j'); break; )
+      case len_std: break;
+      default: abort();
+    }
+
+    /* --- And finally the actually important bit --- */
+
+    DPUTC(&dd, fs->ch);
+    DPUTZ(&dd);
+
+    /* --- Make sure we have enough space for the output --- */
+
+    sz = STEP;
+    if (sz < (size_t)wd) sz = wd;
+    if (sz < (size_t)prec + 16) sz = (size_t)prec + 16;
+    switch (fs->ch) {
+      case 'a': case 'A':
+      case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
+#ifdef HAVE_FLOAT_H
+
+        /* --- Fixed-point output can be very long --- *
+         *
+         * Both `%f' and `%F' write every digit of the integer part, and
+         * then the requested fraction digits on top of that, so allow for
+         * the largest exponent as well as the precision.
+         */
+
+        if (fs->ch == 'f' || fs->ch == 'F') {
+          mx = ((fs->f&f_len) == len_L ?
+                LDBL_MAX_10_EXP : DBL_MAX_10_EXP) + (size_t)prec + 16;
+          if (sz < mx) sz = mx;
+        }
+        break;
+#else
+#  define MSG "<no float support>"
+        if (ops->putm(out, MSG, sizeof(MSG) - 1)) goto end;
+        tot += sizeof(MSG) - 1;
+        continue;
+#  undef MSG
+#endif
+      case 's':
+        if (!(f&f_prec)) {
+          n = strlen(fa[fs->arg].u.s);
+          if (sz < n) sz = n;
+        }
+        break;
+      case 'n':
+
+        /* --- Store the count so far; there's nothing to output --- */
+
+        switch (fs->fmt) {
+#define CASE(code, ty) \
+          case fmt_##code: *fa[fs->arg].u.code = tot; break;
+          PERCENT_N_FMTTYPES(CASE)
+#undef CASE
+          default: abort();
+        }
+        continue;
+    }
+
+    /* --- Finally do the output stage --- */
+
+    switch (fs->fmt) {
+#define CASE(code, ty) \
+      case fmt_##code: \
+        i = ops->nputf(out, sz, dd.buf, fa[fs->arg].u.code); \
+        break;
+      OUTPUT_FMTTYPES(CASE)
+#undef CASE
+      default: abort();
+    }
+    if (i < 0) goto end;
+    tot += i;
+  }
+
+  /* --- We're done --- *
+   *
+   * Failures jump straight to @end@, leaving @rc@ at @-1@, so that the
+   * working storage is released on every path.
+   */
+
+  rc = tot;
+end:
+  DDESTROY(&dd);
+  DA_DESTROY(&av);
+  DA_DESTROY(&sv);
+  return (rc);
+}
+
+/* --- @gprintf@ --- *
+ *
+ * Arguments:   @const struct gprintf_ops *ops@ = output operations
+ *              @void *out@ = context for output operations
+ *              @const char *p@ = pointer to @printf@-style format string
+ *              @...@ = arguments for the format string
+ *
+ * Returns:     Whatever @vgprintf@ returns: the number of characters
+ *              written, or @-1@ if an output operation failed.
+ *
+ * Use:         Formats a @printf@-like message and writes the result using
+ *              the given output operations.  This is a variadic wrapper
+ *              which captures its argument tail and hands it to
+ *              @vgprintf@.
+ */
+
+int gprintf(const struct gprintf_ops *ops, void *out, const char *p, ...)
+{
+  va_list args;
+  int nwritten;
+
+  va_start(args, p);
+  nwritten = vgprintf(ops, out, p, &args);
+  va_end(args);
+
+  return (nwritten);
+}
+
+/*----- Standard printers -------------------------------------------------*/
+
+/* Write the single character @ch@ to the stdio stream @out@.  Returns zero
+ * on success, or @-1@ if @putc@ reports @EOF@.
+ */
+
+static int file_putch(void *out, int ch)
+{
+  FILE *stream = out;
+  int rc = putc(ch, stream);
+
+  return (rc == EOF ? -1 : 0);
+}
+
+/* Write the @sz@ bytes starting at @p@ to the stdio stream @out@.  Returns
+ * zero if @fwrite@ accepted all of them, or @-1@ if it wrote fewer.
+ */
+
+static int file_putm(void *out, const char *p, size_t sz)
+{
+  FILE *stream = out;
+  size_t done = fwrite(p, 1, sz, stream);
+
+  if (done >= sz) return (0);
+  return (-1);
+}
+
+/* Format the arguments according to @p@ and write the result to the stdio
+ * stream @out@.  The size hint @maxsz@ is not needed here, because
+ * @vfprintf@ writes directly to the stream.
+ *
+ * Returns the number of characters written, or @-1@ on error.  The count
+ * matters: @vgprintf@ adds the result of each @nputf@ call to its running
+ * total, so returning zero on success would make that total wrong.
+ */
+
+static int file_nputf(void *out, size_t maxsz, const char *p, ...)
+{
+  FILE *fp = out;
+  va_list ap;
+  int n;
+
+  (void)maxsz;
+
+  va_start(ap, p);
+  n = vfprintf(fp, p, ap);
+  va_end(ap);
+
+  if (n < 0) return (-1);
+  return (n);
+}
+
+/* Output operations which write to a stdio stream.  Each of the three
+ * functions treats its @out@ argument as a @FILE *@.
+ */
+const struct gprintf_ops file_printops =
+ { file_putch, file_putm, file_nputf };
+
+/*----- That's all, folks -------------------------------------------------*/