From eff136f63977a5493525610bff64d363581154af Mon Sep 17 00:00:00 2001 Message-Id: From: Mark Wooding Date: Mon, 16 Jun 2014 01:56:16 +0100 Subject: [PATCH] struct/dstr-putf.c (dstr_vputf): Rewrite to support `%n$...' specs. Organization: Straylight/Edgeware From: Mark Wooding The internals are generally just better now, and it should be easier to support other interesting things. --- configure.ac | 3 + struct/Makefile.am | 4 + struct/dstr-putf.c | 568 +++++++++++++++++++++++++------------- struct/dstr.3 | 6 +- struct/t/dstr-putf-test.c | 93 +++++++ struct/tests.at | 7 + 6 files changed, 493 insertions(+), 188 deletions(-) create mode 100644 struct/t/dstr-putf-test.c diff --git a/configure.ac b/configure.ac index a5e8547..2d4f0fb 100644 --- a/configure.ac +++ b/configure.ac @@ -57,6 +57,9 @@ dnl Libraries. AC_SEARCH_LIBS([socket], [socket]) AC_SEARCH_LIBS([gethostbyname], [nsl resolv]) +dnl Functions. +AC_CHECK_FUNCS([snprintf]) + dnl Types. AC_CHECK_TYPE([socklen_t], [int]) diff --git a/struct/Makefile.am b/struct/Makefile.am index a718c09..6365023 100644 --- a/struct/Makefile.am +++ b/struct/Makefile.am @@ -37,6 +37,10 @@ pkginclude_HEADERS += dstr.h dspool.h libstruct_la_SOURCES += dstr.c dstr-putf.c dspool.c LIBMANS += dstr.3 dspool.3 +check_PROGRAMS += t/dstr-putf.t +t_dstr_putf_t_SOURCES = t/dstr-putf-test.c +t_dstr_putf_t_LDFLAGS = -static + ## Buffers. pkginclude_HEADERS += buf.h libstruct_la_SOURCES += buf.c buf-dstr.c diff --git a/struct/dstr-putf.c b/struct/dstr-putf.c index ba715b1..6f699e4 100644 --- a/struct/dstr-putf.c +++ b/struct/dstr-putf.c @@ -29,6 +29,7 @@ #include "config.h" +#include #include #include #include @@ -40,16 +41,85 @@ # include #endif +#include "darray.h" #include "dstr.h" /*----- Tunable constants -------------------------------------------------*/ /* - * For each format specifier, at least @DSTR_PUTFSTEP@ bytes are ensured - * before writing the formatted result. 
+ * For each format specifier, at least @PUTFSTEP@ bytes are ensured before + * writing the formatted result. */ -#define DSTR_PUTFSTEP 64 /* Buffer size for @putf@ */ +#define PUTFSTEP 64 /* Buffer size for @putf@ */ + +/*----- Preliminary definitions -------------------------------------------*/ + +#define OUTPUT_FMTTYPES(_) \ + _(i, unsigned int) \ + _(li, unsigned long) \ + _(s, char *) \ + _(p, void *) \ + _(f, double) \ + _(Lf, long double) + +#define PERCENT_N_FMTTYPES(_) \ + _(hn, short *) \ + _(n, int *) \ + _(ln, long *) + +#define FMTTYPES(_) \ + OUTPUT_FMTTYPES(_) \ + PERCENT_N_FMTTYPES(_) + +enum { + fmt_unset = 0, +#define CODE(code, ty) fmt_##code, + FMTTYPES(CODE) +#undef CODE + fmt__limit +}; + +typedef struct { + int fmt; + union { +#define MEMB(code, ty) ty code; + FMTTYPES(MEMB) +#undef MEMB + } u; +} fmtarg; + +DA_DECL(fmtarg_v, fmtarg); + +enum { + len_std = 0, + len_h, + len_l, + len_ll, + len_L +}; + +#define f_len 0x000fu +#define f_wd 0x0010u +#define f_wdarg 0x0020u +#define f_prec 0x0040u +#define f_precarg 0x0080u +#define f_plus 0x0100u +#define f_minus 0x0200u +#define f_sharp 0x0400u +#define f_zero 0x0800u +#define f_posarg 0x1000u + +typedef struct { + const char *p; + size_t n; + unsigned f; + int fmt, ch; + int wd, prec; + int arg; +} fmtspec; + +DA_DECL(fmtspec_v, fmtspec); /*----- Main code ---------------------------------------------------------*/ @@ -65,217 +135,345 @@ * supplied functions with @printf@-style interfaces. 
*/ +static void set_arg(fmtarg_v *av, size_t i, int fmt) +{ + size_t j, n; + + n = DA_LEN(av); + if (i >= n) { + DA_ENSURE(av, i + 1 - n); + for (j = n; j <= i; j++) DA(av)[j].fmt = fmt_unset; + DA_UNSAFE_EXTEND(av, i + 1 - n); + } + + if (DA(av)[i].fmt == fmt_unset) DA(av)[i].fmt = fmt; + else assert(DA(av)[i].fmt == fmt); +} + int dstr_vputf(dstr *d, const char *p, va_list *ap) { - const char *q = p; size_t n = d->len; - size_t sz; + size_t sz, mx; dstr dd = DSTR_INIT; + fmtspec_v sv = DA_INIT; + fmtarg_v av = DA_INIT; + fmtarg *fa, *fal; + fmtspec *fs, *fsl; + unsigned f; + int i, anext; + int wd, prec; + + /* --- Initial pass through the input, parsing format specifiers --- * + * + * We essentially compile the format string into a vector of @fmtspec@ + * objects, each of which represents a chunk of literal text followed by a + * (possibly imaginary, in the case of the final one) formatting directive. + * Output then simply consists of interpreting these specifiers in order. + */ + + anext = 0; while (*p) { - unsigned f; - int wd, prec; + f = 0; + DA_ENSURE(&sv, 1); + fs = &DA(&sv)[DA_LEN(&sv)]; + DA_UNSAFE_EXTEND(&sv, 1); + + /* --- Find the end of this literal portion --- */ + + fs->p = p; + while (*p && *p != '%') p++; + fs->n = p - fs->p; -#define f_short 1u -#define f_long 2u -#define f_Long 4u -#define f_wd 8u -#define f_prec 16u + /* --- Some simple cases --- * + * + * We might have reached the end of the string, or maybe a `%%' escape.
+ */ + + if (!*p) { fs->fmt = fmt_unset; fs->ch = 0; break; } + p++; + if (*p == '%') { fs->fmt = fmt_unset; fs->ch = '%'; p++; continue; } - /* --- Most stuff gets passed on through --- */ + /* --- Pick up initial flags --- */ - if (*p != '%') { + flags: + for (;;) { + switch (*p) { + case '+': f |= f_plus; break; + case '-': f |= f_minus; break; + case '#': f |= f_sharp; break; + case '0': f |= f_zero; break; + default: goto done_flags; + } p++; - continue; } - /* --- Dump out what's between @q@ and @p@ --- */ + /* --- Pick up the field width --- */ - DPUTM(d, q, p - q); - p++; + done_flags: + i = 0; + while (isdigit((unsigned char)*p)) i = 10*i + *p++ - '0'; - /* --- Sort out the various silly flags and things --- */ + /* --- Snag: this might have been an argument position indicator --- */ - DPUTC(&dd, '%'); - f = 0; - sz = DSTR_PUTFSTEP; + if (i && *p == '$' && (!f || f == f_zero)) { + f |= f_posarg; + fs->arg = i - 1; + p++; + goto flags; + } - for (;;) { - switch (*p) { + /* --- Set the field width --- * + * + * If @i@ is nonzero here then we have a numeric field width. Otherwise + * it might be `*', maybe with an explicit argument number. 
+ */ + + if (i) { + f |= f_wd; + fs->wd = i; + } else if (*p == '*') { + p++; + if (!isdigit((unsigned char)*p)) + i = anext++; + else { + i = *p++ - '0'; + while (isdigit((unsigned char)*p)) i = 10*i + *p++ - '0'; + assert(*p == '$'); p++; + assert(i > 0); i--; + } + f |= f_wd | f_wdarg; + set_arg(&av, i, fmt_i); fs->wd = i; + } + + /* --- Maybe we have a precision spec --- */ - /* --- Various simple flags --- */ - - case '+': - case '-': - case '#': - case '0': - goto putch; - case 'h': - f |= f_short; - goto putch; - case 'l': - f |= f_long; - goto putch; - case 'L': - f |= f_Long; - goto putch; - case 0: - goto finished; - - /* --- Field widths and precision specifiers --- */ - - { - int *ip; - - case '.': - DPUTC(&dd, '.'); - ip = &prec; - f |= f_prec; - p++; - goto getnum; - case '*': - ip = &wd; - f |= f_wd; - goto getnum; - default: - if (isdigit((unsigned char)*p)) { - f |= f_wd; - ip = &wd; - goto getnum; - } - DPUTC(d, *p); - goto formatted; - getnum: - *ip = 0; - if (*p == '*') { - *ip = va_arg(*ap, int); - DENSURE(&dd, DSTR_PUTFSTEP); - dd.len += sprintf(dd.buf + dd.len, "%i", *ip); - p++; - } else { - *ip = *p - '0'; - DPUTC(&dd, *p); - p++; - while (isdigit((unsigned char)*p)) { - DPUTC(&dd, *p); - *ip = 10 * *ip + *p++ - '0'; - } - } - break; + if (*p == '.') { + p++; + f |= f_prec; + if (isdigit((unsigned char)*p)) { + i = *p++ - '0'; + while (isdigit((unsigned char)*p)) i = 10*i + *p++ - '0'; + fs->prec = i; + } else if (*p != '*') + fs->prec = 0; + else { + p++; + if (!isdigit((unsigned char)*p)) + i = anext++; + else { + i = *p++ - '0'; + while (isdigit((unsigned char)*p)) i = 10*i + *p++ - '0'; + assert(*p == '$'); p++; + assert(i > 0); i--; } + f |= f_precarg; + set_arg(&av, i, fmt_i); fs->prec = i; + } + } - /* --- Output formatting --- */ - - case 'd': case 'i': case 'x': case 'X': case 'o': case 'u': - DPUTC(&dd, *p); - DPUTZ(&dd); - if ((f & f_prec) && prec + 16 > sz) - sz = prec + 16; - if ((f & f_wd) && wd + 1> sz) - sz = wd + 1; - DENSURE(d, 
sz); - if (f & f_long) - d->len += sprintf(d->buf + d->len, dd.buf, - va_arg(*ap, unsigned long)); - else - d->len += sprintf(d->buf + d->len, dd.buf, - va_arg(*ap, unsigned int)); - goto formatted; - - case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': -#ifdef HAVE_FLOAT_H - DPUTC(&dd, *p); - DPUTZ(&dd); - if (*p == 'f') { - size_t mx = (f & f_Long ? LDBL_MAX_10_EXP : DBL_MAX_10_EXP) + 16; - if (mx > sz) - sz = mx; - } - if (!(f & f_prec)) - prec = 6; - else - sz += prec + 16; - if ((f & f_wd) && wd + 1 > sz) - sz = wd + 1; - DENSURE(d, sz); - if (f & f_Long) - d->len += sprintf(d->buf + d->len, dd.buf, - va_arg(*ap, long double)); - else - d->len += sprintf(d->buf + d->len, dd.buf, - va_arg(*ap, double)); - goto formatted; -#else - DPUTS(d, ""); -#endif + /* --- Maybe some length flags --- */ + + switch (*p) { + case 'h': f |= len_h; p++; break; + case 'l': f |= len_l; p++; break; + case 'L': f |= len_L; p++; break; + } + + /* --- The flags are now ready --- */ + + fs->f = f; - case 'c': - DPUTC(&dd, *p); - DPUTZ(&dd); - if ((f & f_wd) && wd + 1> sz) - sz = wd + 1; - DENSURE(d, sz); - d->len += sprintf(d->buf + d->len, dd.buf, - va_arg(*ap, unsigned)); - goto formatted; - - case 's': { - const char *s = va_arg(*ap, const char *); - sz = strlen(s); - DPUTC(&dd, *p); - DPUTZ(&dd); - if (f & f_prec) - sz = prec; - if ((f & f_wd) && wd > sz) - sz = wd; - DENSURE(d, sz + 1); - d->len += sprintf(d->buf + d->len, dd.buf, s); - goto formatted; + /* --- At the end, an actual directive --- */ + + fs->ch = *p; + switch (*p++) { + case '%': + fs->fmt = fmt_unset; + break; + case 'd': case 'i': case 'x': case 'X': case 'o': case 'u': + switch (f & f_len) { + case len_l: fs->fmt = fmt_li; break; + default: fs->fmt = fmt_i; + } + break; + case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': + fs->fmt = (f & f_len) == len_L ? 
fmt_Lf : fmt_f; + break; + case 'c': + fs->fmt = fmt_i; + break; + case 's': + fs->fmt = fmt_s; + break; + case 'p': + fs->fmt = fmt_p; + break; + case 'n': + switch (f & f_len) { + case len_h: fs->fmt = fmt_hn; break; + case len_l: fs->fmt = fmt_ln; break; + default: fs->fmt = fmt_n; } + break; + default: + fprintf(stderr, + "FATAL dstr_vputf: unknown format specifier `%c'\n", p[-1]); + abort(); + } + + /* --- Finally sort out the argument --- * + * + * If we don't have explicit argument positions then this comes after the + * width and precision; and we don't know the type code until we've + * parsed the specifier, so this seems the right place to handle it. + */ + + if (!(f & f_posarg)) fs->arg = anext++; + set_arg(&av, fs->arg, fs->fmt); + } + + /* --- Quick pass over the argument vector to collect the arguments --- */ + + for (fa = DA(&av), fal = fa + DA_LEN(&av); fa < fal; fa++) { + switch (fa->fmt) { +#define CASE(code, ty) case fmt_##code: fa->u.code = va_arg(*ap, ty); break; + FMTTYPES(CASE) +#undef CASE + default: abort(); + } + } + + /* --- Final pass through the format string to produce output --- */ - case 'p': - DPUTC(&dd, *p); - DPUTZ(&dd); - if ((f & f_prec) && prec + 16 > sz) - sz = prec + 16; - if ((f & f_wd) && wd + 1> sz) - sz = wd + 1; - DENSURE(d, sz); - d->len += sprintf(d->buf + d->len, dd.buf, - va_arg(*ap, const void *)); - goto formatted; - - case 'n': - if (f & f_long) - *va_arg(*ap, long *) = (long)(d->len - n); - else if (f & f_short) - *va_arg(*ap, short *) = (short)(d->len - n); - else - *va_arg(*ap, int *) = (int)(d->len - n); - goto formatted; - - /* --- Other random stuff --- */ - - putch: - DPUTC(&dd, *p); - p++; - break; + fa = DA(&av); + for (fs = DA(&sv), fsl = fs + DA_LEN(&sv); fs < fsl; fs++) { + f = fs->f; + + /* --- Output the literal portion --- */ + + if (fs->n) DPUTM(d, fs->p, fs->n); + + /* --- And now the variable portion --- */ + + if (fs->fmt == fmt_unset) { + switch (fs->ch) { + case 0: break; + case '%': DPUTC(d, 
'%'); break; + default: abort(); } + continue; } - formatted: DRESET(&dd); - q = ++p; + DPUTC(&dd, '%'); + + /* --- Resolve the width and precision --- */ + + if (!(f & f_wd)) + wd = 0; + else { + wd = (fs->f & f_wdarg) ? *(int *)&fa[fs->wd].u.i : fs->wd; + if (wd < 0) { wd = -wd; f |= f_minus; } + } + + if (!(f & f_prec)) + prec = 0; + else { + prec = (fs->f & f_precarg) ? *(int *)&fa[fs->prec].u.i : fs->prec; + if (prec < 0) { prec = 0; f &= ~f_prec; } + } + + /* --- Write out the flags, width and precision --- */ + + if (f & f_plus) DPUTC(&dd, '+'); + if (f & f_minus) DPUTC(&dd, '-'); + if (f & f_sharp) DPUTC(&dd, '#'); + if (f & f_zero) DPUTC(&dd, '0'); + + if (f & f_wd) { + DENSURE(&dd, PUTFSTEP); + dd.len += sprintf(dd.buf + dd.len, "%d", wd); + } -#undef f_short -#undef f_long -#undef f_Long -#undef f_wd -#undef f_prec + if (f & f_prec) { + DENSURE(&dd, PUTFSTEP + 1); + dd.len += sprintf(dd.buf + dd.len, ".%d", prec); + } + + /* --- Write out the length gadget --- */ + + switch (f & f_len) { + case len_h: DPUTC(&dd, 'h'); break; + case len_l: DPUTC(&dd, 'l'); break; + case len_L: DPUTC(&dd, 'L'); break; + case len_std: break; + default: abort(); + } + + /* --- And finally the actually important bit --- */ + + DPUTC(&dd, fs->ch); + DPUTZ(&dd); + + /* --- Make sure we have enough space for the output --- */ + + sz = PUTFSTEP; + if (sz < wd) sz = wd; + if (sz < prec + 16) sz = prec + 16; + switch (fs->ch) { + case 'a': case 'A': + case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': +#ifdef HAVE_FLOAT_H + if (fs->ch == 'f') { + mx = ((fs->f & f_len) == len_L ? 
+ LDBL_MAX_10_EXP : DBL_MAX_10_EXP) + 16; + if (sz < mx) sz = mx; + } + break; +#else + DPUTS(d, ""); + continue; +#endif + case 's': + if (!(f & f_prec)) { + mx = strlen(fa[fs->arg].u.s); /* not @n@: `%n' and the return value need the initial length */ + if (sz < mx) sz = mx; + } + break; + case 'n': + switch (fs->fmt) { +#define CASE(code, ty) \ + case fmt_##code: *fa[fs->arg].u.code = d->len - n; break; + PERCENT_N_FMTTYPES(CASE) +#undef CASE + default: abort(); + } + continue; + } + + /* --- Finally do the output stage --- */ + + DENSURE(d, sz + 1); + switch (fs->fmt) { +#ifdef HAVE_SNPRINTF +# define CASE(code, ty) case fmt_##code: \ + i = snprintf(d->buf + d->len, sz + 1, dd.buf, fa[fs->arg].u.code); \ + break; +#else +# define CASE(code, ty) case fmt_##code: \ + i = sprintf(d->buf + d->len, dd.buf, fa[fs->arg].u.code); \ + break; +#endif + OUTPUT_FMTTYPES(CASE) +#undef CASE + default: abort(); + } + assert(0 <= i && i <= sz); d->len += i; } - DPUTM(d, q, p - q); -finished: + /* --- We're done --- */ + DPUTZ(d); - DDESTROY(&dd); + DDESTROY(&dd); DA_DESTROY(&av); DA_DESTROY(&sv); /* release the spec and argument vectors too */ return (d->len - n); diff --git a/struct/dstr.3 b/struct/dstr.3 index 1cc866d..e59f370 100644 --- a/struct/dstr.3 +++ b/struct/dstr.3 @@ -330,10 +330,10 @@ and .BR sprintf (3) because the former has to do most of its work itself. In particular, .B dstr_putf -doesn't (and probably never will) understand the +understands the POSIX .RB ` n$ ' -positional parameter notation accepted by many Unix C libraries. There -is no macro equivalent of +positional parameter notation accepted by many Unix C libraries, even if +the underlying C library does not. There is no macro equivalent of .BR dstr_putf . 
.PP The function diff --git a/struct/t/dstr-putf-test.c b/struct/t/dstr-putf-test.c new file mode 100644 index 0000000..5de7870 --- /dev/null +++ b/struct/t/dstr-putf-test.c @@ -0,0 +1,93 @@ +#include "config.h" + +#include +#include +#include +#include +#include +#include + +#include "dstr.h" + +static int win = 0, lose = 0; +static dstr d = DSTR_INIT; +static char buf[1024]; + +static void check(const char *what, const char *want) +{ + if (strcmp(want, d.buf) == 0) + win++; + else { + lose++; + fprintf(stderr, "test failed: %s\n expected: %s\n found: %s\n", + what, want, d.buf); + } +} + +static void PRINTF_LIKE(1, 2) format(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + dstr_reset(&d); + dstr_vputf(&d, fmt, &ap); + va_end(ap); +} + +static void PRINTF_LIKE(1, 2) prepare(const char *fmt, ...) +{ + va_list ap; + int n; + + va_start(ap, fmt); +#ifdef HAVE_SNPRINTF + n = vsnprintf(buf, sizeof(buf), fmt, ap); +#else + n = vsprintf(buf, fmt, ap); +#endif + assert(0 <= n && n < sizeof(buf)); +} + +#define TEST1(fmtargs) do { \ + format fmtargs; \ + prepare fmtargs; \ + check(#fmtargs, buf); \ +} while (0) + +#define TEST2(fmtargs, want) do { \ + format fmtargs; \ + check(#fmtargs, want); \ +} while (0) + +#define LENGTHY \ + "This is a rather longer string than the code is expecting: will it fit?" 
+ +int main(void) +{ + TEST2(("Hello, world!"), "Hello, world!"); + TEST2(("just a ->%%<- sign"), "just a ->%<- sign"); + TEST2(("Testing, testing, %d, %d, %d.", 1, 2, 3), + "Testing, testing, 1, 2, 3."); + TEST2(("->%5d<-", 138), "-> 138<-"); + TEST2(("->%*d<-", 5, 138), "-> 138<-"); + TEST2(("->%-*d<-", 5, 138), "->138 <-"); + TEST2(("->%*d<-", -5, 138), "->138 <-"); + TEST2(("->%-*d<-", -5, 138), "->138 <-"); + TEST2(("->%.*s<-", 5, "truncate me"), "->trunc<-"); + TEST2(("->%.*s<-", -5, "don't truncate me"), "->don't truncate me<-"); + TEST2(("Truncation indirect: ->%.*s<-", 10, "a long string to be chopped"), + "Truncation indirect: ->a long str<-"); + TEST2(("%08lx:%s", 0x65604204ul, "tripe-ec"), "65604204:tripe-ec"); + TEST2(("%s", LENGTHY), LENGTHY); + + TEST1(("big float: ->%f<- and integer %d\n", DBL_MAX, 42)); + + TEST2(("Testing, testing, %3$d, %2$d, %1$d.", 3, 2, 1), + "Testing, testing, 1, 2, 3."); + TEST2(("Truncation indirect: ->%1$.*2$s<-", + "a long string to be chopped", 10), + "Truncation indirect: ->a long str<-"); + + if (!lose) printf("All tests successful.\n"); + else printf("FAILED %d of %d tests.\n", lose, win + lose); + return (!!lose); +} diff --git a/struct/tests.at b/struct/tests.at index b93f1a2..f54deb5 100644 --- a/struct/tests.at +++ b/struct/tests.at @@ -47,6 +47,13 @@ for seed in 0x0394946c 0xe8991664 ""; do done AT_CLEANUP +## dstr +AT_SETUP([struct: dstr-putf]) +AT_KEYWORDS([struct dstr putf dstr_putf]) +AT_CHECK([BUILDDIR/t/dstr-putf.t], [0], [All tests successful. +]) +AT_CLEANUP + ## sym AT_SETUP([struct: sym]) AT_KEYWORDS([struct sym]) -- [mdw]