From 289651a7df7bc48724137cd0faaf8535fb54e73b Mon Sep 17 00:00:00 2001
Message-Id: <289651a7df7bc48724137cd0faaf8535fb54e73b.1718337449.git.mdw@distorted.org.uk>
From: Mark Wooding <mdw@chiark.greenend.org.uk>
Date: Mon, 22 Apr 2024 18:15:31 +0100
Subject: [PATCH] @@@ mostly bench docs
Organization: Straylight/Edgeware

From: Mark Wooding <mdw@distorted.org.uk>

---
 struct/buf-float.c       | 183 +++++--------
 struct/buf.h             |  12 +-
 test/bench.3.in          | 550 +++++++++++++++++++++++++++++++++------
 test/bench.c             |  27 +-
 test/bench.h             | 280 +++++++++-----------
 test/example/Makefile.am |   9 +
 test/example/adhoc.c     |  66 +----
 test/example/bench.c     |  91 +++++++
 test/example/example.h   |  17 ++
 test/example/fib.c       |  71 +++++
 utils/align.3.in         |  48 +++-
 utils/fltfmt.c           |   4 +-
 utils/macros.h           |  15 +-
 utils/maths.h            |   6 -
 utils/t/fltfmt-test.c    |  56 ++--
 15 files changed, 953 insertions(+), 482 deletions(-)
 create mode 100644 test/example/bench.c
 create mode 100644 test/example/fib.c

diff --git a/struct/buf-float.c b/struct/buf-float.c
index 5b5b625..e163ff7 100644
--- a/struct/buf-float.c
+++ b/struct/buf-float.c
@@ -33,8 +33,21 @@
 #include "bits.h"
 #include "buf.h"
 #include "fltfmt.h"
+#include "macros.h"
 
-/*----- External functions ------------------------------------------------*/
+/*----- Constants ---------------------------------------------------------*/
+
+/* Tolerable errors.  These aren't great, and all of them imply a failure to
+ * faithfully pass the value on, but they're also the inevitable consequence
+ * of having different floating-point systems.
+ */
+#define IGNERR (FLTERR_INEXACT | FLTERR_OFLOW | FLTERR_UFLOW)
+
+/*----- Main code ---------------------------------------------------------*/
+
+#define FORMATS(_)							\
+  _(flt, float, f32, 4)							\
+  _(dbl, double, f64, 8)
 
 /* --- @buf_getf{32,64}{,l,b} --- *
  *
@@ -49,67 +62,29 @@
  *		the usual round-to-nearest/ties-to-even rounding mode.
  */
 
-int buf_getf32(buf *b, float *x_out)
-{
-  const octet *p;
-
-  p = buf_get(b, 4); if (!p) return (-1);
-  fltfmt_f32btoflt(x_out, p, FLTRND_NEAREVEN); return (0);
-}
-
-int buf_getf32l(buf *b, float *x_out)
-{
-  const octet *p;
-
-  p = buf_get(b, 4); if (!p) return (-1);
-  fltfmt_f32ltoflt(x_out, p, FLTRND_NEAREVEN); return (0);
-}
-
-int buf_getf32b(buf *b, float *x_out)
-{
-  const octet *p;
-
-  p = buf_get(b, 4); if (!p) return (-1);
-  fltfmt_f32ltoflt(x_out, p, FLTRND_NEAREVEN); return (0);
-}
-
-int (dbuf_getf32)(dbuf *db, float *x_out)
-  { return (dbuf_getf32(db, x_out)); }
-int (dbuf_getf32l)(dbuf *db, float *x_out)
-  { return (dbuf_getf32l(db, x_out)); }
-int (dbuf_getf32b)(dbuf *db, float *x_out)
-  { return (dbuf_getf32b(db, x_out)); }
-
-int buf_getf64(buf *b, double *x_out)
-{
-  const octet *p;
-
-  p = buf_get(b, 8); if (!p) return (-1);
-  fltfmt_f64btodbl(x_out, p, FLTRND_NEAREVEN); return (0);
-}
-
-int buf_getf64l(buf *b, double *x_out)
-{
-  const octet *p;
-
-  p = buf_get(b, 8); if (!p) return (-1);
-  fltfmt_f64ltodbl(x_out, p, FLTRND_NEAREVEN); return (0);
-}
-
-int buf_getf64b(buf *b, double *x_out)
-{
-  const octet *p;
-
-  p = buf_get(b, 8); if (!p) return (-1);
-  fltfmt_f64ltodbl(x_out, p, FLTRND_NEAREVEN); return (0);
-}
-
-int (dbuf_getf64)(dbuf *db, double *x_out)
-  { return (dbuf_getf64(db, x_out)); }
-int (dbuf_getf64l)(dbuf *db, double *x_out)
-  { return (dbuf_getf64l(db, x_out)); }
-int (dbuf_getf64b)(dbuf *db, double *x_out)
-  { return (dbuf_getf64b(db, x_out)); }
+#define DEFGET1(ty, cty, fty, e, xe, w)					\
+  int GLUE3(buf_get, fty, xe)(buf *b, cty *x_out)			\
+  {									\
+    const octet *p;							\
+    unsigned err;							\
+									\
+    p = buf_get(b, w); if (!p) return (-1);				\
+    err = fltfmt_##fty##e##to##ty(x_out, p, FLTRND_NEAREVEN);		\
+      if (err&~IGNERR) { BBREAK(b); return (-1); }			\
+    return (0);								\
+  }									\
+  int (GLUE3(dbuf_get, fty, xe))(dbuf *db, cty *x_out)			\
+    { return (GLUE3(dbuf_get, fty, xe)(db, x_out)); }
+
+#define DEFGET(ty, cty, fty, w)						\
+  DEFGET1(ty, cty, fty, b, EMPTY, w)					\
+  DEFGET1(ty, cty, fty, l, l, w)					\
+  DEFGET1(ty, cty, fty, b, b, w)
+
+FORMATS(DEFGET)
+
+#undef DEFGET1
+#undef DEFGET
 
 /* --- @buf_putf{32,64}{,l,b} --- *
  *
@@ -124,66 +99,28 @@ int (dbuf_getf64b)(dbuf *db, double *x_out)
  *		the usual round-to-nearest/ties-to-even rounding mode.
  */
 
-int buf_putf32(buf *b, float x)
-{
-  octet *p;
-
-  p = buf_get(b, 4); if (!p) return (-1);
-  fltfmt_flttof32b(p, x, FLTRND_NEAREVEN); return (0);
-}
-
-int buf_putf32l(buf *b, float x)
-{
-  octet *p;
-
-  p = buf_get(b, 4); if (!p) return (-1);
-  fltfmt_flttof32l(p, x, FLTRND_NEAREVEN); return (0);
-}
-
-int buf_putf32b(buf *b, float x)
-{
-  octet *p;
-
-  p = buf_get(b, 4); if (!p) return (-1);
-  fltfmt_flttof32b(p, x, FLTRND_NEAREVEN); return (0);
-}
-
-int (dbuf_putf32)(dbuf *db, float x)
-  { return (dbuf_putf32(db, x)); }
-int (dbuf_putf32l)(dbuf *db, float x)
-  { return (dbuf_putf32l(db, x)); }
-int (dbuf_putf32b)(dbuf *db, float x)
-  { return (dbuf_putf32b(db, x)); }
-
-int buf_putf64(buf *b, double x)
-{
-  octet *p;
-
-  p = buf_get(b, 8); if (!p) return (-1);
-  fltfmt_dbltof64b(p, x, FLTRND_NEAREVEN); return (0);
-}
-
-int buf_putf64l(buf *b, double x)
-{
-  octet *p;
-
-  p = buf_get(b, 8); if (!p) return (-1);
-  fltfmt_dbltof64l(p, x, FLTRND_NEAREVEN); return (0);
-}
-
-int buf_putf64b(buf *b, double x)
-{
-  octet *p;
-
-  p = buf_get(b, 8); if (!p) return (-1);
-  fltfmt_dbltof64b(p, x, FLTRND_NEAREVEN); return (0);
-}
-
-int (dbuf_putf64)(dbuf *db, double x)
-  { return (dbuf_putf64(db, x)); }
-int (dbuf_putf64l)(dbuf *db, double x)
-  { return (dbuf_putf64l(db, x)); }
-int (dbuf_putf64b)(dbuf *db, double x)
-  { return (dbuf_putf64b(db, x)); }
+#define DEFPUT1(ty, cty, fty, e, xe, w)					\
+  int GLUE3(buf_put, fty, xe)(buf *b, cty x)				\
+  {									\
+    octet *p;								\
+    unsigned err;							\
+									\
+    p = buf_get(b, w); if (!p) return (-1);				\
+    err = fltfmt_##ty##to##fty##e(p, x, FLTRND_NEAREVEN);		\
+      if (err&~IGNERR) { BBREAK(b); return (-1); }			\
+    return (0);								\
+  }									\
+  int (GLUE3(dbuf_put, fty, xe))(dbuf *db, cty x)			\
+    { return (GLUE3(dbuf_put, fty, xe)(db, x)); }
+
+#define DEFPUT(ty, cty, fty, w)						\
+  DEFPUT1(ty, cty, fty, b, EMPTY, w)					\
+  DEFPUT1(ty, cty, fty, l, l, w)					\
+  DEFPUT1(ty, cty, fty, b, b, w)
+
+FORMATS(DEFPUT)
+
+#undef DEFPUT1
+#undef DEFPUT
 
 /*----- That's all, folks -------------------------------------------------*/
diff --git a/struct/buf.h b/struct/buf.h
index 2cfffa9..e346b37 100644
--- a/struct/buf.h
+++ b/struct/buf.h
@@ -630,9 +630,9 @@ extern int buf_getf32b(buf */*b*/, float */*x_out*/);
 #define dbuf_getf32l(db, x_out) (buf_getf32l(DBUF_BUF(db), (x_out)))
 #define dbuf_getf32b(db, x_out) (buf_getf32b(DBUF_BUF(db), (x_out)))
 
-extern int dbuf_getf64(dbuf */*db*/, double */*x_out*/);
-extern int dbuf_getf64l(dbuf */*db*/, double */*x_out*/);
-extern int dbuf_getf64b(dbuf */*db*/, double */*x_out*/);
+extern int buf_getf64(buf */*b*/, double */*x_out*/);
+extern int buf_getf64l(buf */*b*/, double */*x_out*/);
+extern int buf_getf64b(buf */*b*/, double */*x_out*/);
 #define dbuf_getf64(db, x_out) (buf_getf64(DBUF_BUF(db), (x_out)))
 #define dbuf_getf64l(db, x_out) (buf_getf64l(DBUF_BUF(db), (x_out)))
 #define dbuf_getf64b(db, x_out) (buf_getf64b(DBUF_BUF(db), (x_out)))
@@ -657,9 +657,9 @@ extern int buf_putf32b(buf */*b*/, float /*x*/);
 #define dbuf_putf32l(db, x) (buf_putf32l(DBUF_BUF(db), (x)))
 #define dbuf_putf32b(db, x) (buf_putf32b(DBUF_BUF(db), (x)))
 
-extern int dbuf_putf64(dbuf */*db*/, double /*x*/);
-extern int dbuf_putf64l(dbuf */*db*/, double /*x*/);
-extern int dbuf_putf64b(dbuf */*db*/, double /*x*/);
+extern int buf_putf64(buf */*b*/, double /*x*/);
+extern int buf_putf64l(buf */*b*/, double /*x*/);
+extern int buf_putf64b(buf */*b*/, double /*x*/);
 #define dbuf_putf64(db, x) (buf_putf64(DBUF_BUF(db), (x)))
 #define dbuf_putf64l(db, x) (buf_putf64l(DBUF_BUF(db), (x)))
 #define dbuf_putf64b(db, x) (buf_putf64b(DBUF_BUF(db), (x)))
diff --git a/test/bench.3.in b/test/bench.3.in
index b4fc429..dd61af9 100644
--- a/test/bench.3.in
+++ b/test/bench.3.in
@@ -30,11 +30,22 @@
 .\"--------------------------------------------------------------------------
 .TH bench 3mLib "9 March 2024" "Straylight/Edgeware" "mLib utilities library"
 .\" @bench_createtimer
+.\" @BENCH_TIMELOOP_DECLS
+.\" @BENCH_TIMELOOP_TAG
+.
 .\" @bench_init
 .\" @bench_destroy
 .\" @bench_calibrate
+.\" @bench_preflight
+.\" @bench_adapt
+.\" @bench_adjust
+.\" @BENCH_MEASURE_DECLS
+.\" @BENCH_MEASURE_TAG
+.\" @BENCH_MEASURE
 .\" @bench_measure
 .
+.\" @bench_report
+.
 .\"--------------------------------------------------------------------------
 .SH NAME
 bench \- low-level benchmarking tools
@@ -45,6 +56,10 @@ bench \- low-level benchmarking tools
 .nf
 .B "#include <mLib/bench.h>"
 .PP
+.B "#define BTF_TIMEOK ..."
+.B "#define BTF_CYOK ..."
+.B "#define BTF_ANY (BTF_TIMEOK | BTF_CYOK)"
+.PP
 .ta 2n +2n +2n
 .B "struct bench_time {"
 .B "	unsigned f;"
@@ -71,7 +86,7 @@ bench \- low-level benchmarking tools
 .ta 2n +\w'\fBint (*now)('u
 .BI "	int (*now)(struct bench_timer *" bt ,
 .BI "		struct bench_time *" t_out ", unsigned " f );
-.ta 2n +\w'\void (*diff)('u
+.ta 2n +\w'\fBvoid (*diff)('u
 .BI "	void (*diff)(struct bench_timer *" bt ,
 .BI "		struct bench_timing *" delta_out ,
 .BI "		const struct bench_time *" t0 ,
@@ -80,8 +95,21 @@ bench \- low-level benchmarking tools
 .B "};"
 .B "struct bench_timer {"
 .B "	const struct bench_timerops *ops;"
+.B "	unsigned ref;"
 .B "};"
 .PP
+.B "struct bench_timer *bench_createtimer(void);"
+.B "BENCH_TIMELOOP_DECLS;"
+.ta 2n \w'\fBBENCH_TIMELOOP_TAG('u
+.BI "BENCH_TIMELOOP_TAG(" tag ", struct bench_timer *" tm ,
+.BI "		struct bench_timing *" delta_out ", double " n ,
+.BI "		" onbreak )
+.BI "	" stmt
+.PP
+.B "#define BTF_CLB ..."
+.B "#define BTF_INDIRECT ..."
+.PP
+.ta 2n
 .B "struct bench_state {"
 .B "	unsigned f;"
 .B "	double target_s;"
@@ -90,19 +118,40 @@ bench \- low-level benchmarking tools
 .PP
 .BI "typedef void bench_fn(unsigned long " n ", void *" ctx );
 .PP
-.B "#define BTF_TIMEOK ..."
-.B "#define BTF_CYOK ..."
-.B "#define BTF_CLB ..."
-.B "#define BTF_ANY (BTF_TIMEOK | BTF_CYOK)"
-.PP
-.B "struct bench_timer *bench_createtimer(void);"
-.PP
 .BI "int bench_init(struct bench_state *" b ", struct bench_timer *" tm );
 .BI "void bench_destroy(struct bench_state *" b );
-.BI "int bench_calibrate(struct bench_state *" b );
+.BI "int bench_calibrate(struct bench_state *" b ", unsigned " f );
+.BI "int bench_preflight(struct bench_state *" b );
+.ta \w'\fBint bench_adapt('u
+.BI "int bench_adapt(struct bench_state *" b ", double *" n_inout ,
+.BI "	const struct bench_timing *" t );
+.ta \w'\fBint bench_adjust('u
+.BI "int bench_adjust(struct bench_state *" b ", struct bench_timing *" t_inout ,
+.BI "	double " n ", double " base );
+.B "BENCH_MEASURE_DECLS;"
+.ta 2n \w'\fBBENCH_MEASURE_TAG('u
+.BI "BENCH_MEASURE_TAG(" tag ", struct bench_state *" b ,
+.BI "		int &" rc ", struct bench_timing *" t_out ", double " bsae )
+.BI "	" stmt
+.ta 2n \w'\fBBENCH_MEASURE('u
+.BI "BENCH_MEASURE(struct bench_state *" b ,
+.BI "		int &" rc ", struct bench_timing *" t_out ", double " bsae )
+.BI "	" stmt
 .ta \w'\fBint bench_measure('u
 .BI "int bench_measure(struct bench_state *" b ", struct bench_timing *" t_out ,
 .BI "	double " base ", bench_fn *" fn ", void *" ctx );
+.PP
+.ta 2n
+.B "enum {"
+.B "	BTU_OP = 0,"
+.B "	BTU_BYTE = 1,"
+.B "	..."
+.BI "	BTU_LIMIT = " n
+.B "};"
+.ta \w'\fBvoid bench_report('u
+.BI "void bench_report(const struct gprintf_ops *" gops ", void *" go ,
+.BI "	unsigned " unit ", const struct bench_timing *" t );
+.PP
 .fi
 .
 .\"--------------------------------------------------------------------------
@@ -113,17 +162,20 @@ The header file
 provides declarations and defintions
 for performing low-level benchmarks.
 .PP
-The `main event' is
-.BR bench_measure .
-This function will be described in detail later,
+The `main event' are the
+.B BENCH_MEASURE
+macro and
+.B bench_measure
+function.
+These will be described in detail later,
 but, in brief,
-it calls a caller-provided function,
+they execute a caller-provided piece of code
 instructing it to run adaptively chosen numbers of iterations,
 in order to get a reasonably reliable measurement of its running time,
-and then reports its results by filling in a structure.
+and then report the results by filling in a structure.
 .PP
-With understanding this function as our objective,
-we must examine all of the pieces involved in making it work.
+With understanding these as our objective,
+we must examine all of the pieces involved in making them work.
 .
 .SS Timers in general
 A
@@ -133,11 +185,14 @@ in seconds (ideally precise to tiny fractions of a second),
 and/or in CPU cycles.
 A timer is represented by a pointer to an object of type
 .BR "struct bench_timer" .
-This structure has a single member,
+This structure has two members:
 .BR ops ,
 pointing to a
 .BR "struct bench_timerops" ,
-which is a table of function pointers;
+which is a table of function pointers,
+and
+.BR ref ,
+which is a simple reference count;
 typically, a timer has more data following this,
 but this fact is not exposed to applications.
 .PP
@@ -152,7 +207,7 @@ must always point to the timer object itself.
 .IB tm ->ops->describe( tm ", " d)
 Write a description of the timer to the dynamic string
 .IR d .
-.TPc
+.TP
 .IB tm ->ops->preflight( tm )
 Ensure that the timer is in working order,
 and perform any necessary per-thread or per-process setup.
@@ -193,6 +248,18 @@ and
 Destroy the timer,
 releasing all of the resources that it holds.
 .PP
+In a freshly-created timer, the
+.B ref
+member is 1.
+Applications are expected to handle the reference count themselves;
+the
+.B destroy
+function does not check or decrement the count.
+Code for destroying the timer when it's no longer needed
+might look like this.
+.VS
+if (!--tm->ref) tm->ops->destroy(tm);
+.VE
 A
 .B bench_timing
 structure reports the difference between two times,
@@ -224,10 +291,15 @@ is nonzero (true)
 if the timer returned any valid timing information.
 .TP
 .B n
-The number of iterations performed by the benchmark function
+The number of units processed the benchmark computation
 on its satisfactory run,
-multiplied by
-.IR base .
+multiplied by a given
+.IR base
+\(en see
+.BR BENCH_MEASURE ,
+.BR bench_measure ,
+and
+.BR bench_adjust .
 .TP
 .B t
 The time taken for the satisfactory run of the benchmark function,
@@ -270,6 +342,104 @@ member stores flags
 and or
 .B BTF_CYOK
 if the passage-of-time or cycle count respectively are valid.
+.PP
+The
+.B BENCH_TIMELOOP_TAG
+macro uses a timer to measure a number of iterations of a computation.
+It requires the declarations made by
+.B BENCH_TIMELOOP_DECLS
+to be in scope,
+ideally within an enclosing block
+(rather than at top-level,
+where they'll have static storage duration,
+and take longer to access).
+The macro's expansion is syntactically a statement head;
+see
+.BR control (3)
+for details about the underlying machinery.
+In more detail, the macro is invoked as
+.IP
+.nf
+.ta 2n
+.BI "BENCH_TIMELOOP_TAG(" tag ", " tm ", " delta_out ", " n ", " onbreak )
+.BI "	" stmt
+.fi
+.PP
+The
+.I tag
+argument is used to distinguish
+the labels used internally by the macro:
+see
+.BR control (3)
+for details about tags.
+The macro calls on the timer
+.I tm
+to determine the initial time and cycle counts,
+performs
+.I n
+iterations of some computation,
+and calls on the timer a second time
+to determine the final time and cycle counts,
+and to store the difference in
+.BI * delta_out \fR.
+The
+.I stmt
+may be any C statement:
+when it is executed,
+the variable
+.BR _bench_n ,
+of type
+.BR "unsigned long" ,
+is in scope.
+The statement should perform
+.B _bench_n
+iterations of the computation to be measured
+\(en and do as little else as possible.
+The argument
+.I n
+to the macro
+may be larger than
+.BR ULONG_MAX :
+the macro will execute
+.I stmt
+multiple times if necessary.
+The statement is allowed to clobber
+.BR _bench_n .
+Note that
+.B BENCH_TIMELOOP_TAG
+does
+.I not
+call the timer's
+.B preflight
+function.
+If the
+.I stmt
+executes a free
+.B break
+statement
+then the statement
+.I onbreak
+is executed;
+a free
+.B continue
+statement within
+.I stmt
+currently does not have a useful behaviour.
+Free
+.B break
+and
+.B continue
+statements within
+.I onbreak
+behave normally.
+(See
+.BR control (3)
+for a definition of
+`free'
+.B break
+and
+.B continue
+statements.)
 .
 .SS The built-in timer
 The function
@@ -383,7 +553,12 @@ the similarly-named clock subtimer does nothing
 except check that the
 .B linux-x86-perf-rdpmc-hw-cycles
 cycle subtimer has been selected.
-This is almost certainly the best choice if it's available.
+This is almost certainly the best choice if it's available;
+It is, however, not compatible with (at least some versions of)
+.BR valgrind (1);
+it will detect that it is running under
+.B valgrind
+and fail to initialize.
 .TP
 .B x86-rdtscp
 Counts CPU cycles using the x86
@@ -446,7 +621,10 @@ If
 is called with a non-null timer pointer,
 then it will not fail;
 the benchmark state will be initialized,
-and the function returns zero.
+and the function returns zero;
+the timer's reference count is
+.I not
+incremented.
 If the timer pointer is null,
 then
 .B bench_init
@@ -457,29 +635,30 @@ If this succeeds,
 then the benchmark state will be initialized,
 and the function returns zero.
 In both cases,
-the timer becomes owned by the benchmark state:
+the timer reference becomes owned by the benchmark state:
 calling
 .B bench_destroy
-on the benchmark state will destroy the timer.
+on the benchmark state will decrement the timer's reference count,
+and destroy it unless it has additional outstanding references.
 If
 .B bench_init
 is called with a null timer pointer,
 and its attempt to create a timer for itself fails,
 then
 .B bench_init
-returns \-1;
+returns \-1:
 the benchmark state is not initialized
-and can safely be discarded;
-calling
-safe to call
-.B bench_destroy
-on the unsuccessfully benchmark state is safe and has no effect.
+and can safely be discarded.
 .PP
 Calling
 .B bench_destroy
 on a benchmark state
 releases any resources it holds,
 most notably its timer, if any.
+Calling
+.B bench_destroy
+on an unsuccessfully initialized benchmark state
+is safe but has no effect.
 .PP
 Although
 .B struct bench_state
@@ -510,9 +689,35 @@ Either or both of these calibrations can succeed or fail;
 if passage-of-time calibration fails,
 then cycle count calibration is impossible.
 .PP
+The benchmarking state must be calibrated differently
+for different kinds of timing loop;
+this is controlled by the flags passed as the
+.I f
+argument to
+.BR bench_calibrate .
+The main difference lies in whether the code to be measured
+is called
+.IR indirectly ,
+i.e., via a function pointer.
+Set
+.B BTF_INDIRECT
+if the code is to be called indirectly;
+leave this flag clear if the code is called directly.
+The
+.B bench_measure
+function always makes indirect calls;
+the
+.B BENCH_MEASURE
+macro does not itself make indirect calls.
+Usually, a program needs only one or the other;
+if both are necessary for some reason,
+the best approach is just to set up two benchmarking states
+sharing the same timer,
+and calibrate them separately.
+.PP
 When it completes,
 .B bench_calibrate
-sets flag in the benchmark state's
+sets flags in the benchmark state's
 .B f
 member:
 if passage-of-time calibration succeeded,
@@ -537,11 +742,96 @@ is called for a second or subsequent time on the same benchmark state,
 it returns immediately,
 either returning 0 or \-1
 according to whether passage-of-time had previously been calibrated.
-.
-.SS Timing functions
-A
+.PP
+The
+.B BENCH_MEASURE
+macro measures the performance of a computation.
+It requires the declarations made by
+.B BENCH_MEASURE_DECLS
+to be in scope,
+ideally within an enclosing block
+(rather than at top-level,
+where they'll have static storage duration,
+and take longer to access).
+The macro's expansion is syntactically a statement head;
+see
+.BR control (3)
+for details about the underlying machinery.
+In more detail, the macro is invoked as
+.IP
+.nf
+.ta 2n
+.BI "BENCH_MEASURE(" b ", " rc ", " t_out ", " base )
+.BI "	" stmt
+.fi
+.PP
+The
+.I stmt
+can be any C statement;
+it should perform
+.B _bench_n
+iterations of the computation to be measured.
+(The variable
+.B _bench_n
+is declarared as part of
+.B BENCH_MEASURE_DECLS
+and has type
+.BR "unsigned long" .
+Before commencing measurement proper,
+the macro calls
+.BR bench_preflight ,
+described below,
+to check that everything is set up properly
+for measurements on the current thread;
+if this fails, then the macro sets
+.I rc
+to \-1.
+Otherwise, the macro executes
+.I stmt
+one or more times,
+with the objective of finding an iteration count
+.I n
+such that
+.I n
+iterations of the computation take more than
+.IB b ->target_s "" \fR/\(sr2
+seconds.
+If measurement fails,
+then
+.I rc
+is set to \-1;
+otherwise,
+.I rc
+is set to zero, and
+.BI * t_out
+is filled in with the measurement;
+.IB t_out ->n
+is set to
+.IR n "\ \(mu\ " base .
+.PP
+The
+.B BENCH_MEASURE_TAG
+macro works just like
+.B BENCH_MEASURE
+except that it takes an additional
+.I tag
+argument used to distinguish the internal labels
+used by the macro's implementation;
+this makes it possible to use
+.B BENCH_MEASURE_TAG
+as a component in more complex macros.
+See
+.BR control (3)
+for details about control-structure macros
+and the meaning and format of tags.
+.PP
+The function
+.B bench_measure
+is similar,
+except that it calls a
 .I benchmark function
-has the signature
+to perform the computation.
+A benchmark function has the signature
 .IP
 .BI "void " fn "(unsigned long " n ", void *" ctx );
 .PP
@@ -553,61 +843,153 @@ The
 argument is a pointer passed into
 .B bench_measure
 for the benchmark function's own purposes.
-.PP
-The function
+The
 .B bench_measure
-receives five arguments.
-.TP
-.I b
-points to the benchmark state to be used.
-.TP
-.I t_out
-is the address of a
-.BR struct bench_timing
-in which the measurement should be left.
-This structure is described below.
-.TP
-.I base
-is a count of the number of operations performed
-by each iteration of the benchmark function.
-.TP
-.I fn
-is a benchmark function, described above.
-.TP
-.I ctx
-is a pointer to be passed to the benchmark function.
+function returns zero on success,
+or \-1 on failure.
+Note that
 .B bench_measure
-does not interpret this pointer in any way.
+invokes the benchmark indirectly,
+so the benchmark state should have been calibrated with
+.BR BTF_INDIRECT .
+.
+.SS Measurement utilities
+The following functions are primarily exported for the benefit of the
+.B BENCH_MEASURE
+macro,
+but are documented here in case they are useful.
 .PP
 The
-.B bench_measure
-function calls its benchark function repeatedly
-with different iteration counts
-.IR n ,
-with the objective that the call take approximately
-.B target_s
-seconds, as established in the benchmark state.
-(Currently, if
-.B target_s
-holds the value
-.IR t ,
-then
-.B bench_measure
-is satisfied when a call takes at least
-.IR t /\(sr2\*,s.)
-Once the function finds a satisfactory number of iterations,
-it stores the results in
-.BI * t_out \fR.
-If measurement succeeds, then
-.B bench_measure
-returns zero.
-If it fails \(en
-most likely because the timer failed \(en
-then it returns \-1.
+.B bench_preflight
+function prepares a benchmarking state for use.
+It checks that the timer is calibrated
+and suitable for measuring passage-of-time;
+it also calls the timer's
+.B preflight
+function to prepare it for measurements on the current thread.
+If these checks succeed, then
+.B bench_preflight
+returns zero;
+otherwise it returns \-1
+and the caller should not proceed with measurements.
+.PP
+The
+.B bench_adapt
+function is used to determine iteration counts.
+It is used in a loop such as the following.
+.IP
+.nf
+.ta 2n +2n
+.B "BENCH_TIMELOOP_DECLS;"
+.B "struct bench_timer *tm;"
+.B "struct bench_timing t;"
+.B "double n = 1.0, target_s = 1.0;"
+.IP
+.B "do {"
+.B "	BENCH_TIMELOOP_TAG(time, tm, &t, n, { break; })"
+.BI "		" "(do " _bench_n " iterations of some computation)" ;
+.B "} while (!bench_adapt(&n, target_s, &t));"
+.fi
+.PP
+On entry,
+.BI *n_inout
+should be the number of iterations performed by the previous step,
+and
+.BI * t
+the resulting time;
+the
+.B BTF_TIMEOK
+flag must be set in
+.IB t ->f \fR.
+If the timing is sufficient \(en if
+.IR t\fB->t "\ \*(>=\ " target_s /\(sr2
+\(en then
+.B bench_adapt
+returns a nonzero value to indicate that measurement is complete.
+Otherwise, it sets
+.BI * n_inout
+to a new, larger iteration count
+and returns zero to indicate that a further pass is necessary.
+.PP
+The
+.B bench_adjust
+function adjusts a raw timing,
+as captured by
+.BR BENCH_TIMELOOP_TAG ,
+according to the calibration data captured in
+.IR b .
+On exit, the timing data is updated,
+and
+.IB t ->n
+is set to the product
+.IR n "\ \(mu\ " base .
+.
+.SS Reporting results
+The
+.B bench_report
+function formats a measurement result
+into a human-readable string.
+The function writes its output using the
+generalized output formatting operations
+.I gops
+and output pointer
+.IR go ;
+see
+.BR gprintf (3)
+for details on generalized output formatting.
+The
+.I unit
+argument describes the unit of activity being measured:
+.TP
+.B BTU_OP
+counts operations of some unspecified nature, while
+.TP
+.B BTU_BYTE
+counts a number of bytes processed.
+.PP
+These are presented differently
+\(em in particular,
+quantities bytes are expressed using binary scaling rather than decimal.
+The timing to report is given by the
+.I t
+argument;
+.IB t ->n
+gives the number of units processed.
+.
+.\"--------------------------------------------------------------------------
+.SH EXAMPLE
 .
+The following macros offer a fairly simple example of
+how the benchmarking functions and macros can be used.
+.VS
+.ta 2n +2n +2n 2n+\w'\fBBENCH_MEASURE_TAG('u \n(.lu-\n(.iu-4n
+#define BENCHMARK_DECLS					\e
+	struct bench_timing _bmark_t;				\e
+	int _bmark_rc;				\e
+	BENCH_MEASURE_DECLS
+.VP
+#define BENCHMARK_TAG(tag, b, unit, base)					\e
+	MC_BEFORE(tag##__benchmark_before, { fflush(stdout); })				\e
+	MC_AFTER(tag##__benchmark_after, {				\e
+		if (_bmark_rc)			\e
+			printf(": FAILED\en");		\e
+		else {			\e
+			fputs(": ", stdout);		\e
+			bench_report(&file_printops, stdout, (unit), &_bmark_tm);\ 	\e
+			putchar('\n');		\e
+		}			\e
+	})				\e
+	BENCH_MEASURE_TAG(tag##__bmarkmark_measure,				\e
+				(b), _bmark_rc, &_bmark_t, (base))
+#define BENCHMARK(b, unit, base)					\e
+	BENCHMARK_TAG(bench, b, unit, base)
+.VE
+
 .\"--------------------------------------------------------------------------
 .SH "SEE ALSO"
 .
+.BR control (3),
+.BR macros (3),
 .BR tvec-bench (3),
 .BR mLib (3).
 .
diff --git a/test/bench.c b/test/bench.c
index 2ac63b4..493f227 100644
--- a/test/bench.c
+++ b/test/bench.c
@@ -963,7 +963,7 @@ struct bench_timer *bench_createtimer(const char *config)
     if (select_timer(t, i, tmconf[i].p, tmconf[i].sz)) goto end;
 
   /* All is done. */
-  t->_t.ops = &timer_ops; ret = &t->_t; t = 0;
+  t->_t.ops = &timer_ops; t->_t.ref = 1; ret = &t->_t; t = 0;
 end:
   if (t) timer_destroy(&t->_t);
   return (ret);
@@ -1017,7 +1017,7 @@ end:
  */
 
 void bench_destroy(struct bench_state *b)
-  { if (b->tm) { b->tm->ops->destroy(b->tm); b->tm = 0; } }
+  { if (b->tm && !--b->tm->ref) { b->tm->ops->destroy(b->tm); b->tm = 0; } }
 
 /* --- @spin@ --- *
  *
@@ -1080,7 +1080,7 @@ int bench_calibrate(struct bench_state *b, unsigned f)
    */
   if (f&BTF_INDIRECT) {
     for (i = 0; i < 50; i++)
-      BENCH_TIMELOOP_TAG(setup, b, &delta, 10000)
+      BENCH_TIMELOOP_TAG(setup, b->tm, &delta, 10000, ;)
 	LAUNDER(&spin)(_bench_n, 0);
   }
 
@@ -1093,10 +1093,10 @@ int bench_calibrate(struct bench_state *b, unsigned f)
 
     /* Measure @n@ iterations of the idle loop. */
     if (f&BTF_INDIRECT)
-      BENCH_TIMELOOP_TAG(calibrate, b, &delta, n)
+      BENCH_TIMELOOP_TAG(calibrate, b->tm, &delta, n, ;)
 	LAUNDER(&spin)(_bench_n, 0);
     else
-      BENCH_TIMELOOP_TAG(calibrate, b, &delta, n)
+      BENCH_TIMELOOP_TAG(calibrate, b->tm, &delta, n, ;)
 	while (_bench_n--) RELAX;
     tf &= delta.f; if (!(tf&BTF_TIMEOK)) { rc = -1; goto end; }
 
@@ -1162,8 +1162,8 @@ int bench_preflight(struct bench_state *b)
 
 /* --- @bench_adapt@ --- *
  *
- * Arguments:	@struct bench_state *b@ = benchmark state
- *		@double *n_inout@ = number of iterations, updated
+ * Arguments:	@double *n_inout@ = number of iterations, updated
+ *		@double target_s@ = target time in seconds
  *		@const struct bench_timing *t@ = timing from the previous run
  *
  * Returns:	Nonzero if the measurement is sufficient; zero to run again.
@@ -1182,15 +1182,15 @@ int bench_preflight(struct bench_state *b)
  *		On entry, @*n_inout@ should be the number of iterations
  *		performed by the previous pass, and @*t@ the resulting time;
  *		the @BTF_TIMEOK@ flag must be set @t->f@.  If the timing is
- *		sufficient -- @t->t@ is sufficiently close to @b->target_s@
+ *		sufficient -- @t->t@ is sufficiently close to @target_s@
  *		-- then the function returns nonzero to indicate that
  *		measurement is complete.  Otherwise, it sets @*n_inout@ to a
  *		new, larger iteration count and returns zero to indicate that
  *		a further pass is necessary.
  */
 
-int bench_adapt(struct bench_state *b,
-		double *n_inout, const struct bench_timing *t)
+int bench_adapt(double *n_inout, double target_s,
+		const struct bench_timing *t)
 {
   double n = *n_inout, nn;
 
@@ -1208,8 +1208,8 @@ int bench_adapt(struct bench_state *b,
    * hand, if %$T/t < 1 + 1/n$% then %$t (n + 1)/n > T$%, so just trying
    * again with %$n' = n + 1$% iterations will very likely work.
    */
-  if (t->t >= 0.707*b->target_s) return (1);
-  nn = n*b->target_s/t->t; modf(nn, &nn);
+  if (t->t >= 0.707*target_s) return (1);
+  nn = n*target_s/t->t; modf(nn, &nn);
   *n_inout = nn > n ? nn : n + 1;
   return (0);
 }
@@ -1279,8 +1279,7 @@ int bench_measure(struct bench_state *b, struct bench_timing *t_out,
   BENCH_MEASURE_DECLS;
   int rc;
 
-  BENCH_MEASURE_TAG(bench_measure, b, rc, t_out, base)
-    fn(_bench_n, ctx);
+  BENCH_MEASURE(b, rc, t_out, base) fn(_bench_n, ctx);
   return (rc);
 }
 
diff --git a/test/bench.h b/test/bench.h
index 3756247..05c1eee 100644
--- a/test/bench.h
+++ b/test/bench.h
@@ -49,11 +49,15 @@
 #  include "dstr.h"
 #endif
 
+#ifndef MLIB_MACROS_H
+#  include "macros.h"
+#endif
+
 #ifndef MLIB_GPRINTF_H
 #  include "gprintf.h"
 #endif
 
-/*----- Data structures ---------------------------------------------------*/
+/*----- Timers ------------------------------------------------------------*/
 
 struct bench_time {
   unsigned f;				/* flags */
@@ -73,7 +77,10 @@ struct bench_timing {
   double n, t, cy;			/* count, time, and cycles */
 };
 
-struct bench_timer { const struct bench_timerops *ops; };
+struct bench_timer {
+  const struct bench_timerops *ops;
+  unsigned ref;
+};
 
 struct bench_timerops {
   void (*describe)(struct bench_timer */*bt*/, dstr */*d*/);
@@ -105,24 +112,22 @@ struct bench_timerops {
     /* Release the timer and any resources it holds. */
 };
 
-struct bench_state {
-  struct bench_timer *tm;		/* a timer */
-  double target_s;			/* target time to run benchmarks */
-  unsigned f;				/* calibration flags (@BTF_...@) */
-#define BTF_CLB 0x0100			/*   tried calibrating */
-  struct { double m, c; } clk, cy;	/* calculated overheads */
-};
-
-typedef void bench_fn(unsigned long /*n*/, void */*ctx*/);
-  /* Run the benchmark @n@ times, given a context pointer @ctx@. */
-
-enum {
-  BTU_OP,			       /* counting operations of some kind */
-  BTU_BYTE,				/* counting bytes (@rbuf >= 0@) */
-  BTU_LIMIT				/* (number of units) */
-};
+/* --- @bench_createtimer@ --- *
+ *
+ * Arguments:	@const char *config@ = user-supplied configuration string
+ *
+ * Returns:	A freshly constructed standard timer object, or null on
+ *		failure.
+ *
+ * Use:		Allocate a timer.  Dispose of it by calling
+ *		@tm->ops->destroy(tm)@ when you're done.
+ *
+ *		Applications should not set configuration strings except as
+ *		established by user action, e.g., from a command-line option,
+ *		environment variable, or configuration file.
+ */
 
-/*----- Macros ------------------------------------------------------------*/
+extern struct bench_timer *bench_createtimer(const char *config);
 
 /* --- @BENCH_TIMELOOP_DECLS@ --- *
  *
@@ -140,28 +145,28 @@ enum {
 /* --- @BENCH_TIMELOOP_TAG@ --- *
  *
  * Arguments:	@tag@ = control structure tag
- *		@struct bench_state *b@ = benchmark state
+ *		@struct bench_state *tm@ = benchmark timer
  *		@struct bench_timing *delta_out@ = where to put the result
  *		@double n@ = number of iterations
  *
  * Returns:	---
  *
- * Use:		@BENCH_TIMELOOP_TAG(tag, b, delta_out, n) stmt@ runs @stmt@
+ * Use:		@BENCH_TIMELOOP_TAG(tag, tm, delta_out, n) stmt@ runs @stmt@
  *		@n@ times, capturing the time and/or CPU cycles taken in
  *		@*delta_out@.  The iteration count must be no more than the
  *		%%\emph{square}%% of @ULONG_MAX@.  If @stmt@ executes a free
- *		@break@ statement, then the containing loop -- which ust
+ *		@break@ statement, then the containing loop -- which must
  *		exist -- is exited.
  *
  *		This macro is not intended to be used directly by users: it
  *		is used internally by @bench_calibrate@ and @BENCH_MEASURE@.
  */
 
-#define BENCH_TIMELOOP_TAG(tag, b, delta_out, n)			\
+#define BENCH_TIMELOOP_TAG(tag, tm, delta_out, n, onbreak)		\
   MC_BEFORE(tag##__benchtl_setup, {					\
     double _R = ULONG_MAX; double _n = (n);				\
 									\
-    _bench_tm = (b)->tm;						\
+    _bench_tm = (tm);							\
     if (_n <= _R) { _bench_n1 = 0; _bench_n = _n; }			\
     else { _bench_n1 = _n/_R; _bench_n = _n - _R*_bench_n1; }		\
   })									\
@@ -178,131 +183,18 @@ enum {
 	{ ; },								\
 	{ MC_GOTARGET(tag##__benchtl_break); })
 
-/* --- @BENCH_MEASURE_DECLS@ --- *
- *
- * Arguments:	---
- *
- * Use:		Expands to the declarations needed by @BENCH_MEASURE@.
- *		These should be at block scope, not at toplevel.
- */
-
-#define BENCH_MEASURE_DECLS						\
-  struct bench_state *_bench_b;						\
-  struct bench_timing *_bench_t;					\
-  double _bench_nn;							\
-  BENCH_TIMELOOP_DECLS
-
-/* --- @BENCH_MEASURE@, @BENCH_MEASURE_TAG@ --- *
- *
- * Arguments:	@tag@ = control structure tag
- *		@struct bench_state *b@ = benchmark state
- *		@int &rc@ = where to put the result code (zero on success,
- *			%$-1$% on failure)
- *		@struct bench_timing *t_out@ = where to put the result
- *		@double base@ = number of operations per external iteration
- *
- * Returns:	---
- *
- * Use:		@BENCH_MEASURE(b, rc, delta_out, n) stmt@ measures the
- *		execution of @stmt@.
- *
- *		The statement should perform @_bench_n@ iterations of some
- *		operation.  It will be invoked multiple times, with varying
- *		iteration counts, so as to run for approximately
- *		@b->target_s@ seconds.
- *
- *		On success, the resulting timing is left in @*t_out@, with
- *		@t_out->n@ holding the product of the final iteration count
- *		and @base@, and @rc@ is set to zero.  If the timer fails, or
- *		if @stmt@ invokes a free @break@ statement, then @rc@ is set
- *		to %$-1$%.
- *
- *		The macro @BENCH_MEASURE_TAG@ is the same, except that it
- *		allows an explicit control-structure tag to be set so that it
- *		can be used within larger control structure macros.
- */
-
-#define BENCH_MEASURE_TAG(tag, b, rc, t_out, base)			\
-  MC_BEFORE(tag##__benchmsr_setup, {					\
-    _bench_b = (b); _bench_t = (t_out); _bench_nn = 1.0;		\
-    if (bench_preflight(_bench_b)) MC_GOTARGET(tag##__benchmsr_fail);	\
-  })									\
-  MC_TARGET(tag##__benchmsr_done,					\
-    { bench_adjust(_bench_b, _bench_t, _bench_nn, (base)); (rc) = 0; }) \
-  MC_TARGET(tag##__benchmsr_fail, { (rc) = -1; })			\
-  for (;;)								\
-    MC_WRAP(tag##__benchmsr_loop,					\
-      { ; },								\
-      { _bench_t->f &= _bench_b->f;					\
-	if (!(_bench_t->f&BTF_TIMEOK)) MC_GOTARGET(tag##__benchmsr_fail); \
-	if (bench_adapt(_bench_b, &_bench_nn, _bench_t))		\
-	  MC_GOTARGET(tag##__benchmsr_done);				\
-      },								\
-      { MC_GOTARGET(tag##__benchmsr_fail); })				\
-    BENCH_TIMELOOP_TAG(tag##__benchmsr_time, _bench_b, _bench_t, _bench_nn)
-
-#define BENCH_MEASURE(b, rc, t_out, base)				\
-  BENCH_MEASURE_TAG(bench_measure, b, rc, t_out, base)
-
-/* --- @BENCHMARK_DECLS@ --- *
- *
- * Arguments:	---
- *
- * Use:		Expands to the declarations needed by @BENCHMARK_TAG@.
- *		These should be at block scope, not at toplevel.
- */
-
-#define BENCHMARK_DECLS							\
-  struct bench_timing _bench_tm;					\
-  int _bench_rc;							\
-  BENCH_MEASURE_DECLS
-
-/* --- @BENCHMARK_TAG@ --- *
- *
- * Arguments:	@tag@ = control structure tag
- *		@struct bench_state *b@ = benchmark state
- *		@struct gprintf_ops *gops, void *go@ = output formatter
- *		@unsigned unit@ = unit being measured (@BTU_...@ code)
- *		@double base@ = number of units per external iteration
- *
- * Returns:	---
- *
- * Use:		@BENCHMARK_TAG(tag, b, gops, go, unit, base) stmt@ measures
- *		the execution of @stmt@ and writes a report to an output
- *		formatter.  The @stmt@ should run @_bench_n@ iterations of
- *		the operation to be measured.
- *
- *		No tagless version of this macro is provided, because it is
- *		expected to be useful primarily in the construction of
- *		higher-level macros.
- */
-
-#define BENCHMARK_TAG(tag, b, gops, go, unit, base)			\
-  MC_AFTER(tag##__benchmark_after, {					\
-    if (_bench_rc) gprintf((gops), (go), "FAILED");			\
-    else bench_report((gops), (go), _bench_b, (unit), &_bench_tm);	\
-  })									\
-  BENCH_MEASURE_TAG(tag##__benchmark_measure,				\
-		    (b), _bench_rc, &_bench_tm, (base))
-
-/*----- Functions provided ------------------------------------------------*/
+/*----- Measuring ---------------------------------------------------------*/
 
-/* --- @bench_createtimer@ --- *
- *
- * Arguments:	@const char *config@ = user-supplied configuration string
- *
- * Returns:	A freshly constructed standard timer object, or null on
- *		failure.
- *
- * Use:		Allocate a timer.  Dispose of it by calling
- *		@tm->ops->destroy(tm)@ when you're done.
- *
- *		Applications should not set configuration strings except as
- *		established by user action, e.g., from a command-line option,
- *		environment variable, or configuration file.
- */
+struct bench_state {
+  struct bench_timer *tm;		/* a timer */
+  double target_s;			/* target time to run benchmarks */
+  unsigned f;				/* calibration flags (@BTF_...@) */
+#define BTF_CLB 0x0100			/*   tried calibrating */
+  struct { double m, c; } clk, cy;	/* calculated overheads */
+};
 
-extern struct bench_timer *bench_createtimer(const char *config);
+typedef void bench_fn(unsigned long /*n*/, void */*ctx*/);
+  /* Run the benchmark @n@ times, given a context pointer @ctx@. */
 
 /* --- @bench_init@ --- *
  *
@@ -377,8 +269,8 @@ extern int bench_preflight(struct bench_state */*b*/);
 
 /* --- @bench_adapt@ --- *
  *
- * Arguments:	@struct bench_state *b@ = benchmark state
- *		@double *n_inout@ = number of iterations, updated
+ * Arguments:	@double *n_inout@ = number of iterations, updated
+ *		@double target_s@ = target time in seconds
  *		@const struct bench_timing *t@ = timing from the previous run
  *
  * Returns:	Nonzero if the measurement is sufficient; zero to run again.
@@ -387,17 +279,20 @@ extern int bench_preflight(struct bench_state */*b*/);
  *		benchmark function to perform next.  It is used in a loop
  *		such as the following.
  *
- *			@double n = 1.0;@
+ *			@BENCH_TIMELOOP_DECLS;@
+ *			@struct bench_timer *tm;@
  *			@struct bench_timing t;@
+ *			@double n = 1.0, target_s = 1.0;@
  *
  *			@do {@
- *			  (run @n@ iterations; set @t@ to the timing)
- *			@} while (!bench_adapt(b, &n, &t));@
+ *			  @BENCH_TIMELOOP_TAG(time, tm, &t, n, { break; })@
+ *			    (do @_bench_n@ iterations of some computation)@;@
+ *			@} while (!bench_adapt(&n, target_s, &t));@
  *
  *		On entry, @*n_inout@ should be the number of iterations
  *		performed by the previous pass, and @*t@ the resulting time;
- *		the @BTF_TIMEOK@ flag must be set @t->f@.  If the timing is
- *		sufficient -- @t->t@ is sufficiently close to @b->target_s@
+ *		the @BTF_TIMEOK@ flag must be set in @t->f@.  If the timing
+ *		is sufficient -- @t->t@ is sufficiently close to @target_s@
  *		-- then the function returns nonzero to indicate that
  *		measurement is complete.  Otherwise, it sets @*n_inout@ to a
  *		new, larger iteration count and returns zero to indicate that
@@ -408,7 +303,7 @@ extern int bench_preflight(struct bench_state */*b*/);
  *		@bench_measure@ function.
  */
 
-extern int bench_adapt(struct bench_state */*b*/, double */*n_inout*/,
+extern int bench_adapt(double */*n_inout*/, double /*target_s*/,
 		       const struct bench_timing */*t*/);
 
 /* --- @bench_adjust@ --- *
@@ -421,7 +316,7 @@ extern int bench_adapt(struct bench_state */*b*/, double */*n_inout*/,
  *
  * Returns:	---
  *
- * Use:		Adjusts a raw timing, as captured by @BENCH_TIMELOOP@,
+ * Use:		Adjusts a raw timing, as captured by @BENCH_TIMELOOP_TAG@,
  *		according to the calibration data captured in @b@.
  *		On exit, the timing data is updated, and @t->n@ is set to the
  *		product @n*base@.
@@ -435,6 +330,73 @@ extern void bench_adjust(struct bench_state */*b*/,
 			 struct bench_timing */*t_inout*/,
 			 double /*n*/, double /*base*/);
 
+/* --- @BENCH_MEASURE_DECLS@ --- *
+ *
+ * Arguments:	---
+ *
+ * Use:		Expands to the declarations needed by @BENCH_MEASURE@.
+ *		These should be at block scope, not at toplevel.
+ */
+
+#define BENCH_MEASURE_DECLS						\
+  struct bench_state *_bench_b;						\
+  struct bench_timing *_bench_t;					\
+  double _bench_nn;							\
+  BENCH_TIMELOOP_DECLS
+
+/* --- @BENCH_MEASURE@, @BENCH_MEASURE_TAG@ --- *
+ *
+ * Arguments:	@tag@ = control structure tag
+ *		@struct bench_state *b@ = benchmark state
+ *		@int &rc@ = where to put the result code (zero on success,
+ *			%$-1$% on failure)
+ *		@struct bench_timing *t_out@ = where to put the result
+ *		@double base@ = number of operations per external iteration
+ *
+ * Returns:	---
+ *
+ * Use:		@BENCH_MEASURE(b, rc, delta_out, n) stmt@ measures the
+ *		execution of @stmt@.
+ *
+ *		The statement should perform @_bench_n@ iterations of some
+ *		operation.  It will be invoked multiple times, with varying
+ *		iteration counts, so as to run for approximately
+ *		@b->target_s@ seconds.
+ *
+ *		On success, the resulting timing is left in @*t_out@, with
+ *		@t_out->n@ holding the product of the final iteration count
+ *		and @base@, and @rc@ is set to zero.  If the timer fails, or
+ *		if @stmt@ invokes a free @break@ statement, then @rc@ is set
+ *		to %$-1$%.
+ *
+ *		The macro @BENCH_MEASURE_TAG@ is the same, except that it
+ *		allows an explicit control-structure tag to be set so that it
+ *		can be used within larger control structure macros.
+ */
+
+#define BENCH_MEASURE_TAG(tag, b, rc, t_out, base)			\
+  MC_BEFORE(tag##__benchmsr_setup, {					\
+    _bench_b = (b); _bench_t = (t_out); _bench_nn = 1.0;		\
+    if (bench_preflight(_bench_b)) MC_GOTARGET(tag##__benchmsr_fail);	\
+  })									\
+  MC_TARGET(tag##__benchmsr_done,					\
+    { bench_adjust(_bench_b, _bench_t, _bench_nn, (base)); (rc) = 0; }) \
+  MC_TARGET(tag##__benchmsr_fail, { (rc) = -1; })			\
+  for (;;)								\
+    MC_WRAP(tag##__benchmsr_loop,					\
+      { ; },								\
+      { _bench_t->f &= _bench_b->f;					\
+	if (!(_bench_t->f&BTF_TIMEOK)) MC_GOTARGET(tag##__benchmsr_fail); \
+	if (bench_adapt(&_bench_nn, _bench_b->target_s, _bench_t))	\
+	  MC_GOTARGET(tag##__benchmsr_done);				\
+      },								\
+      { MC_GOTARGET(tag##__benchmsr_fail); })				\
+    BENCH_TIMELOOP_TAG(tag##__benchmsr_time,				\
+		       _bench_b->tm, _bench_t, _bench_nn, { break; })
+
+#define BENCH_MEASURE(b, rc, t_out, base)				\
+  BENCH_MEASURE_TAG(bench_measure, b, rc, t_out, base)
+
 /* --- @bench_measure@ --- *
  *
  * Arguments:	@struct bench_state *b@ = benchmark state
@@ -460,6 +422,12 @@ extern int bench_measure(struct bench_state */*b*/,
 
 /*----- Reporting ---------------------------------------------------------*/
 
+enum {
+  BTU_OP,			       /* counting operations of some kind */
+  BTU_BYTE,				/* counting bytes (@rbuf >= 0@) */
+  BTU_LIMIT				/* (number of units) */
+};
+
 /* --- @bench_report@ --- *
  *
  * Arguments:	@const struct gprintf_ops *gops, void *gp@ = output formatter
diff --git a/test/example/Makefile.am b/test/example/Makefile.am
index eae66f8..663efbe 100644
--- a/test/example/Makefile.am
+++ b/test/example/Makefile.am
@@ -42,5 +42,14 @@ noinst_PROGRAMS		+= adhoc
 adhoc_SOURCES		 =
 
 adhoc_SOURCES		+= adhoc.c
+adhoc_SOURCES		+= fib.c
+adhoc_SOURCES		+= example.h
+
+noinst_PROGRAMS		+= bench
+bench_SOURCES		 =
+
+bench_SOURCES		+= bench.c
+bench_SOURCES		+= fib.c
+bench_SOURCES		+= example.h
 
 ###----- That's all, folks --------------------------------------------------
diff --git a/test/example/adhoc.c b/test/example/adhoc.c
index fbaa69f..d1d549e 100644
--- a/test/example/adhoc.c
+++ b/test/example/adhoc.c
@@ -27,7 +27,6 @@
 
 /*----- Header files ------------------------------------------------------*/
 
-#include <limits.h>
 #include <stdio.h>
 
 #include "macros.h"
@@ -37,64 +36,21 @@
 #include "tvec-bench.h"
 #include "tvec-types.h"
 
-/*----- Main code ---------------------------------------------------------*/
-
-/* Stupid but traditional recursive Fibonacci. */
-static unsigned long recfib(unsigned n)
-  { return (n <= 1 ? n : recfib(n - 1) + recfib(n - 2)); }
-
-/* Slightly less stupid but still traditional iterative Fibonacci. */
-static unsigned long iterfib(unsigned n)
-{
-  unsigned long u, v, t;
-
-  for (u = 0, v = 1; n--; t = v, v = u, u += t);
-  return (u);
-}
+#include "example.h"
 
-/* Sadly nontraditional intelligent Fibonacci. */
-static unsigned long expfib(unsigned n)
-{
-  unsigned long a, b, u, v, t;
-
-  /* We work in %$\Q(\phi)$%, where %$\phi^2 = \phi + 1$%.  I claim that
-   * %$\phi^k = F_k \phi + F_{k-1} \pmod f(\phi))$%.  Proof by induction:
-   * note that * %$F_{-1} = F_1 - F_0 = 1$%, so %$\phi^0 = 1 = {}$%
-   * %$F_0 \phi + F_{-1}$%; and %$\phi^{k+1} = F_k \phi^2 + {}$%
-   * %$F_{k-1} \phi = F_k (\phi + 1) + F_{k-1} \phi = (F_k + {}$%
-   * %$F_{k-1} \phi + F_k = F_{k+1} \phi + F_k$% as claimed.
-   *
-   * Now, notice that %$(a \phi + b) (c \phi + d) = a c \phi^2 + {}$%
-   * $%(a d + b c) \phi + b d = a c (\phi + 1) + (a d + b c) \phi + {}$%
-   * %$b d = (a c + a d + b c) \phi + (a c + b d)$%.  In particular,
-   * %$(u \phi + v)^2 \equiv (u^2 + 2 u v) \phi + (u^2 + v^2)$%.
-   */
-  a = 0, b = 1; u = 1, v = 0;
-  if (n)
-    for (;;) {
-      if (n%2) { t = a*u; a = t + a*v + b*u; b = t + b*v; }
-      n /= 2; if (!n) break;
-      t = u*u; u = t + 2*u*v; v = t + v*v;
-    }
-  return (a);
-}
+/*----- Main code ---------------------------------------------------------*/
 
 int main(int argc, char *argv[])
 {
   struct tvec_state tvstate;
   int argpos;
   unsigned long i, a, b, t;
+  dstr d = DSTR_INIT;
 
   tvec_parseargs(argc, argv, &tvstate, &argpos, &tvec_adhocconfig);
   if (argpos < argc) die(2, "no input files expected");
   tvec_adhoc(&tvstate);
 
-#if (ULONG_MAX/65536 >> 16) >= 0xffffffff
-#  define FIBLIMIT 94		     /* F_94 = 19740274219868223167 > 2^64 */
-#else
-#  define FIBLIMIT 48			/* F_48 = 4807526976 > 2^32 */
-#endif
-
   TVEC_TESTGROUP(&tvstate, "fib-test") {
     for (i = 0, a = 0, b = 1; i < FIBLIMIT; i++, t = b, b = a, a += t)
       TVEC_TEST(&tvstate) {
@@ -109,16 +65,20 @@ int main(int argc, char *argv[])
 
     if (tvec_benchprep(&tvstate, &tvec_benchstate, 0)) break;
 
-    TVEC_BENCHMARK("recfib, n = 40", &tvstate, tvec_benchstate, BTU_OP, 1)
-      while (_bench_n--) ADMIRE(recfib(40));
-    TVEC_BENCHMARK("iterfib, n = " STR(FIBLIMIT),
-		   &tvstate, tvec_benchstate, BTU_OP, 1)
+    dstr_reset(&d); dstr_putf(&d, "recfib, n = %u", RECFIBLIMIT);
+    TVEC_BENCHMARK(d.buf, &tvstate, tvec_benchstate, BTU_OP, 1)
+      while (_bench_n--) ADMIRE(recfib(RECFIBLIMIT));
+
+    dstr_reset(&d); dstr_putf(&d, "iterfib, n = %u", FIBLIMIT);
+    TVEC_BENCHMARK(d.buf, &tvstate, tvec_benchstate, BTU_OP, 1)
       while (_bench_n--) ADMIRE(iterfib(FIBLIMIT));
-    TVEC_BENCHMARK("expfib, n = " STR(FIBLIMIT),
-		   &tvstate, tvec_benchstate, BTU_OP, 1)
+
+    dstr_reset(&d); dstr_putf(&d, "expfib, n = %u", FIBLIMIT);
+    TVEC_BENCHMARK(d.buf, &tvstate, tvec_benchstate, BTU_OP, 1)
       while (_bench_n--) ADMIRE(expfib(FIBLIMIT));
   }
 
+  dstr_destroy(&d);
   return (tvec_end(&tvstate));
 }
 
diff --git a/test/example/bench.c b/test/example/bench.c
new file mode 100644
index 0000000..144cf77
--- /dev/null
+++ b/test/example/bench.c
@@ -0,0 +1,91 @@
+/* -*-c-*-
+ *
+ * Demonstration of standalone benchmarking
+ *
+ * (c) 2024 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This file is part of the mLib utilities library.
+ *
+ * mLib is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * mLib is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with mLib.  If not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+/*----- Header files ------------------------------------------------------*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "macros.h"
+#include "report.h"
+#include "tvec.h"
+#include "tvec-adhoc.h"
+#include "tvec-bench.h"
+#include "tvec-types.h"
+
+#include "example.h"
+
+/*----- Macro from the manpage --------------------------------------------*/
+
+#define BENCHMARK_DECLS							\
+  struct bench_timing _bmark_t;						\
+  int _bmark_rc;							\
+  BENCH_MEASURE_DECLS
+
+#define BENCHMARK_TAG(tag, b, unit, base)				\
+  MC_BEFORE(tag##__benchmark_before, { fflush(stdout); })		\
+  MC_AFTER(tag##__benchmark_after, {					\
+    if (_bmark_rc)							\
+      printf(": FAILED\n");						\
+    else {								\
+      fputs(": ", stdout);						\
+      bench_report(&file_printops, stdout, (unit), &_bmark_t);		\
+      putchar('\n');							\
+    }									\
+  })									\
+  BENCH_MEASURE_TAG(tag##__benchmark_measure,				\
+		    (b), _bmark_rc, &_bmark_t, (base))
+#define BENCHMARK(b, unit, base) BENCHMARK_TAG(bench, b, unit, base)
+
+/*----- Main code ---------------------------------------------------------*/
+
+int main(void)
+{
+  struct bench_state b;
+  BENCHMARK_DECLS;
+
+  if (bench_init(&b, 0))
+    { fprintf(stderr, "timer setup failed\n"); exit(2); }
+  if (bench_calibrate(&b, 0))
+    { fprintf(stderr, "timer calibration failed\n"); exit(2); }
+
+  printf("recfib, n = %u", RECFIBLIMIT);
+  BENCHMARK(&b, BTU_OP, 1)
+    while (_bench_n--) ADMIRE(recfib(RECFIBLIMIT));
+
+  printf("iterfib, n = %u", FIBLIMIT);
+  BENCHMARK(&b, BTU_OP, 1)
+    while (_bench_n--) ADMIRE(iterfib(FIBLIMIT));
+
+  printf("expfib, n = %u", FIBLIMIT);
+  BENCHMARK(&b, BTU_OP, 1)
+    while (_bench_n--) ADMIRE(expfib(FIBLIMIT));
+
+  return (0);
+}
+
+/*----- That's all, folks -------------------------------------------------*/
diff --git a/test/example/example.h b/test/example/example.h
index b3cdee8..ecbaa6b 100644
--- a/test/example/example.h
+++ b/test/example/example.h
@@ -34,6 +34,7 @@
 
 /*----- Header files ------------------------------------------------------*/
 
+#include <limits.h>
 #include <stddef.h>
 
 /*----- Functions provided ------------------------------------------------*/
@@ -47,6 +48,22 @@ extern int greet(char */*buf*/, size_t /*sz*/, const char */*name*/);
    * Return zero on success, or -1 on error.
    */
 
+extern unsigned long recfib(unsigned /*n*/);
+  /* Stupid but traditional recursive Fibonacci. */
+
+extern unsigned long iterfib(unsigned /*n*/);
+  /* Slightly less stupid but still traditional iterative Fibonacci. */
+
+extern unsigned long expfib(unsigned /*n*/);
+  /* Sadly nontraditional intelligent Fibonacci. */
+
+#define RECFIBLIMIT 40			/* too slow beyond this */
+#if (ULONG_MAX/65536 >> 16) >= 0xffffffff
+#  define FIBLIMIT 94		     /* F_94 = 19740274219868223167 > 2^64 */
+#else
+#  define FIBLIMIT 48			/* F_48 = 4807526976 > 2^32 */
+#endif
+
 /*----- That's all, folks -------------------------------------------------*/
 
 #ifdef __cplusplus
diff --git a/test/example/fib.c b/test/example/fib.c
new file mode 100644
index 0000000..1639aa4
--- /dev/null
+++ b/test/example/fib.c
@@ -0,0 +1,71 @@
+/* -*-c-*-
+ *
+ * Compute elements of the Fibonacci sequence
+ *
+ * (c) 2024 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This file is part of the mLib utilities library.
+ *
+ * mLib is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * mLib is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with mLib.  If not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+/*----- Header files ------------------------------------------------------*/
+
+#include "example.h"
+
+/*----- Main code ---------------------------------------------------------*/
+
+unsigned long recfib(unsigned n)
+  { return (n <= 1 ? n : recfib(n - 1) + recfib(n - 2)); }
+
+unsigned long iterfib(unsigned n)
+{
+  unsigned long u, v, t;
+
+  for (u = 0, v = 1; n--; t = v, v = u, u += t);
+  return (u);
+}
+
+unsigned long expfib(unsigned n)
+{
+  unsigned long a, b, u, v, t;
+
+  /* We work in %$\Q(\phi)$%, where %$\phi^2 = \phi + 1$%.  I claim that
+   * %$\phi^k = F_k \phi + F_{k-1} \pmod f(\phi))$%.  Proof by induction:
+   * note that * %$F_{-1} = F_1 - F_0 = 1$%, so %$\phi^0 = 1 = {}$%
+   * %$F_0 \phi + F_{-1}$%; and %$\phi^{k+1} = F_k \phi^2 + {}$%
+   * %$F_{k-1} \phi = F_k (\phi + 1) + F_{k-1} \phi = (F_k + {}$%
+   * %$F_{k-1} \phi + F_k = F_{k+1} \phi + F_k$% as claimed.
+   *
+   * Now, notice that %$(a \phi + b) (c \phi + d) = a c \phi^2 + {}$%
+   * $%(a d + b c) \phi + b d = a c (\phi + 1) + (a d + b c) \phi + {}$%
+   * %$b d = (a c + a d + b c) \phi + (a c + b d)$%.  In particular,
+   * %$(u \phi + v)^2 \equiv (u^2 + 2 u v) \phi + (u^2 + v^2)$%.
+   */
+  a = 0, b = 1; u = 1, v = 0;
+  if (n)
+    for (;;) {
+      if (n%2) { t = a*u; a = t + a*v + b*u; b = t + b*v; }
+      n /= 2; if (!n) break;
+      t = u*u; u = t + 2*u*v; v = t + v*v;
+    }
+  return (a);
+}
+
+/*----- That's all, folks -------------------------------------------------*/
diff --git a/utils/align.3.in b/utils/align.3.in
index b6e2a9c..a9b9893 100644
--- a/utils/align.3.in
+++ b/utils/align.3.in
@@ -30,6 +30,7 @@
 .\"--------------------------------------------------------------------------
 .TH align 3mLib "4 January 2009" "Straylight/Edgeware" "mLib utilities library"
 .\" @ALIGN
+.\" @ALIGNOF
 .
 .\"--------------------------------------------------------------------------
 .SH NAME
@@ -41,21 +42,52 @@ align \- alignment utilities
 .nf
 .B "#include <mLib/align.h>"
 .PP
-.BI "size_t ALIGN(size_t " sz ");"
+.B "union align { ...\& };"
+.PP
+.BI "void ALIGN(size_t &" sz ");"
+.BI "size_t ALIGNOF(" type );
 .fi
 .
 .\"--------------------------------------------------------------------------
 .SH DESCRIPTION
+.
+The
+.B "union align"
+type is (a reasonable guess at) a type with the
+implementation's most restrictive alignment requirements;
+i.e., if a pointer is suitably aligned for
+.B "union align"
+value,
+then it should be suitably aligned for any other value too.
+.PP
+The
+.B ALIGNOF
+macro returns the alignment required for the given
+.IR type ;
+i.e., if an address is suitably aligned for a
+.I type
+value if and only if it is a multiple of
+.BR ALIGNOF( type ) \fR.
+.PP
 The
 .B ALIGN
-macro returns the value of its argument
+macro rounds its argument
+.I sz
+up to the next multiple of
+.BR "ALIGNOF(union align)" :
+so, if
+.I sz
+is an offset from the start of a suitably aligned chunk of memory,
+e.g., returned by
+.BR malloc (3),
+then
+.BI ALIGN( sz )
+adjusts
 .I sz
-rounded up to the next multiple of the size of
-.BR "union align" ,
-which is defined as a union containing a selection of built-in types.
-The intent is to write fairly portable memory allocators, which must
-return correctly-aligned memory.
-.IR array .
+in place, including additional padding
+so that the new offset is also suitably aligned.
+The intent is to assist with writing fairly portable memory allocators,
+which must return correctly-aligned memory.
 .
 .\"--------------------------------------------------------------------------
 .SH "SEE ALSO"
diff --git a/utils/fltfmt.c b/utils/fltfmt.c
index 291557b..89c705d 100644
--- a/utils/fltfmt.c
+++ b/utils/fltfmt.c
@@ -56,8 +56,8 @@
 
 /*----- Some useful constants ---------------------------------------------*/
 
-#define B31 0x80000000			/* just bit 31 */
-#define B30 0x40000000			/* just bit 30 */
+#define B31 0x80000000u			/* just bit 31 */
+#define B30 0x40000000u			/* just bit 30 */
 
 #define SH32 4294967296.0		/* 2^32 as floating-point */
 
diff --git a/utils/macros.h b/utils/macros.h
index bc83629..5b77225 100644
--- a/utils/macros.h
+++ b/utils/macros.h
@@ -61,16 +61,18 @@
 #define MLIB__STR(x) #x
 #define STR(x) MLIB__STR(x)
 
-/* --- @GLUE@ --- *
+/* --- @GLUE@, @GLUE3@ --- *
  *
  * Arguments:	@x, y@ = two sequences of tokens
+ *		@z@ = a third sequence of tokens
  *
  * Returns:	A single token formed by gluing together the macro-expansions
- *		of @x@ and @y@.
+ *		of @x@ and @y@, and @z@ for @GLUE3@.
  */
 
 #define MLIB__GLUE(x, y) x##y
 #define GLUE(x, y) MLIB__GLUE(x, y)
+#define GLUE3(x, y, z) GLUE(x, MLIB__GLUE(y, z))
 
 /* --- @STATIC_ASSERT@ --- *
  *
@@ -144,6 +146,15 @@
 #define UNQUALIFY(type, p)						\
 	CONVERT_CAREFULLY(type *, const volatile type *, p)
 
+/* --- @EMPTY@ --- *
+ *
+ * Arguments:	---
+ *
+ * Returns:	The empty token sequence.
+ */
+
+#define EMPTY
+
 /* --- @COMMA@ --- *
  *
  * Arguments:	---
diff --git a/utils/maths.h b/utils/maths.h
index fe33e8b..37227e9 100644
--- a/utils/maths.h
+++ b/utils/maths.h
@@ -36,10 +36,6 @@
 
 #include <math.h>
 
-#ifndef MLIB_BITS_H
-#  include "bits.h"
-#endif
-
 /*----- Macros provided ---------------------------------------------------*/
 
 /* --- @NANP@ --- *
@@ -86,8 +82,6 @@
 #  define NEGP(x) ((x) < 0)
 #endif
 
-/*----- Floating-point encoding and decoding ------------------------------*/
-
 /*----- That's all, folks -------------------------------------------------*/
 
 #ifdef __cplusplus
diff --git a/utils/t/fltfmt-test.c b/utils/t/fltfmt-test.c
index 76e1d41..5b60607 100644
--- a/utils/t/fltfmt-test.c
+++ b/utils/t/fltfmt-test.c
@@ -79,18 +79,18 @@ static const struct tvec_flaginfo flterrmask_flaginfo =
 static const struct tvec_flag fltrnd_flags[] = {
 
   /* Standard rounding modes. */
-  { "zero",		0xffff,		FLTRND_ZERO },
-  { "projinf",		0xffff,		FLTRND_PROJINF },
-  { "posinf",		0xffff,		FLTRND_POSINF },
-  { "neginf",		0xffff,		FLTRND_NEGINF },
-  { "odd",		0xffff,		FLTRND_ODD },
-  { "even",		0xffff,		FLTRND_EVEN },
-  { "nearest-even",	0xffff,		FLTRND_NEAREVEN },
-  { "nearest-odd",	0xffff,		FLTRND_NEARODD },
-  { "nearest-zero",	0xffff,		FLTRND_NEARZERO },
-  { "nearest-projinf",	0xffff,		FLTRND_NEARINF },
-  { "nearest-neginf",	0xffff,		FLTRND_NEARNEG },
-  { "nearest-posinf",	0xffff,		FLTRND_NEARPOS },
+  { "zero",		0xffffu,	FLTRND_ZERO },
+  { "projinf",		0xffffu,	FLTRND_PROJINF },
+  { "posinf",		0xffffu,	FLTRND_POSINF },
+  { "neginf",		0xffffu,	FLTRND_NEGINF },
+  { "odd",		0xffffu,	FLTRND_ODD },
+  { "even",		0xffffu,	FLTRND_EVEN },
+  { "nearest-even",	0xffffu,	FLTRND_NEAREVEN },
+  { "nearest-odd",	0xffffu,	FLTRND_NEARODD },
+  { "nearest-zero",	0xffffu,	FLTRND_NEARZERO },
+  { "nearest-projinf",	0xffffu,	FLTRND_NEARINF },
+  { "nearest-neginf",	0xffffu,	FLTRND_NEARNEG },
+  { "nearest-posinf",	0xffffu,	FLTRND_NEARPOS },
 
   /* Rounding mode bits: rounds away from zero in the listed conditions.
    * The notation corresponds to rounding predicates as follows.  The syntax
@@ -102,22 +102,22 @@ static const struct tvec_flag fltrnd_flags[] = {
    *	<h>	@HALF@	%|0|%	%|1|%
    *	<r>	@LOW@	%|0|%	%|1|%
    */
-  { "+0.00",		0x0001,		0x0001 },
-  { "+0.01",		0x0002,		0x0002 },
-  { "+0.10",		0x0004,		0x0004 },
-  { "+0.11",		0x0008,		0x0008 },
-  { "+1.00",		0x0010,		0x0010 },
-  { "+1.01",		0x0020,		0x0020 },
-  { "+1.10",		0x0040,		0x0040 },
-  { "+1.11",		0x0080,		0x0080 },
-  { "-0.00",		0x0100,		0x0100 },
-  { "-0.01",		0x0200,		0x0200 },
-  { "-0.10",		0x0400,		0x0400 },
-  { "-0.11",		0x0800,		0x0800 },
-  { "-1.00",		0x1000,		0x1000 },
-  { "-1.01",		0x2000,		0x2000 },
-  { "-1.10",		0x4000,		0x4000 },
-  { "-1.11",		0x8000,		0x8000 },
+  { "+0.00",		0x0001u,	0x0001u },
+  { "+0.01",		0x0002u,	0x0002u },
+  { "+0.10",		0x0004u,	0x0004u },
+  { "+0.11",		0x0008u,	0x0008u },
+  { "+1.00",		0x0010u,	0x0010u },
+  { "+1.01",		0x0020u,	0x0020u },
+  { "+1.10",		0x0040u,	0x0040u },
+  { "+1.11",		0x0080u,	0x0080u },
+  { "-0.00",		0x0100u,	0x0100u },
+  { "-0.01",		0x0200u,	0x0200u },
+  { "-0.10",		0x0400u,	0x0400u },
+  { "-0.11",		0x0800u,	0x0800u },
+  { "-1.00",		0x1000u,	0x1000u },
+  { "-1.01",		0x2000u,	0x2000u },
+  { "-1.10",		0x4000u,	0x4000u },
+  { "-1.11",		0x8000u,	0x8000u },
 
   /* Phew! */
   TVEC_ENDFLAGS
-- 
[mdw]