#include "bits.h"
#include "buf.h"
#include "fltfmt.h"
+#include "macros.h"
-/*----- External functions ------------------------------------------------*/
+/*----- Constants ---------------------------------------------------------*/
+
+/* Tolerable errors. These aren't great, and all of them imply a failure to
+ * faithfully pass the value on, but they're also the inevitable consequence
+ * of having different floating-point systems.
+ */
+#define IGNERR (FLTERR_INEXACT | FLTERR_OFLOW | FLTERR_UFLOW)
+
+/*----- Main code ---------------------------------------------------------*/
+
+#define FORMATS(_) \
+ _(flt, float, f32, 4) \
+ _(dbl, double, f64, 8)
/* --- @buf_getf{32,64}{,l,b} --- *
*
* the usual round-to-nearest/ties-to-even rounding mode.
*/
-int buf_getf32(buf *b, float *x_out)
-{
- const octet *p;
-
- p = buf_get(b, 4); if (!p) return (-1);
- fltfmt_f32btoflt(x_out, p, FLTRND_NEAREVEN); return (0);
-}
-
-int buf_getf32l(buf *b, float *x_out)
-{
- const octet *p;
-
- p = buf_get(b, 4); if (!p) return (-1);
- fltfmt_f32ltoflt(x_out, p, FLTRND_NEAREVEN); return (0);
-}
-
-int buf_getf32b(buf *b, float *x_out)
-{
- const octet *p;
-
- p = buf_get(b, 4); if (!p) return (-1);
- fltfmt_f32ltoflt(x_out, p, FLTRND_NEAREVEN); return (0);
-}
-
-int (dbuf_getf32)(dbuf *db, float *x_out)
- { return (dbuf_getf32(db, x_out)); }
-int (dbuf_getf32l)(dbuf *db, float *x_out)
- { return (dbuf_getf32l(db, x_out)); }
-int (dbuf_getf32b)(dbuf *db, float *x_out)
- { return (dbuf_getf32b(db, x_out)); }
-
-int buf_getf64(buf *b, double *x_out)
-{
- const octet *p;
-
- p = buf_get(b, 8); if (!p) return (-1);
- fltfmt_f64btodbl(x_out, p, FLTRND_NEAREVEN); return (0);
-}
-
-int buf_getf64l(buf *b, double *x_out)
-{
- const octet *p;
-
- p = buf_get(b, 8); if (!p) return (-1);
- fltfmt_f64ltodbl(x_out, p, FLTRND_NEAREVEN); return (0);
-}
-
-int buf_getf64b(buf *b, double *x_out)
-{
- const octet *p;
-
- p = buf_get(b, 8); if (!p) return (-1);
- fltfmt_f64ltodbl(x_out, p, FLTRND_NEAREVEN); return (0);
-}
-
-int (dbuf_getf64)(dbuf *db, double *x_out)
- { return (dbuf_getf64(db, x_out)); }
-int (dbuf_getf64l)(dbuf *db, double *x_out)
- { return (dbuf_getf64l(db, x_out)); }
-int (dbuf_getf64b)(dbuf *db, double *x_out)
- { return (dbuf_getf64b(db, x_out)); }
+#define DEFGET1(ty, cty, fty, e, xe, w) \
+ int GLUE3(buf_get, fty, xe)(buf *b, cty *x_out) \
+ { \
+ const octet *p; \
+ unsigned err; \
+ \
+ p = buf_get(b, w); if (!p) return (-1); \
+ err = fltfmt_##fty##e##to##ty(x_out, p, FLTRND_NEAREVEN); \
+ if (err&~IGNERR) { BBREAK(b); return (-1); } \
+ return (0); \
+ } \
+ int (GLUE3(dbuf_get, fty, xe))(dbuf *db, cty *x_out) \
+ { return (GLUE3(dbuf_get, fty, xe)(db, x_out)); }
+
+#define DEFGET(ty, cty, fty, w) \
+ DEFGET1(ty, cty, fty, b, EMPTY, w) \
+ DEFGET1(ty, cty, fty, l, l, w) \
+ DEFGET1(ty, cty, fty, b, b, w)
+
+FORMATS(DEFGET)
+
+#undef DEFGET1
+#undef DEFGET
/* --- @buf_putf{32,64}{,l,b} --- *
*
* the usual round-to-nearest/ties-to-even rounding mode.
*/
-int buf_putf32(buf *b, float x)
-{
- octet *p;
-
- p = buf_get(b, 4); if (!p) return (-1);
- fltfmt_flttof32b(p, x, FLTRND_NEAREVEN); return (0);
-}
-
-int buf_putf32l(buf *b, float x)
-{
- octet *p;
-
- p = buf_get(b, 4); if (!p) return (-1);
- fltfmt_flttof32l(p, x, FLTRND_NEAREVEN); return (0);
-}
-
-int buf_putf32b(buf *b, float x)
-{
- octet *p;
-
- p = buf_get(b, 4); if (!p) return (-1);
- fltfmt_flttof32b(p, x, FLTRND_NEAREVEN); return (0);
-}
-
-int (dbuf_putf32)(dbuf *db, float x)
- { return (dbuf_putf32(db, x)); }
-int (dbuf_putf32l)(dbuf *db, float x)
- { return (dbuf_putf32l(db, x)); }
-int (dbuf_putf32b)(dbuf *db, float x)
- { return (dbuf_putf32b(db, x)); }
-
-int buf_putf64(buf *b, double x)
-{
- octet *p;
-
- p = buf_get(b, 8); if (!p) return (-1);
- fltfmt_dbltof64b(p, x, FLTRND_NEAREVEN); return (0);
-}
-
-int buf_putf64l(buf *b, double x)
-{
- octet *p;
-
- p = buf_get(b, 8); if (!p) return (-1);
- fltfmt_dbltof64l(p, x, FLTRND_NEAREVEN); return (0);
-}
-
-int buf_putf64b(buf *b, double x)
-{
- octet *p;
-
- p = buf_get(b, 8); if (!p) return (-1);
- fltfmt_dbltof64b(p, x, FLTRND_NEAREVEN); return (0);
-}
-
-int (dbuf_putf64)(dbuf *db, double x)
- { return (dbuf_putf64(db, x)); }
-int (dbuf_putf64l)(dbuf *db, double x)
- { return (dbuf_putf64l(db, x)); }
-int (dbuf_putf64b)(dbuf *db, double x)
- { return (dbuf_putf64b(db, x)); }
+#define DEFPUT1(ty, cty, fty, e, xe, w) \
+ int GLUE3(buf_put, fty, xe)(buf *b, cty x) \
+ { \
+ octet *p; \
+ unsigned err; \
+ \
+ p = buf_get(b, w); if (!p) return (-1); \
+ err = fltfmt_##ty##to##fty##e(p, x, FLTRND_NEAREVEN); \
+ if (err&~IGNERR) { BBREAK(b); return (-1); } \
+ return (0); \
+ } \
+ int (GLUE3(dbuf_put, fty, xe))(dbuf *db, cty x) \
+ { return (GLUE3(dbuf_put, fty, xe)(db, x)); }
+
+#define DEFPUT(ty, cty, fty, w) \
+ DEFPUT1(ty, cty, fty, b, EMPTY, w) \
+ DEFPUT1(ty, cty, fty, l, l, w) \
+ DEFPUT1(ty, cty, fty, b, b, w)
+
+FORMATS(DEFPUT)
+
+#undef DEFPUT1
+#undef DEFPUT
/*----- That's all, folks -------------------------------------------------*/
#define dbuf_getf32l(db, x_out) (buf_getf32l(DBUF_BUF(db), (x_out)))
#define dbuf_getf32b(db, x_out) (buf_getf32b(DBUF_BUF(db), (x_out)))
-extern int dbuf_getf64(dbuf */*db*/, double */*x_out*/);
-extern int dbuf_getf64l(dbuf */*db*/, double */*x_out*/);
-extern int dbuf_getf64b(dbuf */*db*/, double */*x_out*/);
+extern int buf_getf64(buf */*b*/, double */*x_out*/);
+extern int buf_getf64l(buf */*b*/, double */*x_out*/);
+extern int buf_getf64b(buf */*b*/, double */*x_out*/);
#define dbuf_getf64(db, x_out) (buf_getf64(DBUF_BUF(db), (x_out)))
#define dbuf_getf64l(db, x_out) (buf_getf64l(DBUF_BUF(db), (x_out)))
#define dbuf_getf64b(db, x_out) (buf_getf64b(DBUF_BUF(db), (x_out)))
#define dbuf_putf32l(db, x) (buf_putf32l(DBUF_BUF(db), (x)))
#define dbuf_putf32b(db, x) (buf_putf32b(DBUF_BUF(db), (x)))
-extern int dbuf_putf64(dbuf */*db*/, double /*x*/);
-extern int dbuf_putf64l(dbuf */*db*/, double /*x*/);
-extern int dbuf_putf64b(dbuf */*db*/, double /*x*/);
+extern int buf_putf64(buf */*b*/, double /*x*/);
+extern int buf_putf64l(buf */*b*/, double /*x*/);
+extern int buf_putf64b(buf */*b*/, double /*x*/);
#define dbuf_putf64(db, x) (buf_putf64(DBUF_BUF(db), (x)))
#define dbuf_putf64l(db, x) (buf_putf64l(DBUF_BUF(db), (x)))
#define dbuf_putf64b(db, x) (buf_putf64b(DBUF_BUF(db), (x)))
.\"--------------------------------------------------------------------------
.TH bench 3mLib "9 March 2024" "Straylight/Edgeware" "mLib utilities library"
.\" @bench_createtimer
+.\" @BENCH_TIMELOOP_DECLS
+.\" @BENCH_TIMELOOP_TAG
+.
.\" @bench_init
.\" @bench_destroy
.\" @bench_calibrate
+.\" @bench_preflight
+.\" @bench_adapt
+.\" @bench_adjust
+.\" @BENCH_MEASURE_DECLS
+.\" @BENCH_MEASURE_TAG
+.\" @BENCH_MEASURE
.\" @bench_measure
.
+.\" @bench_report
+.
.\"--------------------------------------------------------------------------
.SH NAME
bench \- low-level benchmarking tools
.nf
.B "#include <mLib/bench.h>"
.PP
+.B "#define BTF_TIMEOK ..."
+.B "#define BTF_CYOK ..."
+.B "#define BTF_ANY (BTF_TIMEOK | BTF_CYOK)"
+.PP
.ta 2n +2n +2n
.B "struct bench_time {"
.B " unsigned f;"
.ta 2n +\w'\fBint (*now)('u
.BI " int (*now)(struct bench_timer *" bt ,
.BI " struct bench_time *" t_out ", unsigned " f );
-.ta 2n +\w'\void (*diff)('u
+.ta 2n +\w'\fBvoid (*diff)('u
.BI " void (*diff)(struct bench_timer *" bt ,
.BI " struct bench_timing *" delta_out ,
.BI " const struct bench_time *" t0 ,
.B "};"
.B "struct bench_timer {"
.B " const struct bench_timerops *ops;"
+.B " unsigned ref;"
.B "};"
.PP
+.B "struct bench_timer *bench_createtimer(void);"
+.B "BENCH_TIMELOOP_DECLS;"
+.ta 2n \w'\fBBENCH_TIMELOOP_TAG('u
+.BI "BENCH_TIMELOOP_TAG(" tag ", struct bench_timer *" tm ,
+.BI " struct bench_timing *" delta_out ", double " n ,
+.BI " " onbreak )
+.BI " " stmt
+.PP
+.B "#define BTF_CLB ..."
+.B "#define BTF_INDIRECT ..."
+.PP
+.ta 2n
.B "struct bench_state {"
.B " unsigned f;"
.B " double target_s;"
.PP
.BI "typedef void bench_fn(unsigned long " n ", void *" ctx );
.PP
-.B "#define BTF_TIMEOK ..."
-.B "#define BTF_CYOK ..."
-.B "#define BTF_CLB ..."
-.B "#define BTF_ANY (BTF_TIMEOK | BTF_CYOK)"
-.PP
-.B "struct bench_timer *bench_createtimer(void);"
-.PP
.BI "int bench_init(struct bench_state *" b ", struct bench_timer *" tm );
.BI "void bench_destroy(struct bench_state *" b );
-.BI "int bench_calibrate(struct bench_state *" b );
+.BI "int bench_calibrate(struct bench_state *" b ", unsigned " f );
+.BI "int bench_preflight(struct bench_state *" b );
+.ta \w'\fBint bench_adapt('u
+.BI "int bench_adapt(struct bench_state *" b ", double *" n_inout ,
+.BI " const struct bench_timing *" t );
+.ta \w'\fBint bench_adjust('u
+.BI "int bench_adjust(struct bench_state *" b ", struct bench_timing *" t_inout ,
+.BI " double " n ", double " base );
+.B "BENCH_MEASURE_DECLS;"
+.ta 2n \w'\fBBENCH_MEASURE_TAG('u
+.BI "BENCH_MEASURE_TAG(" tag ", struct bench_state *" b ,
+.BI " int &" rc ", struct bench_timing *" t_out ", double " bsae )
+.BI " " stmt
+.ta 2n \w'\fBBENCH_MEASURE('u
+.BI "BENCH_MEASURE(struct bench_state *" b ,
+.BI " int &" rc ", struct bench_timing *" t_out ", double " bsae )
+.BI " " stmt
.ta \w'\fBint bench_measure('u
.BI "int bench_measure(struct bench_state *" b ", struct bench_timing *" t_out ,
.BI " double " base ", bench_fn *" fn ", void *" ctx );
+.PP
+.ta 2n
+.B "enum {"
+.B " BTU_OP = 0,"
+.B " BTU_BYTE = 1,"
+.B " ..."
+.BI " BTU_LIMIT = " n
+.B "};"
+.ta \w'\fBvoid bench_report('u
+.BI "void bench_report(const struct gprintf_ops *" gops ", void *" go ,
+.BI " unsigned " unit ", const struct bench_timing *" t );
+.PP
.fi
.
.\"--------------------------------------------------------------------------
provides declarations and defintions
for performing low-level benchmarks.
.PP
-The `main event' is
-.BR bench_measure .
-This function will be described in detail later,
+The `main event' are the
+.B BENCH_MEASURE
+macro and
+.B bench_measure
+function.
+These will be described in detail later,
but, in brief,
-it calls a caller-provided function,
+they execute a caller-provided piece of code
instructing it to run adaptively chosen numbers of iterations,
in order to get a reasonably reliable measurement of its running time,
-and then reports its results by filling in a structure.
+and then report the results by filling in a structure.
.PP
-With understanding this function as our objective,
-we must examine all of the pieces involved in making it work.
+With understanding these as our objective,
+we must examine all of the pieces involved in making them work.
.
.SS Timers in general
A
and/or in CPU cycles.
A timer is represented by a pointer to an object of type
.BR "struct bench_timer" .
-This structure has a single member,
+This structure has two members:
.BR ops ,
pointing to a
.BR "struct bench_timerops" ,
-which is a table of function pointers;
+which is a table of function pointers,
+and
+.BR ref ,
+which is a simple reference count;
typically, a timer has more data following this,
but this fact is not exposed to applications.
.PP
.IB tm ->ops->describe( tm ", " d)
Write a description of the timer to the dynamic string
.IR d .
-.TPc
+.TP
.IB tm ->ops->preflight( tm )
Ensure that the timer is in working order,
and perform any necessary per-thread or per-process setup.
Destroy the timer,
releasing all of the resources that it holds.
.PP
+In a freshly-created timer, the
+.B ref
+member is 1.
+Applications are expected to handle the reference count themselves;
+the
+.B destroy
+function does not check or decrement the count.
+Code for destroying the timer when it's no longer needed
+might look like this.
+.VS
+if (!--tm->ref) tm->ops->destroy(tm);
+.VE
A
.B bench_timing
structure reports the difference between two times,
if the timer returned any valid timing information.
.TP
.B n
-The number of iterations performed by the benchmark function
+The number of units processed the benchmark computation
on its satisfactory run,
-multiplied by
-.IR base .
+multiplied by a given
+.IR base
+\(en see
+.BR BENCH_MEASURE ,
+.BR bench_measure ,
+and
+.BR bench_adjust .
.TP
.B t
The time taken for the satisfactory run of the benchmark function,
and or
.B BTF_CYOK
if the passage-of-time or cycle count respectively are valid.
+.PP
+The
+.B BENCH_TIMELOOP_TAG
+macro uses a timer to measure a number of iterations of a computation.
+It requires the declarations made by
+.B BENCH_TIMELOOP_DECLS
+to be in scope,
+ideally within an enclosing block
+(rather than at top-level,
+where they'll have static storage duration,
+and take longer to access).
+The macro's expansion is syntactically a statement head;
+see
+.BR control (3)
+for details about the underlying machinery.
+In more detail, the macro is invoked as
+.IP
+.nf
+.ta 2n
+.BI "BENCH_TIMELOOP_TAG(" tag ", " tm ", " delta_out ", " n ", " onbreak )
+.BI " " stmt
+.fi
+.PP
+The
+.I tag
+argument is used to distinguish
+the labels used internally by the macro:
+see
+.BR control (3)
+for details about tags.
+The macro calls on the timer
+.I tm
+to determine the initial time and cycle counts,
+performs
+.I n
+iterations of some computation,
+and calls on the timer a second time
+to determine the final time and cycle counts,
+and to store the difference in
+.BI * delta_out \fR.
+The
+.I stmt
+may be any C statement:
+when it is executed,
+the variable
+.BR _bench_n ,
+of type
+.BR "unsigned long" ,
+is in scope.
+The statement should perform
+.B _bench_n
+iterations of the computation to be measured
+\(en and do as little else as possible.
+The argument
+.I n
+to the macro
+may be larger than
+.BR ULONG_MAX :
+the macro will execute
+.I stmt
+multiple times if necessary.
+The statement is allowed to clobber
+.BR _bench_n .
+Note that
+.B BENCH_TIMELOOP_TAG
+does
+.I not
+call the timer's
+.B preflight
+function.
+If the
+.I stmt
+executes a free
+.B break
+statement
+then the statement
+.I onbreak
+is executed;
+a free
+.B continue
+statement within
+.I stmt
+currently does not have a useful behaviour.
+Free
+.B break
+and
+.B continue
+statements within
+.I onbreak
+behave normally.
+(See
+.BR control (3)
+for a definition of
+`free'
+.B break
+and
+.B continue
+statements.)
.
.SS The built-in timer
The function
except check that the
.B linux-x86-perf-rdpmc-hw-cycles
cycle subtimer has been selected.
-This is almost certainly the best choice if it's available.
+This is almost certainly the best choice if it's available;
+It is, however, not compatible with (at least some versions of)
+.BR valgrind (1);
+it will detect that it is running under
+.B valgrind
+and fail to initialize.
.TP
.B x86-rdtscp
Counts CPU cycles using the x86
is called with a non-null timer pointer,
then it will not fail;
the benchmark state will be initialized,
-and the function returns zero.
+and the function returns zero;
+the timer's reference count is
+.I not
+incremented.
If the timer pointer is null,
then
.B bench_init
then the benchmark state will be initialized,
and the function returns zero.
In both cases,
-the timer becomes owned by the benchmark state:
+the timer reference becomes owned by the benchmark state:
calling
.B bench_destroy
-on the benchmark state will destroy the timer.
+on the benchmark state will decrement the timer's reference count,
+and destroy it unless it has additional outstanding references.
If
.B bench_init
is called with a null timer pointer,
and its attempt to create a timer for itself fails,
then
.B bench_init
-returns \-1;
+returns \-1:
the benchmark state is not initialized
-and can safely be discarded;
-calling
-safe to call
-.B bench_destroy
-on the unsuccessfully benchmark state is safe and has no effect.
+and can safely be discarded.
.PP
Calling
.B bench_destroy
on a benchmark state
releases any resources it holds,
most notably its timer, if any.
+Calling
+.B bench_destroy
+on an unsuccessfully initialized benchmark state
+is safe but has no effect.
.PP
Although
.B struct bench_state
if passage-of-time calibration fails,
then cycle count calibration is impossible.
.PP
+The benchmarking state must be calibrated differently
+for different kinds of timing loop;
+this is controlled by the flags passed as the
+.I f
+argument to
+.BR bench_calibrate .
+The main difference lies in whether the code to be measured
+is called
+.IR indirectly ,
+i.e., via a function pointer.
+Set
+.B BTF_INDIRECT
+if the code is to be called indirectly;
+leave this flag clear if the code is called directly.
+The
+.B bench_measure
+function always makes indirect calls;
+the
+.B BENCH_MEASURE
+macro does not itself make indirect calls.
+Usually, a program needs only one or the other;
+if both are necessary for some reason,
+the best approach is just to set up two benchmarking states
+sharing the same timer,
+and calibrate them separately.
+.PP
When it completes,
.B bench_calibrate
-sets flag in the benchmark state's
+sets flags in the benchmark state's
.B f
member:
if passage-of-time calibration succeeded,
it returns immediately,
either returning 0 or \-1
according to whether passage-of-time had previously been calibrated.
-.
-.SS Timing functions
-A
+.PP
+The
+.B BENCH_MEASURE
+macro measures the performance of a computation.
+It requires the declarations made by
+.B BENCH_MEASURE_DECLS
+to be in scope,
+ideally within an enclosing block
+(rather than at top-level,
+where they'll have static storage duration,
+and take longer to access).
+The macro's expansion is syntactically a statement head;
+see
+.BR control (3)
+for details about the underlying machinery.
+In more detail, the macro is invoked as
+.IP
+.nf
+.ta 2n
+.BI "BENCH_MEASURE(" b ", " rc ", " t_out ", " base )
+.BI " " stmt
+.fi
+.PP
+The
+.I stmt
+can be any C statement;
+it should perform
+.B _bench_n
+iterations of the computation to be measured.
+(The variable
+.B _bench_n
+is declarared as part of
+.B BENCH_MEASURE_DECLS
+and has type
+.BR "unsigned long" .
+Before commencing measurement proper,
+the macro calls
+.BR bench_preflight ,
+described below,
+to check that everything is set up properly
+for measurements on the current thread;
+if this fails, then the macro sets
+.I rc
+to \-1.
+Otherwise, the macro executes
+.I stmt
+one or more times,
+with the objective of finding an iteration count
+.I n
+such that
+.I n
+iterations of the computation take more than
+.IB b ->target_s "" \fR/\(sr2
+seconds.
+If measurement fails,
+then
+.I rc
+is set to \-1;
+otherwise,
+.I rc
+is set to zero, and
+.BI * t_out
+is filled in with the measurement;
+.IB t_out ->n
+is set to
+.IR n "\ \(mu\ " base .
+.PP
+The
+.B BENCH_MEASURE_TAG
+macro works just like
+.B BENCH_MEASURE
+except that it takes an additional
+.I tag
+argument used to distinguish the internal labels
+used by the macro's implementation;
+this makes it possible to use
+.B BENCH_MEASURE_TAG
+as a component in more complex macros.
+See
+.BR control (3)
+for details about control-structure macros
+and the meaning and format of tags.
+.PP
+The function
+.B bench_measure
+is similar,
+except that it calls a
.I benchmark function
-has the signature
+to perform the computation.
+A benchmark function has the signature
.IP
.BI "void " fn "(unsigned long " n ", void *" ctx );
.PP
argument is a pointer passed into
.B bench_measure
for the benchmark function's own purposes.
-.PP
-The function
+The
.B bench_measure
-receives five arguments.
-.TP
-.I b
-points to the benchmark state to be used.
-.TP
-.I t_out
-is the address of a
-.BR struct bench_timing
-in which the measurement should be left.
-This structure is described below.
-.TP
-.I base
-is a count of the number of operations performed
-by each iteration of the benchmark function.
-.TP
-.I fn
-is a benchmark function, described above.
-.TP
-.I ctx
-is a pointer to be passed to the benchmark function.
+function returns zero on success,
+or \-1 on failure.
+Note that
.B bench_measure
-does not interpret this pointer in any way.
+invokes the benchmark indirectly,
+so the benchmark state should have been calibrated with
+.BR BTF_INDIRECT .
+.
+.SS Measurement utilities
+The following functions are primarily exported for the benefit of the
+.B BENCH_MEASURE
+macro,
+but are documented here in case they are useful.
.PP
The
-.B bench_measure
-function calls its benchark function repeatedly
-with different iteration counts
-.IR n ,
-with the objective that the call take approximately
-.B target_s
-seconds, as established in the benchmark state.
-(Currently, if
-.B target_s
-holds the value
-.IR t ,
-then
-.B bench_measure
-is satisfied when a call takes at least
-.IR t /\(sr2\*,s.)
-Once the function finds a satisfactory number of iterations,
-it stores the results in
-.BI * t_out \fR.
-If measurement succeeds, then
-.B bench_measure
-returns zero.
-If it fails \(en
-most likely because the timer failed \(en
-then it returns \-1.
+.B bench_preflight
+function prepares a benchmarking state for use.
+It checks that the timer is calibrated
+and suitable for measuring passage-of-time;
+it also calls the timer's
+.B preflight
+function to prepare it for measurements on the current thread.
+If these checks succeed, then
+.B bench_preflight
+returns zero;
+otherwise it returns \-1
+and the caller should not proceed with measurements.
+.PP
+The
+.B bench_adapt
+function is used to determine iteration counts.
+It is used in a loop such as the following.
+.IP
+.nf
+.ta 2n +2n
+.B "BENCH_TIMELOOP_DECLS;"
+.B "struct bench_timer *tm;"
+.B "struct bench_timing t;"
+.B "double n = 1.0, target_s = 1.0;"
+.IP
+.B "do {"
+.B " BENCH_TIMELOOP_TAG(time, tm, &t, n, { break; })"
+.BI " " "(do " _bench_n " iterations of some computation)" ;
+.B "} while (!bench_adapt(&n, target_s, &t));"
+.fi
+.PP
+On entry,
+.BI *n_inout
+should be the number of iterations performed by the previous step,
+and
+.BI * t
+the resulting time;
+the
+.B BTF_TIMEOK
+flag must be set in
+.IB t ->f \fR.
+If the timing is sufficient \(en if
+.IR t\fB->t "\ \*(>=\ " target_s /\(sr2
+\(en then
+.B bench_adapt
+returns a nonzero value to indicate that measurement is complete.
+Otherwise, it sets
+.BI * n_inout
+to a new, larger iteration count
+and returns zero to indicate that a further pass is necessary.
+.PP
+The
+.B bench_adjust
+function adjusts a raw timing,
+as captured by
+.BR BENCH_TIMELOOP_TAG ,
+according to the calibration data captured in
+.IR b .
+On exit, the timing data is updated,
+and
+.IB t ->n
+is set to the product
+.IR n "\ \(mu\ " base .
+.
+.SS Reporting results
+The
+.B bench_report
+function formats a measurement result
+into a human-readable string.
+The function writes its output using the
+generalized output formatting operations
+.I gops
+and output pointer
+.IR go ;
+see
+.BR gprintf (3)
+for details on generalized output formatting.
+The
+.I unit
+argument describes the unit of activity being measured:
+.TP
+.B BTU_OP
+counts operations of some unspecified nature, while
+.TP
+.B BTU_BYTE
+counts a number of bytes processed.
+.PP
+These are presented differently
+\(em in particular,
+quantities bytes are expressed using binary scaling rather than decimal.
+The timing to report is given by the
+.I t
+argument;
+.IB t ->n
+gives the number of units processed.
+.
+.\"--------------------------------------------------------------------------
+.SH EXAMPLE
.
+The following macros offer a fairly simple example of
+how the benchmarking functions and macros can be used.
+.VS
+.ta 2n +2n +2n 2n+\w'\fBBENCH_MEASURE_TAG('u \n(.lu-\n(.iu-4n
+#define BENCHMARK_DECLS \e
+ struct bench_timing _bmark_t; \e
+ int _bmark_rc; \e
+ BENCH_MEASURE_DECLS
+.VP
+#define BENCHMARK_TAG(tag, b, unit, base) \e
+ MC_BEFORE(tag##__benchmark_before, { fflush(stdout); }) \e
+ MC_AFTER(tag##__benchmark_after, { \e
+ if (_bmark_rc) \e
+ printf(": FAILED\en"); \e
+ else { \e
+ fputs(": ", stdout); \e
+ bench_report(&file_printops, stdout, (unit), &_bmark_tm);\ \e
+ putchar('\n'); \e
+ } \e
+ }) \e
+ BENCH_MEASURE_TAG(tag##__bmarkmark_measure, \e
+ (b), _bmark_rc, &_bmark_t, (base))
+#define BENCHMARK(b, unit, base) \e
+ BENCHMARK_TAG(bench, b, unit, base)
+.VE
+
.\"--------------------------------------------------------------------------
.SH "SEE ALSO"
.
+.BR control (3),
+.BR macros (3),
.BR tvec-bench (3),
.BR mLib (3).
.
if (select_timer(t, i, tmconf[i].p, tmconf[i].sz)) goto end;
/* All is done. */
- t->_t.ops = &timer_ops; ret = &t->_t; t = 0;
+ t->_t.ops = &timer_ops; t->_t.ref = 1; ret = &t->_t; t = 0;
end:
if (t) timer_destroy(&t->_t);
return (ret);
*/
void bench_destroy(struct bench_state *b)
- { if (b->tm) { b->tm->ops->destroy(b->tm); b->tm = 0; } }
+ { if (b->tm && !--b->tm->ref) { b->tm->ops->destroy(b->tm); b->tm = 0; } }
/* --- @spin@ --- *
*
*/
if (f&BTF_INDIRECT) {
for (i = 0; i < 50; i++)
- BENCH_TIMELOOP_TAG(setup, b, &delta, 10000)
+ BENCH_TIMELOOP_TAG(setup, b->tm, &delta, 10000, ;)
LAUNDER(&spin)(_bench_n, 0);
}
/* Measure @n@ iterations of the idle loop. */
if (f&BTF_INDIRECT)
- BENCH_TIMELOOP_TAG(calibrate, b, &delta, n)
+ BENCH_TIMELOOP_TAG(calibrate, b->tm, &delta, n, ;)
LAUNDER(&spin)(_bench_n, 0);
else
- BENCH_TIMELOOP_TAG(calibrate, b, &delta, n)
+ BENCH_TIMELOOP_TAG(calibrate, b->tm, &delta, n, ;)
while (_bench_n--) RELAX;
tf &= delta.f; if (!(tf&BTF_TIMEOK)) { rc = -1; goto end; }
/* --- @bench_adapt@ --- *
*
- * Arguments: @struct bench_state *b@ = benchmark state
- * @double *n_inout@ = number of iterations, updated
+ * Arguments: @double *n_inout@ = number of iterations, updated
+ * @double target_s@ = target time in seconds
* @const struct bench_timing *t@ = timing from the previous run
*
* Returns: Nonzero if the measurement is sufficient; zero to run again.
* On entry, @*n_inout@ should be the number of iterations
* performed by the previous pass, and @*t@ the resulting time;
* the @BTF_TIMEOK@ flag must be set @t->f@. If the timing is
- * sufficient -- @t->t@ is sufficiently close to @b->target_s@
+ * sufficient -- @t->t@ is sufficiently close to @target_s@
* -- then the function returns nonzero to indicate that
* measurement is complete. Otherwise, it sets @*n_inout@ to a
* new, larger iteration count and returns zero to indicate that
* a further pass is necessary.
*/
-int bench_adapt(struct bench_state *b,
- double *n_inout, const struct bench_timing *t)
+int bench_adapt(double *n_inout, double target_s,
+ const struct bench_timing *t)
{
double n = *n_inout, nn;
* hand, if %$T/t < 1 + 1/n$% then %$t (n + 1)/n > T$%, so just trying
* again with %$n' = n + 1$% iterations will very likely work.
*/
- if (t->t >= 0.707*b->target_s) return (1);
- nn = n*b->target_s/t->t; modf(nn, &nn);
+ if (t->t >= 0.707*target_s) return (1);
+ nn = n*target_s/t->t; modf(nn, &nn);
*n_inout = nn > n ? nn : n + 1;
return (0);
}
BENCH_MEASURE_DECLS;
int rc;
- BENCH_MEASURE_TAG(bench_measure, b, rc, t_out, base)
- fn(_bench_n, ctx);
+ BENCH_MEASURE(b, rc, t_out, base) fn(_bench_n, ctx);
return (rc);
}
# include "dstr.h"
#endif
+#ifndef MLIB_MACROS_H
+# include "macros.h"
+#endif
+
#ifndef MLIB_GPRINTF_H
# include "gprintf.h"
#endif
-/*----- Data structures ---------------------------------------------------*/
+/*----- Timers ------------------------------------------------------------*/
struct bench_time {
unsigned f; /* flags */
double n, t, cy; /* count, time, and cycles */
};
-struct bench_timer { const struct bench_timerops *ops; };
+struct bench_timer {
+ const struct bench_timerops *ops;
+ unsigned ref;
+};
struct bench_timerops {
void (*describe)(struct bench_timer */*bt*/, dstr */*d*/);
/* Release the timer and any resources it holds. */
};
-struct bench_state {
- struct bench_timer *tm; /* a timer */
- double target_s; /* target time to run benchmarks */
- unsigned f; /* calibration flags (@BTF_...@) */
-#define BTF_CLB 0x0100 /* tried calibrating */
- struct { double m, c; } clk, cy; /* calculated overheads */
-};
-
-typedef void bench_fn(unsigned long /*n*/, void */*ctx*/);
- /* Run the benchmark @n@ times, given a context pointer @ctx@. */
-
-enum {
- BTU_OP, /* counting operations of some kind */
- BTU_BYTE, /* counting bytes (@rbuf >= 0@) */
- BTU_LIMIT /* (number of units) */
-};
+/* --- @bench_createtimer@ --- *
+ *
+ * Arguments: @const char *config@ = user-supplied configuration string
+ *
+ * Returns: A freshly constructed standard timer object, or null on
+ * failure.
+ *
+ * Use: Allocate a timer. Dispose of it by calling
+ * @tm->ops->destroy(tm)@ when you're done.
+ *
+ * Applications should not set configuration strings except as
+ * established by user action, e.g., from a command-line option,
+ * environment variable, or configuration file.
+ */
-/*----- Macros ------------------------------------------------------------*/
+extern struct bench_timer *bench_createtimer(const char *config);
/* --- @BENCH_TIMELOOP_DECLS@ --- *
*
/* --- @BENCH_TIMELOOP_TAG@ --- *
*
* Arguments: @tag@ = control structure tag
- * @struct bench_state *b@ = benchmark state
+ * @struct bench_state *tm@ = benchmark timer
* @struct bench_timing *delta_out@ = where to put the result
* @double n@ = number of iterations
*
* Returns: ---
*
- * Use: @BENCH_TIMELOOP_TAG(tag, b, delta_out, n) stmt@ runs @stmt@
+ * Use: @BENCH_TIMELOOP_TAG(tag, tm, delta_out, n) stmt@ runs @stmt@
* @n@ times, capturing the time and/or CPU cycles taken in
* @*delta_out@. The iteration count must be no more than the
* %%\emph{square}%% of @ULONG_MAX@. If @stmt@ executes a free
- * @break@ statement, then the containing loop -- which ust
+ * @break@ statement, then the containing loop -- which must
* exist -- is exited.
*
* This macro is not intended to be used directly by users: it
* is used internally by @bench_calibrate@ and @BENCH_MEASURE@.
*/
-#define BENCH_TIMELOOP_TAG(tag, b, delta_out, n) \
+#define BENCH_TIMELOOP_TAG(tag, tm, delta_out, n, onbreak) \
MC_BEFORE(tag##__benchtl_setup, { \
double _R = ULONG_MAX; double _n = (n); \
\
- _bench_tm = (b)->tm; \
+ _bench_tm = (tm); \
if (_n <= _R) { _bench_n1 = 0; _bench_n = _n; } \
else { _bench_n1 = _n/_R; _bench_n = _n - _R*_bench_n1; } \
}) \
{ ; }, \
{ MC_GOTARGET(tag##__benchtl_break); })
-/* --- @BENCH_MEASURE_DECLS@ --- *
- *
- * Arguments: ---
- *
- * Use: Expands to the declarations needed by @BENCH_MEASURE@.
- * These should be at block scope, not at toplevel.
- */
-
-#define BENCH_MEASURE_DECLS \
- struct bench_state *_bench_b; \
- struct bench_timing *_bench_t; \
- double _bench_nn; \
- BENCH_TIMELOOP_DECLS
-
-/* --- @BENCH_MEASURE@, @BENCH_MEASURE_TAG@ --- *
- *
- * Arguments: @tag@ = control structure tag
- * @struct bench_state *b@ = benchmark state
- * @int &rc@ = where to put the result code (zero on success,
- * %$-1$% on failure)
- * @struct bench_timing *t_out@ = where to put the result
- * @double base@ = number of operations per external iteration
- *
- * Returns: ---
- *
- * Use: @BENCH_MEASURE(b, rc, delta_out, n) stmt@ measures the
- * execution of @stmt@.
- *
- * The statement should perform @_bench_n@ iterations of some
- * operation. It will be invoked multiple times, with varying
- * iteration counts, so as to run for approximately
- * @b->target_s@ seconds.
- *
- * On success, the resulting timing is left in @*t_out@, with
- * @t_out->n@ holding the product of the final iteration count
- * and @base@, and @rc@ is set to zero. If the timer fails, or
- * if @stmt@ invokes a free @break@ statement, then @rc@ is set
- * to %$-1$%.
- *
- * The macro @BENCH_MEASURE_TAG@ is the same, except that it
- * allows an explicit control-structure tag to be set so that it
- * can be used within larger control structure macros.
- */
-
-#define BENCH_MEASURE_TAG(tag, b, rc, t_out, base) \
- MC_BEFORE(tag##__benchmsr_setup, { \
- _bench_b = (b); _bench_t = (t_out); _bench_nn = 1.0; \
- if (bench_preflight(_bench_b)) MC_GOTARGET(tag##__benchmsr_fail); \
- }) \
- MC_TARGET(tag##__benchmsr_done, \
- { bench_adjust(_bench_b, _bench_t, _bench_nn, (base)); (rc) = 0; }) \
- MC_TARGET(tag##__benchmsr_fail, { (rc) = -1; }) \
- for (;;) \
- MC_WRAP(tag##__benchmsr_loop, \
- { ; }, \
- { _bench_t->f &= _bench_b->f; \
- if (!(_bench_t->f&BTF_TIMEOK)) MC_GOTARGET(tag##__benchmsr_fail); \
- if (bench_adapt(_bench_b, &_bench_nn, _bench_t)) \
- MC_GOTARGET(tag##__benchmsr_done); \
- }, \
- { MC_GOTARGET(tag##__benchmsr_fail); }) \
- BENCH_TIMELOOP_TAG(tag##__benchmsr_time, _bench_b, _bench_t, _bench_nn)
-
-#define BENCH_MEASURE(b, rc, t_out, base) \
- BENCH_MEASURE_TAG(bench_measure, b, rc, t_out, base)
-
-/* --- @BENCHMARK_DECLS@ --- *
- *
- * Arguments: ---
- *
- * Use: Expands to the declarations needed by @BENCHMARK_TAG@.
- * These should be at block scope, not at toplevel.
- */
-
-#define BENCHMARK_DECLS \
- struct bench_timing _bench_tm; \
- int _bench_rc; \
- BENCH_MEASURE_DECLS
-
-/* --- @BENCHMARK_TAG@ --- *
- *
- * Arguments: @tag@ = control structure tag
- * @struct bench_state *b@ = benchmark state
- * @struct gprintf_ops *gops, void *go@ = output formatter
- * @unsigned unit@ = unit being measured (@BTU_...@ code)
- * @double base@ = number of units per external iteration
- *
- * Returns: ---
- *
- * Use: @BENCHMARK_TAG(tag, b, gops, go, unit, base) stmt@ measures
- * the execution of @stmt@ and writes a report to an output
- * formatter. The @stmt@ should run @_bench_n@ iterations of
- * the operation to be measured.
- *
- * No tagless version of this macro is provided, because it is
- * expected to be useful primarily in the construction of
- * higher-level macros.
- */
-
-#define BENCHMARK_TAG(tag, b, gops, go, unit, base) \
- MC_AFTER(tag##__benchmark_after, { \
- if (_bench_rc) gprintf((gops), (go), "FAILED"); \
- else bench_report((gops), (go), _bench_b, (unit), &_bench_tm); \
- }) \
- BENCH_MEASURE_TAG(tag##__benchmark_measure, \
- (b), _bench_rc, &_bench_tm, (base))
-
-/*----- Functions provided ------------------------------------------------*/
+/*----- Measuring ---------------------------------------------------------*/
-/* --- @bench_createtimer@ --- *
- *
- * Arguments: @const char *config@ = user-supplied configuration string
- *
- * Returns: A freshly constructed standard timer object, or null on
- * failure.
- *
- * Use: Allocate a timer. Dispose of it by calling
- * @tm->ops->destroy(tm)@ when you're done.
- *
- * Applications should not set configuration strings except as
- * established by user action, e.g., from a command-line option,
- * environment variable, or configuration file.
- */
+struct bench_state {
+ struct bench_timer *tm; /* a timer */
+ double target_s; /* target time to run benchmarks */
+ unsigned f; /* calibration flags (@BTF_...@) */
+#define BTF_CLB 0x0100 /* tried calibrating */
+ struct { double m, c; } clk, cy; /* calculated overheads */
+};
-extern struct bench_timer *bench_createtimer(const char *config);
+typedef void bench_fn(unsigned long /*n*/, void */*ctx*/);
+ /* Run the benchmark @n@ times, given a context pointer @ctx@. */
/* --- @bench_init@ --- *
*
/* --- @bench_adapt@ --- *
*
- * Arguments: @struct bench_state *b@ = benchmark state
- * @double *n_inout@ = number of iterations, updated
+ * Arguments: @double *n_inout@ = number of iterations, updated
+ * @double target_s@ = target time in seconds
* @const struct bench_timing *t@ = timing from the previous run
*
* Returns: Nonzero if the measurement is sufficient; zero to run again.
* benchmark function to perform next. It is used in a loop
* such as the following.
*
- * @double n = 1.0;@
+ * @BENCH_TIMELOOP_DECLS;@
+ * @struct bench_timer *tm;@
* @struct bench_timing t;@
+ * @double n = 1.0, target_s = 1.0;@
*
* @do {@
- * (run @n@ iterations; set @t@ to the timing)
- * @} while (!bench_adapt(b, &n, &t));@
+ * @BENCH_TIMELOOP_TAG(time, tm, &t, n, { break; })@
+ * (do @_bench_n@ iterations of some computation)@;@
+ * @} while (!bench_adapt(&n, target_s, &t));@
*
* On entry, @*n_inout@ should be the number of iterations
* performed by the previous pass, and @*t@ the resulting time;
- * the @BTF_TIMEOK@ flag must be set @t->f@. If the timing is
- * sufficient -- @t->t@ is sufficiently close to @b->target_s@
+ * the @BTF_TIMEOK@ flag must be set in @t->f@. If the timing
+ * is sufficient -- @t->t@ is sufficiently close to @target_s@
* -- then the function returns nonzero to indicate that
* measurement is complete. Otherwise, it sets @*n_inout@ to a
* new, larger iteration count and returns zero to indicate that
* @bench_measure@ function.
*/
-extern int bench_adapt(struct bench_state */*b*/, double */*n_inout*/,
+extern int bench_adapt(double */*n_inout*/, double /*target_s*/,
const struct bench_timing */*t*/);
/* --- @bench_adjust@ --- *
*
* Returns: ---
*
- * Use: Adjusts a raw timing, as captured by @BENCH_TIMELOOP@,
+ * Use: Adjusts a raw timing, as captured by @BENCH_TIMELOOP_TAG@,
* according to the calibration data captured in @b@.
* On exit, the timing data is updated, and @t->n@ is set to the
* product @n*base@.
struct bench_timing */*t_inout*/,
double /*n*/, double /*base*/);
+/* --- @BENCH_MEASURE_DECLS@ --- *
+ *
+ * Arguments: ---
+ *
+ * Use: Expands to the declarations needed by @BENCH_MEASURE@.
+ * These should be at block scope, not at toplevel.
+ */
+
+#define BENCH_MEASURE_DECLS \
+ struct bench_state *_bench_b; \
+ struct bench_timing *_bench_t; \
+ double _bench_nn; \
+ BENCH_TIMELOOP_DECLS
+
+/* --- @BENCH_MEASURE@, @BENCH_MEASURE_TAG@ --- *
+ *
+ * Arguments: @tag@ = control structure tag
+ * @struct bench_state *b@ = benchmark state
+ * @int &rc@ = where to put the result code (zero on success,
+ * %$-1$% on failure)
+ * @struct bench_timing *t_out@ = where to put the result
+ * @double base@ = number of operations per external iteration
+ *
+ * Returns: ---
+ *
+ * Use: @BENCH_MEASURE(b, rc, delta_out, n) stmt@ measures the
+ * execution of @stmt@.
+ *
+ * The statement should perform @_bench_n@ iterations of some
+ * operation. It will be invoked multiple times, with varying
+ * iteration counts, so as to run for approximately
+ * @b->target_s@ seconds.
+ *
+ * On success, the resulting timing is left in @*t_out@, with
+ * @t_out->n@ holding the product of the final iteration count
+ * and @base@, and @rc@ is set to zero. If the timer fails, or
+ * if @stmt@ invokes a free @break@ statement, then @rc@ is set
+ * to %$-1$%.
+ *
+ * The macro @BENCH_MEASURE_TAG@ is the same, except that it
+ * allows an explicit control-structure tag to be set so that it
+ * can be used within larger control structure macros.
+ */
+
+#define BENCH_MEASURE_TAG(tag, b, rc, t_out, base) \
+ MC_BEFORE(tag##__benchmsr_setup, { \
+ _bench_b = (b); _bench_t = (t_out); _bench_nn = 1.0; \
+ if (bench_preflight(_bench_b)) MC_GOTARGET(tag##__benchmsr_fail); \
+ }) \
+ MC_TARGET(tag##__benchmsr_done, \
+ { bench_adjust(_bench_b, _bench_t, _bench_nn, (base)); (rc) = 0; }) \
+ MC_TARGET(tag##__benchmsr_fail, { (rc) = -1; }) \
+ for (;;) \
+ MC_WRAP(tag##__benchmsr_loop, \
+ { ; }, \
+ { _bench_t->f &= _bench_b->f; \
+ if (!(_bench_t->f&BTF_TIMEOK)) MC_GOTARGET(tag##__benchmsr_fail); \
+ if (bench_adapt(&_bench_nn, _bench_b->target_s, _bench_t)) \
+ MC_GOTARGET(tag##__benchmsr_done); \
+ }, \
+ { MC_GOTARGET(tag##__benchmsr_fail); }) \
+ BENCH_TIMELOOP_TAG(tag##__benchmsr_time, \
+ _bench_b->tm, _bench_t, _bench_nn, { break; })
+
+#define BENCH_MEASURE(b, rc, t_out, base) \
+ BENCH_MEASURE_TAG(bench_measure, b, rc, t_out, base)
+
/* --- @bench_measure@ --- *
*
* Arguments: @struct bench_state *b@ = benchmark state
/*----- Reporting ---------------------------------------------------------*/
+enum {
+ BTU_OP, /* counting operations of some kind */
+ BTU_BYTE, /* counting bytes (@rbuf >= 0@) */
+ BTU_LIMIT /* (number of units) */
+};
+
/* --- @bench_report@ --- *
*
* Arguments: @const struct gprintf_ops *gops, void *gp@ = output formatter
adhoc_SOURCES =
adhoc_SOURCES += adhoc.c
+adhoc_SOURCES += fib.c
+adhoc_SOURCES += example.h
+
+noinst_PROGRAMS += bench
+bench_SOURCES =
+
+bench_SOURCES += bench.c
+bench_SOURCES += fib.c
+bench_SOURCES += example.h
###----- That's all, folks --------------------------------------------------
/*----- Header files ------------------------------------------------------*/
-#include <limits.h>
#include <stdio.h>
#include "macros.h"
#include "tvec-bench.h"
#include "tvec-types.h"
-/*----- Main code ---------------------------------------------------------*/
-
-/* Stupid but traditional recursive Fibonacci. */
-static unsigned long recfib(unsigned n)
- { return (n <= 1 ? n : recfib(n - 1) + recfib(n - 2)); }
-
-/* Slightly less stupid but still traditional iterative Fibonacci. */
-static unsigned long iterfib(unsigned n)
-{
- unsigned long u, v, t;
-
- for (u = 0, v = 1; n--; t = v, v = u, u += t);
- return (u);
-}
+#include "example.h"
-/* Sadly nontraditional intelligent Fibonacci. */
-static unsigned long expfib(unsigned n)
-{
- unsigned long a, b, u, v, t;
-
- /* We work in %$\Q(\phi)$%, where %$\phi^2 = \phi + 1$%. I claim that
- * %$\phi^k = F_k \phi + F_{k-1} \pmod f(\phi))$%. Proof by induction:
- * note that * %$F_{-1} = F_1 - F_0 = 1$%, so %$\phi^0 = 1 = {}$%
- * %$F_0 \phi + F_{-1}$%; and %$\phi^{k+1} = F_k \phi^2 + {}$%
- * %$F_{k-1} \phi = F_k (\phi + 1) + F_{k-1} \phi = (F_k + {}$%
- * %$F_{k-1} \phi + F_k = F_{k+1} \phi + F_k$% as claimed.
- *
- * Now, notice that %$(a \phi + b) (c \phi + d) = a c \phi^2 + {}$%
- * $%(a d + b c) \phi + b d = a c (\phi + 1) + (a d + b c) \phi + {}$%
- * %$b d = (a c + a d + b c) \phi + (a c + b d)$%. In particular,
- * %$(u \phi + v)^2 \equiv (u^2 + 2 u v) \phi + (u^2 + v^2)$%.
- */
- a = 0, b = 1; u = 1, v = 0;
- if (n)
- for (;;) {
- if (n%2) { t = a*u; a = t + a*v + b*u; b = t + b*v; }
- n /= 2; if (!n) break;
- t = u*u; u = t + 2*u*v; v = t + v*v;
- }
- return (a);
-}
+/*----- Main code ---------------------------------------------------------*/
int main(int argc, char *argv[])
{
struct tvec_state tvstate;
int argpos;
unsigned long i, a, b, t;
+ dstr d = DSTR_INIT;
tvec_parseargs(argc, argv, &tvstate, &argpos, &tvec_adhocconfig);
if (argpos < argc) die(2, "no input files expected");
tvec_adhoc(&tvstate);
-#if (ULONG_MAX/65536 >> 16) >= 0xffffffff
-# define FIBLIMIT 94 /* F_94 = 19740274219868223167 > 2^64 */
-#else
-# define FIBLIMIT 48 /* F_48 = 4807526976 > 2^32 */
-#endif
-
TVEC_TESTGROUP(&tvstate, "fib-test") {
for (i = 0, a = 0, b = 1; i < FIBLIMIT; i++, t = b, b = a, a += t)
TVEC_TEST(&tvstate) {
if (tvec_benchprep(&tvstate, &tvec_benchstate, 0)) break;
- TVEC_BENCHMARK("recfib, n = 40", &tvstate, tvec_benchstate, BTU_OP, 1)
- while (_bench_n--) ADMIRE(recfib(40));
- TVEC_BENCHMARK("iterfib, n = " STR(FIBLIMIT),
- &tvstate, tvec_benchstate, BTU_OP, 1)
+ dstr_reset(&d); dstr_putf(&d, "recfib, n = %u", RECFIBLIMIT);
+ TVEC_BENCHMARK(d.buf, &tvstate, tvec_benchstate, BTU_OP, 1)
+ while (_bench_n--) ADMIRE(recfib(RECFIBLIMIT));
+
+ dstr_reset(&d); dstr_putf(&d, "iterfib, n = %u", FIBLIMIT);
+ TVEC_BENCHMARK(d.buf, &tvstate, tvec_benchstate, BTU_OP, 1)
while (_bench_n--) ADMIRE(iterfib(FIBLIMIT));
- TVEC_BENCHMARK("expfib, n = " STR(FIBLIMIT),
- &tvstate, tvec_benchstate, BTU_OP, 1)
+
+ dstr_reset(&d); dstr_putf(&d, "expfib, n = %u", FIBLIMIT);
+ TVEC_BENCHMARK(d.buf, &tvstate, tvec_benchstate, BTU_OP, 1)
while (_bench_n--) ADMIRE(expfib(FIBLIMIT));
}
+ dstr_destroy(&d);
return (tvec_end(&tvstate));
}
--- /dev/null
+/* -*-c-*-
+ *
+ * Demonstration of standalone benchmarking
+ *
+ * (c) 2024 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This file is part of the mLib utilities library.
+ *
+ * mLib is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * mLib is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with mLib. If not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+/*----- Header files ------------------------------------------------------*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "macros.h"
+#include "report.h"
+#include "tvec.h"
+#include "tvec-adhoc.h"
+#include "tvec-bench.h"
+#include "tvec-types.h"
+
+#include "example.h"
+
+/*----- Macro from the manpage --------------------------------------------*/
+
+#define BENCHMARK_DECLS \
+ struct bench_timing _bmark_t; \
+ int _bmark_rc; \
+ BENCH_MEASURE_DECLS
+
+#define BENCHMARK_TAG(tag, b, unit, base) \
+ MC_BEFORE(tag##__benchmark_before, { fflush(stdout); }) \
+ MC_AFTER(tag##__benchmark_after, { \
+ if (_bmark_rc) \
+ printf(": FAILED\n"); \
+ else { \
+ fputs(": ", stdout); \
+ bench_report(&file_printops, stdout, (unit), &_bmark_t); \
+ putchar('\n'); \
+ } \
+ }) \
+ BENCH_MEASURE_TAG(tag##__benchmark_measure, \
+ (b), _bmark_rc, &_bmark_t, (base))
+#define BENCHMARK(b, unit, base) BENCHMARK_TAG(bench, b, unit, base)
+
+/*----- Main code ---------------------------------------------------------*/
+
+int main(void)
+{
+ struct bench_state b;
+ BENCHMARK_DECLS;
+
+ if (bench_init(&b, 0))
+ { fprintf(stderr, "timer setup failed\n"); exit(2); }
+ if (bench_calibrate(&b, 0))
+ { fprintf(stderr, "timer calibration failed\n"); exit(2); }
+
+ printf("recfib, n = %u", RECFIBLIMIT);
+ BENCHMARK(&b, BTU_OP, 1)
+ while (_bench_n--) ADMIRE(recfib(RECFIBLIMIT));
+
+ printf("iterfib, n = %u", FIBLIMIT);
+ BENCHMARK(&b, BTU_OP, 1)
+ while (_bench_n--) ADMIRE(iterfib(FIBLIMIT));
+
+ printf("expfib, n = %u", FIBLIMIT);
+ BENCHMARK(&b, BTU_OP, 1)
+ while (_bench_n--) ADMIRE(expfib(FIBLIMIT));
+
+ return (0);
+}
+
+/*----- That's all, folks -------------------------------------------------*/
/*----- Header files ------------------------------------------------------*/
+#include <limits.h>
#include <stddef.h>
/*----- Functions provided ------------------------------------------------*/
* Return zero on success, or -1 on error.
*/
+extern unsigned long recfib(unsigned /*n*/);
+ /* Stupid but traditional recursive Fibonacci. */
+
+extern unsigned long iterfib(unsigned /*n*/);
+ /* Slightly less stupid but still traditional iterative Fibonacci. */
+
+extern unsigned long expfib(unsigned /*n*/);
+ /* Sadly nontraditional intelligent Fibonacci. */
+
+#define RECFIBLIMIT 40 /* too slow beyond this */
+#if (ULONG_MAX/65536 >> 16) >= 0xffffffff
+# define FIBLIMIT 94 /* F_94 = 19740274219868223167 > 2^64 */
+#else
+# define FIBLIMIT 48 /* F_48 = 4807526976 > 2^32 */
+#endif
+
/*----- That's all, folks -------------------------------------------------*/
#ifdef __cplusplus
--- /dev/null
+/* -*-c-*-
+ *
+ * Compute elements of the Fibonacci sequence
+ *
+ * (c) 2024 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This file is part of the mLib utilities library.
+ *
+ * mLib is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * mLib is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with mLib. If not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+/*----- Header files ------------------------------------------------------*/
+
+#include "example.h"
+
+/*----- Main code ---------------------------------------------------------*/
+
+unsigned long recfib(unsigned n)
+ { return (n <= 1 ? n : recfib(n - 1) + recfib(n - 2)); }
+
+unsigned long iterfib(unsigned n)
+{
+ unsigned long u, v, t;
+
+ for (u = 0, v = 1; n--; t = v, v = u, u += t);
+ return (u);
+}
+
+unsigned long expfib(unsigned n)
+{
+ unsigned long a, b, u, v, t;
+
+ /* We work in %$\Q(\phi)$%, where %$\phi^2 = \phi + 1$%. I claim that
+ * %$\phi^k = F_k \phi + F_{k-1} \pmod f(\phi))$%. Proof by induction:
+ * note that * %$F_{-1} = F_1 - F_0 = 1$%, so %$\phi^0 = 1 = {}$%
+ * %$F_0 \phi + F_{-1}$%; and %$\phi^{k+1} = F_k \phi^2 + {}$%
+ * %$F_{k-1} \phi = F_k (\phi + 1) + F_{k-1} \phi = (F_k + {}$%
+ * %$F_{k-1} \phi + F_k = F_{k+1} \phi + F_k$% as claimed.
+ *
+ * Now, notice that %$(a \phi + b) (c \phi + d) = a c \phi^2 + {}$%
+ * $%(a d + b c) \phi + b d = a c (\phi + 1) + (a d + b c) \phi + {}$%
+ * %$b d = (a c + a d + b c) \phi + (a c + b d)$%. In particular,
+ * %$(u \phi + v)^2 \equiv (u^2 + 2 u v) \phi + (u^2 + v^2)$%.
+ */
+ a = 0, b = 1; u = 1, v = 0;
+ if (n)
+ for (;;) {
+ if (n%2) { t = a*u; a = t + a*v + b*u; b = t + b*v; }
+ n /= 2; if (!n) break;
+ t = u*u; u = t + 2*u*v; v = t + v*v;
+ }
+ return (a);
+}
+
+/*----- That's all, folks -------------------------------------------------*/
.\"--------------------------------------------------------------------------
.TH align 3mLib "4 January 2009" "Straylight/Edgeware" "mLib utilities library"
.\" @ALIGN
+.\" @ALIGNOF
.
.\"--------------------------------------------------------------------------
.SH NAME
.nf
.B "#include <mLib/align.h>"
.PP
-.BI "size_t ALIGN(size_t " sz ");"
+.B "union align { ...\& };"
+.PP
+.BI "void ALIGN(size_t &" sz ");"
+.BI "size_t ALIGNOF(" type );
.fi
.
.\"--------------------------------------------------------------------------
.SH DESCRIPTION
+.
+The
+.B "union align"
+type is (a reasonable guess at) a type with the
+implementation's most restrictive alignment requirements;
+i.e., if a pointer is suitably aligned for
+.B "union align"
+value,
+then it should be suitably aligned for any other value too.
+.PP
+The
+.B ALIGNOF
+macro returns the alignment required for the given
+.IR type ;
+i.e., if an address is suitably aligned for a
+.I type
+value if and only if it is a multiple of
+.BR ALIGNOF( type ) \fR.
+.PP
The
.B ALIGN
-macro returns the value of its argument
+macro rounds its argument
+.I sz
+up to the next multiple of
+.BR "ALIGNOF(union align)" :
+so, if
+.I sz
+is an offset from the start of a suitably aligned chunk of memory,
+e.g., returned by
+.BR malloc (3),
+then
+.BI ALIGN( sz )
+adjusts
.I sz
-rounded up to the next multiple of the size of
-.BR "union align" ,
-which is defined as a union containing a selection of built-in types.
-The intent is to write fairly portable memory allocators, which must
-return correctly-aligned memory.
-.IR array .
+in place, including additional padding
+so that the new offset is also suitably aligned.
+The intent is to assist with writing fairly portable memory allocators,
+which must return correctly-aligned memory.
.
.\"--------------------------------------------------------------------------
.SH "SEE ALSO"
/*----- Some useful constants ---------------------------------------------*/
-#define B31 0x80000000 /* just bit 31 */
-#define B30 0x40000000 /* just bit 30 */
+#define B31 0x80000000u /* just bit 31 */
+#define B30 0x40000000u /* just bit 30 */
#define SH32 4294967296.0 /* 2^32 as floating-point */
#define MLIB__STR(x) #x
#define STR(x) MLIB__STR(x)
-/* --- @GLUE@ --- *
+/* --- @GLUE@, @GLUE3@ --- *
*
* Arguments: @x, y@ = two sequences of tokens
+ * @z@ = a third sequence of tokens
*
* Returns: A single token formed by gluing together the macro-expansions
- * of @x@ and @y@.
+ * of @x@ and @y@, and @z@ for @GLUE3@.
*/
#define MLIB__GLUE(x, y) x##y
#define GLUE(x, y) MLIB__GLUE(x, y)
+#define GLUE3(x, y, z) GLUE(x, MLIB__GLUE(y, z))
/* --- @STATIC_ASSERT@ --- *
*
#define UNQUALIFY(type, p) \
CONVERT_CAREFULLY(type *, const volatile type *, p)
+/* --- @EMPTY@ --- *
+ *
+ * Arguments: ---
+ *
+ * Returns: The empty token sequence.
+ */
+
+#define EMPTY
+
/* --- @COMMA@ --- *
*
* Arguments: ---
#include <math.h>
-#ifndef MLIB_BITS_H
-# include "bits.h"
-#endif
-
/*----- Macros provided ---------------------------------------------------*/
/* --- @NANP@ --- *
# define NEGP(x) ((x) < 0)
#endif
-/*----- Floating-point encoding and decoding ------------------------------*/
-
/*----- That's all, folks -------------------------------------------------*/
#ifdef __cplusplus
static const struct tvec_flag fltrnd_flags[] = {
/* Standard rounding modes. */
- { "zero", 0xffff, FLTRND_ZERO },
- { "projinf", 0xffff, FLTRND_PROJINF },
- { "posinf", 0xffff, FLTRND_POSINF },
- { "neginf", 0xffff, FLTRND_NEGINF },
- { "odd", 0xffff, FLTRND_ODD },
- { "even", 0xffff, FLTRND_EVEN },
- { "nearest-even", 0xffff, FLTRND_NEAREVEN },
- { "nearest-odd", 0xffff, FLTRND_NEARODD },
- { "nearest-zero", 0xffff, FLTRND_NEARZERO },
- { "nearest-projinf", 0xffff, FLTRND_NEARINF },
- { "nearest-neginf", 0xffff, FLTRND_NEARNEG },
- { "nearest-posinf", 0xffff, FLTRND_NEARPOS },
+ { "zero", 0xffffu, FLTRND_ZERO },
+ { "projinf", 0xffffu, FLTRND_PROJINF },
+ { "posinf", 0xffffu, FLTRND_POSINF },
+ { "neginf", 0xffffu, FLTRND_NEGINF },
+ { "odd", 0xffffu, FLTRND_ODD },
+ { "even", 0xffffu, FLTRND_EVEN },
+ { "nearest-even", 0xffffu, FLTRND_NEAREVEN },
+ { "nearest-odd", 0xffffu, FLTRND_NEARODD },
+ { "nearest-zero", 0xffffu, FLTRND_NEARZERO },
+ { "nearest-projinf", 0xffffu, FLTRND_NEARINF },
+ { "nearest-neginf", 0xffffu, FLTRND_NEARNEG },
+ { "nearest-posinf", 0xffffu, FLTRND_NEARPOS },
/* Rounding mode bits: rounds away from zero in the listed conditions.
* The notation corresponds to rounding predicates as follows. The syntax
* <h> @HALF@ %|0|% %|1|%
* <r> @LOW@ %|0|% %|1|%
*/
- { "+0.00", 0x0001, 0x0001 },
- { "+0.01", 0x0002, 0x0002 },
- { "+0.10", 0x0004, 0x0004 },
- { "+0.11", 0x0008, 0x0008 },
- { "+1.00", 0x0010, 0x0010 },
- { "+1.01", 0x0020, 0x0020 },
- { "+1.10", 0x0040, 0x0040 },
- { "+1.11", 0x0080, 0x0080 },
- { "-0.00", 0x0100, 0x0100 },
- { "-0.01", 0x0200, 0x0200 },
- { "-0.10", 0x0400, 0x0400 },
- { "-0.11", 0x0800, 0x0800 },
- { "-1.00", 0x1000, 0x1000 },
- { "-1.01", 0x2000, 0x2000 },
- { "-1.10", 0x4000, 0x4000 },
- { "-1.11", 0x8000, 0x8000 },
+ { "+0.00", 0x0001u, 0x0001u },
+ { "+0.01", 0x0002u, 0x0002u },
+ { "+0.10", 0x0004u, 0x0004u },
+ { "+0.11", 0x0008u, 0x0008u },
+ { "+1.00", 0x0010u, 0x0010u },
+ { "+1.01", 0x0020u, 0x0020u },
+ { "+1.10", 0x0040u, 0x0040u },
+ { "+1.11", 0x0080u, 0x0080u },
+ { "-0.00", 0x0100u, 0x0100u },
+ { "-0.01", 0x0200u, 0x0200u },
+ { "-0.10", 0x0400u, 0x0400u },
+ { "-0.11", 0x0800u, 0x0800u },
+ { "-1.00", 0x1000u, 0x1000u },
+ { "-1.01", 0x2000u, 0x2000u },
+ { "-1.10", 0x4000u, 0x4000u },
+ { "-1.11", 0x8000u, 0x8000u },
/* Phew! */
TVEC_ENDFLAGS