From 289651a7df7bc48724137cd0faaf8535fb54e73b Mon Sep 17 00:00:00 2001 Message-Id: <289651a7df7bc48724137cd0faaf8535fb54e73b.1718337449.git.mdw@distorted.org.uk> From: Mark Wooding Date: Mon, 22 Apr 2024 18:15:31 +0100 Subject: [PATCH] @@@ mostly bench docs Organization: Straylight/Edgeware From: Mark Wooding --- struct/buf-float.c | 183 +++++-------- struct/buf.h | 12 +- test/bench.3.in | 550 +++++++++++++++++++++++++++++++++------ test/bench.c | 27 +- test/bench.h | 280 +++++++++----------- test/example/Makefile.am | 9 + test/example/adhoc.c | 66 +---- test/example/bench.c | 91 +++++++ test/example/example.h | 17 ++ test/example/fib.c | 71 +++++ utils/align.3.in | 48 +++- utils/fltfmt.c | 4 +- utils/macros.h | 15 +- utils/maths.h | 6 - utils/t/fltfmt-test.c | 56 ++-- 15 files changed, 953 insertions(+), 482 deletions(-) create mode 100644 test/example/bench.c create mode 100644 test/example/fib.c diff --git a/struct/buf-float.c b/struct/buf-float.c index 5b5b625..e163ff7 100644 --- a/struct/buf-float.c +++ b/struct/buf-float.c @@ -33,8 +33,21 @@ #include "bits.h" #include "buf.h" #include "fltfmt.h" +#include "macros.h" -/*----- External functions ------------------------------------------------*/ +/*----- Constants ---------------------------------------------------------*/ + +/* Tolerable errors. These aren't great, and all of them imply a failure to + * faithfully pass the value on, but they're also the inevitable consequence + * of having different floating-point systems. + */ +#define IGNERR (FLTERR_INEXACT | FLTERR_OFLOW | FLTERR_UFLOW) + +/*----- Main code ---------------------------------------------------------*/ + +#define FORMATS(_) \ + _(flt, float, f32, 4) \ + _(dbl, double, f64, 8) /* --- @buf_getf{32,64}{,l,b} --- * * @@ -49,67 +62,29 @@ * the usual round-to-nearest/ties-to-even rounding mode. */ -int buf_getf32(buf *b, float *x_out) -{ - const octet *p; - - p = buf_get(b, 4); if (!p) return (-1); - fltfmt_f32btoflt(x_out, p, FLTRND_NEAREVEN); return (0); -} - -int buf_getf32l(buf *b, float *x_out) -{ - const octet *p; - - p = buf_get(b, 4); if (!p) return (-1); - fltfmt_f32ltoflt(x_out, p, FLTRND_NEAREVEN); return (0); -} - -int buf_getf32b(buf *b, float *x_out) -{ - const octet *p; - - p = buf_get(b, 4); if (!p) return (-1); - fltfmt_f32ltoflt(x_out, p, FLTRND_NEAREVEN); return (0); -} - -int (dbuf_getf32)(dbuf *db, float *x_out) - { return (dbuf_getf32(db, x_out)); } -int (dbuf_getf32l)(dbuf *db, float *x_out) - { return (dbuf_getf32l(db, x_out)); } -int (dbuf_getf32b)(dbuf *db, float *x_out) - { return (dbuf_getf32b(db, x_out)); } - -int buf_getf64(buf *b, double *x_out) -{ - const octet *p; - - p = buf_get(b, 8); if (!p) return (-1); - fltfmt_f64btodbl(x_out, p, FLTRND_NEAREVEN); return (0); -} - -int buf_getf64l(buf *b, double *x_out) -{ - const octet *p; - - p = buf_get(b, 8); if (!p) return (-1); - fltfmt_f64ltodbl(x_out, p, FLTRND_NEAREVEN); return (0); -} - -int buf_getf64b(buf *b, double *x_out) -{ - const octet *p; - - p = buf_get(b, 8); if (!p) return (-1); - fltfmt_f64ltodbl(x_out, p, FLTRND_NEAREVEN); return (0); -} - -int (dbuf_getf64)(dbuf *db, double *x_out) - { return (dbuf_getf64(db, x_out)); } -int (dbuf_getf64l)(dbuf *db, double *x_out) - { return (dbuf_getf64l(db, x_out)); } -int (dbuf_getf64b)(dbuf *db, double *x_out) - { return (dbuf_getf64b(db, x_out)); } +#define DEFGET1(ty, cty, fty, e, xe, w) \ + int GLUE3(buf_get, fty, xe)(buf *b, cty *x_out) \ + { \ + const octet *p; \ + unsigned err; \ + \ + p = buf_get(b, w); if (!p) return (-1); \ + err = fltfmt_##fty##e##to##ty(x_out, p, FLTRND_NEAREVEN); \ + if (err&~IGNERR) { BBREAK(b); return (-1); } \ + return (0); \ + } \ + int (GLUE3(dbuf_get, fty, xe))(dbuf *db, cty *x_out) \ + { return (GLUE3(dbuf_get, fty, xe)(db, x_out)); } + +#define DEFGET(ty, cty, fty, w) \ + DEFGET1(ty, cty, fty, b, EMPTY, w) \ + DEFGET1(ty, cty, fty, l, l, w) \ + DEFGET1(ty, cty, fty, b, b, w) + +FORMATS(DEFGET) + +#undef DEFGET1 +#undef DEFGET /* --- @buf_putf{32,64}{,l,b} --- * * @@ -124,66 +99,28 @@ int (dbuf_getf64b)(dbuf *db, double *x_out) * the usual round-to-nearest/ties-to-even rounding mode. */ -int buf_putf32(buf *b, float x) -{ - octet *p; - - p = buf_get(b, 4); if (!p) return (-1); - fltfmt_flttof32b(p, x, FLTRND_NEAREVEN); return (0); -} - -int buf_putf32l(buf *b, float x) -{ - octet *p; - - p = buf_get(b, 4); if (!p) return (-1); - fltfmt_flttof32l(p, x, FLTRND_NEAREVEN); return (0); -} - -int buf_putf32b(buf *b, float x) -{ - octet *p; - - p = buf_get(b, 4); if (!p) return (-1); - fltfmt_flttof32b(p, x, FLTRND_NEAREVEN); return (0); -} - -int (dbuf_putf32)(dbuf *db, float x) - { return (dbuf_putf32(db, x)); } -int (dbuf_putf32l)(dbuf *db, float x) - { return (dbuf_putf32l(db, x)); } -int (dbuf_putf32b)(dbuf *db, float x) - { return (dbuf_putf32b(db, x)); } - -int buf_putf64(buf *b, double x) -{ - octet *p; - - p = buf_get(b, 8); if (!p) return (-1); - fltfmt_dbltof64b(p, x, FLTRND_NEAREVEN); return (0); -} - -int buf_putf64l(buf *b, double x) -{ - octet *p; - - p = buf_get(b, 8); if (!p) return (-1); - fltfmt_dbltof64l(p, x, FLTRND_NEAREVEN); return (0); -} - -int buf_putf64b(buf *b, double x) -{ - octet *p; - - p = buf_get(b, 8); if (!p) return (-1); - fltfmt_dbltof64b(p, x, FLTRND_NEAREVEN); return (0); -} - -int (dbuf_putf64)(dbuf *db, double x) - { return (dbuf_putf64(db, x)); } -int (dbuf_putf64l)(dbuf *db, double x) - { return (dbuf_putf64l(db, x)); } -int (dbuf_putf64b)(dbuf *db, double x) - { return (dbuf_putf64b(db, x)); } +#define DEFPUT1(ty, cty, fty, e, xe, w) \ + int GLUE3(buf_put, fty, xe)(buf *b, cty x) \ + { \ + octet *p; \ + unsigned err; \ + \ + p = buf_get(b, w); if (!p) return (-1); \ + err = fltfmt_##ty##to##fty##e(p, x, FLTRND_NEAREVEN); \ + if (err&~IGNERR) { BBREAK(b); return (-1); } \ + return (0); \ + } \ + int (GLUE3(dbuf_put, fty, xe))(dbuf *db, cty x) \ + { return (GLUE3(dbuf_put, fty, xe)(db, x)); } + +#define DEFPUT(ty, cty, fty, w) \ + DEFPUT1(ty, cty, fty, b, EMPTY, w) \ + DEFPUT1(ty, cty, fty, l, l, w) \ + DEFPUT1(ty, cty, fty, b, b, w) + +FORMATS(DEFPUT) + +#undef DEFPUT1 +#undef DEFPUT /*----- That's all, folks -------------------------------------------------*/ diff --git a/struct/buf.h b/struct/buf.h index 2cfffa9..e346b37 100644 --- a/struct/buf.h +++ b/struct/buf.h @@ -630,9 +630,9 @@ extern int buf_getf32b(buf */*b*/, float */*x_out*/); #define dbuf_getf32l(db, x_out) (buf_getf32l(DBUF_BUF(db), (x_out))) #define dbuf_getf32b(db, x_out) (buf_getf32b(DBUF_BUF(db), (x_out))) -extern int dbuf_getf64(dbuf */*db*/, double */*x_out*/); -extern int dbuf_getf64l(dbuf */*db*/, double */*x_out*/); -extern int dbuf_getf64b(dbuf */*db*/, double */*x_out*/); +extern int buf_getf64(buf */*b*/, double */*x_out*/); +extern int buf_getf64l(buf */*b*/, double */*x_out*/); +extern int buf_getf64b(buf */*b*/, double */*x_out*/); #define dbuf_getf64(db, x_out) (buf_getf64(DBUF_BUF(db), (x_out))) #define dbuf_getf64l(db, x_out) (buf_getf64l(DBUF_BUF(db), (x_out))) #define dbuf_getf64b(db, x_out) (buf_getf64b(DBUF_BUF(db), (x_out))) @@ -657,9 +657,9 @@ extern int buf_putf32b(buf */*b*/, float /*x*/); #define dbuf_putf32l(db, x) (buf_putf32l(DBUF_BUF(db), (x))) #define dbuf_putf32b(db, x) (buf_putf32b(DBUF_BUF(db), (x))) -extern int dbuf_putf64(dbuf */*db*/, double /*x*/); -extern int dbuf_putf64l(dbuf */*db*/, double /*x*/); -extern int dbuf_putf64b(dbuf */*db*/, double /*x*/); +extern int buf_putf64(buf */*b*/, double /*x*/); +extern int buf_putf64l(buf */*b*/, double /*x*/); +extern int buf_putf64b(buf */*b*/, double /*x*/); #define dbuf_putf64(db, x) (buf_putf64(DBUF_BUF(db), (x))) #define dbuf_putf64l(db, x) (buf_putf64l(DBUF_BUF(db), (x))) #define dbuf_putf64b(db, x) (buf_putf64b(DBUF_BUF(db), (x))) diff --git a/test/bench.3.in b/test/bench.3.in index b4fc429..dd61af9 100644 --- a/test/bench.3.in +++ b/test/bench.3.in @@ -30,11 +30,22 @@ .\"-------------------------------------------------------------------------- .TH bench 3mLib "9 March 2024" "Straylight/Edgeware" "mLib utilities library" .\" @bench_createtimer +.\" @BENCH_TIMELOOP_DECLS +.\" @BENCH_TIMELOOP_TAG +. .\" @bench_init .\" @bench_destroy .\" @bench_calibrate +.\" @bench_preflight +.\" @bench_adapt +.\" @bench_adjust +.\" @BENCH_MEASURE_DECLS +.\" @BENCH_MEASURE_TAG +.\" @BENCH_MEASURE .\" @bench_measure . +.\" @bench_report +. .\"-------------------------------------------------------------------------- .SH NAME bench \- low-level benchmarking tools @@ -45,6 +56,10 @@ bench \- low-level benchmarking tools .nf .B "#include " .PP +.B "#define BTF_TIMEOK ..." +.B "#define BTF_CYOK ..." +.B "#define BTF_ANY (BTF_TIMEOK | BTF_CYOK)" +.PP .ta 2n +2n +2n .B "struct bench_time {" .B " unsigned f;" @@ -71,7 +86,7 @@ bench \- low-level benchmarking tools .ta 2n +\w'\fBint (*now)('u .BI " int (*now)(struct bench_timer *" bt , .BI " struct bench_time *" t_out ", unsigned " f ); -.ta 2n +\w'\void (*diff)('u +.ta 2n +\w'\fBvoid (*diff)('u .BI " void (*diff)(struct bench_timer *" bt , .BI " struct bench_timing *" delta_out , .BI " const struct bench_time *" t0 , @@ -80,8 +95,21 @@ bench \- low-level benchmarking tools .B "};" .B "struct bench_timer {" .B " const struct bench_timerops *ops;" +.B " unsigned ref;" .B "};" .PP +.B "struct bench_timer *bench_createtimer(void);" +.B "BENCH_TIMELOOP_DECLS;" +.ta 2n \w'\fBBENCH_TIMELOOP_TAG('u +.BI "BENCH_TIMELOOP_TAG(" tag ", struct bench_timer *" tm , +.BI " struct bench_timing *" delta_out ", double " n , +.BI " " onbreak ) +.BI " " stmt +.PP +.B "#define BTF_CLB ..." +.B "#define BTF_INDIRECT ..." +.PP +.ta 2n .B "struct bench_state {" .B " unsigned f;" .B " double target_s;" @@ -90,19 +118,40 @@ bench \- low-level benchmarking tools .PP .BI "typedef void bench_fn(unsigned long " n ", void *" ctx ); .PP -.B "#define BTF_TIMEOK ..." -.B "#define BTF_CYOK ..." -.B "#define BTF_CLB ..." -.B "#define BTF_ANY (BTF_TIMEOK | BTF_CYOK)" -.PP -.B "struct bench_timer *bench_createtimer(void);" -.PP .BI "int bench_init(struct bench_state *" b ", struct bench_timer *" tm ); .BI "void bench_destroy(struct bench_state *" b ); -.BI "int bench_calibrate(struct bench_state *" b ); +.BI "int bench_calibrate(struct bench_state *" b ", unsigned " f ); +.BI "int bench_preflight(struct bench_state *" b ); +.ta \w'\fBint bench_adapt('u +.BI "int bench_adapt(struct bench_state *" b ", double *" n_inout , +.BI " const struct bench_timing *" t ); +.ta \w'\fBint bench_adjust('u +.BI "int bench_adjust(struct bench_state *" b ", struct bench_timing *" t_inout , +.BI " double " n ", double " base ); +.B "BENCH_MEASURE_DECLS;" +.ta 2n \w'\fBBENCH_MEASURE_TAG('u +.BI "BENCH_MEASURE_TAG(" tag ", struct bench_state *" b , +.BI " int &" rc ", struct bench_timing *" t_out ", double " bsae ) +.BI " " stmt +.ta 2n \w'\fBBENCH_MEASURE('u +.BI "BENCH_MEASURE(struct bench_state *" b , +.BI " int &" rc ", struct bench_timing *" t_out ", double " bsae ) +.BI " " stmt .ta \w'\fBint bench_measure('u .BI "int bench_measure(struct bench_state *" b ", struct bench_timing *" t_out , .BI " double " base ", bench_fn *" fn ", void *" ctx ); +.PP +.ta 2n +.B "enum {" +.B " BTU_OP = 0," +.B " BTU_BYTE = 1," +.B " ..." +.BI " BTU_LIMIT = " n +.B "};" +.ta \w'\fBvoid bench_report('u +.BI "void bench_report(const struct gprintf_ops *" gops ", void *" go , +.BI " unsigned " unit ", const struct bench_timing *" t ); +.PP .fi . .\"-------------------------------------------------------------------------- @@ -113,17 +162,20 @@ The header file provides declarations and defintions for performing low-level benchmarks. .PP -The `main event' is -.BR bench_measure . -This function will be described in detail later, +The `main event' are the +.B BENCH_MEASURE +macro and +.B bench_measure +function. +These will be described in detail later, but, in brief, -it calls a caller-provided function, +they execute a caller-provided piece of code instructing it to run adaptively chosen numbers of iterations, in order to get a reasonably reliable measurement of its running time, -and then reports its results by filling in a structure. +and then report the results by filling in a structure. .PP -With understanding this function as our objective, -we must examine all of the pieces involved in making it work. +With understanding these as our objective, +we must examine all of the pieces involved in making them work. . .SS Timers in general A @@ -133,11 +185,14 @@ in seconds (ideally precise to tiny fractions of a second), and/or in CPU cycles. A timer is represented by a pointer to an object of type .BR "struct bench_timer" . -This structure has a single member, +This structure has two members: .BR ops , pointing to a .BR "struct bench_timerops" , -which is a table of function pointers; +which is a table of function pointers, +and +.BR ref , +which is a simple reference count; typically, a timer has more data following this, but this fact is not exposed to applications. .PP @@ -152,7 +207,7 @@ must always point to the timer object itself. .IB tm ->ops->describe( tm ", " d) Write a description of the timer to the dynamic string .IR d . -.TPc +.TP .IB tm ->ops->preflight( tm ) Ensure that the timer is in working order, and perform any necessary per-thread or per-process setup. @@ -193,6 +248,18 @@ and Destroy the timer, releasing all of the resources that it holds. .PP +In a freshly-created timer, the +.B ref +member is 1. +Applications are expected to handle the reference count themselves; +the +.B destroy +function does not check or decrement the count. +Code for destroying the timer when it's no longer needed +might look like this. +.VS +if (!--tm->ref) tm->ops->destroy(tm); +.VE A .B bench_timing structure reports the difference between two times, @@ -224,10 +291,15 @@ is nonzero (true) if the timer returned any valid timing information. .TP .B n -The number of iterations performed by the benchmark function +The number of units processed the benchmark computation on its satisfactory run, -multiplied by -.IR base . +multiplied by a given +.IR base +\(en see +.BR BENCH_MEASURE , +.BR bench_measure , +and +.BR bench_adjust . .TP .B t The time taken for the satisfactory run of the benchmark function, @@ -270,6 +342,104 @@ member stores flags and or .B BTF_CYOK if the passage-of-time or cycle count respectively are valid. +.PP +The +.B BENCH_TIMELOOP_TAG +macro uses a timer to measure a number of iterations of a computation. +It requires the declarations made by +.B BENCH_TIMELOOP_DECLS +to be in scope, +ideally within an enclosing block +(rather than at top-level, +where they'll have static storage duration, +and take longer to access). +The macro's expansion is syntactically a statement head; +see +.BR control (3) +for details about the underlying machinery. +In more detail, the macro is invoked as +.IP +.nf +.ta 2n +.BI "BENCH_TIMELOOP_TAG(" tag ", " tm ", " delta_out ", " n ", " onbreak ) +.BI " " stmt +.fi +.PP +The +.I tag +argument is used to distinguish +the labels used internally by the macro: +see +.BR control (3) +for details about tags. +The macro calls on the timer +.I tm +to determine the initial time and cycle counts, +performs +.I n +iterations of some computation, +and calls on the timer a second time +to determine the final time and cycle counts, +and to store the difference in +.BI * delta_out \fR. +The +.I stmt +may be any C statement: +when it is executed, +the variable +.BR _bench_n , +of type +.BR "unsigned long" , +is in scope. +The statement should perform +.B _bench_n +iterations of the computation to be measured +\(en and do as little else as possible. +The argument +.I n +to the macro +may be larger than +.BR ULONG_MAX : +the macro will execute +.I stmt +multiple times if necessary. +The statement is allowed to clobber +.BR _bench_n . +Note that +.B BENCH_TIMELOOP_TAG +does +.I not +call the timer's +.B preflight +function. +If the +.I stmt +executes a free +.B break +statement +then the statement +.I onbreak +is executed; +a free +.B continue +statement within +.I stmt +currently does not have a useful behaviour. +Free +.B break +and +.B continue +statements within +.I onbreak +behave normally. +(See +.BR control (3) +for a definition of +`free' +.B break +and +.B continue +statements.) . .SS The built-in timer The function @@ -383,7 +553,12 @@ the similarly-named clock subtimer does nothing except check that the .B linux-x86-perf-rdpmc-hw-cycles cycle subtimer has been selected. -This is almost certainly the best choice if it's available. +This is almost certainly the best choice if it's available; +It is, however, not compatible with (at least some versions of) +.BR valgrind (1); +it will detect that it is running under +.B valgrind +and fail to initialize. .TP .B x86-rdtscp Counts CPU cycles using the x86 @@ -446,7 +621,10 @@ If is called with a non-null timer pointer, then it will not fail; the benchmark state will be initialized, -and the function returns zero. +and the function returns zero; +the timer's reference count is +.I not +incremented. If the timer pointer is null, then .B bench_init @@ -457,29 +635,30 @@ If this succeeds, then the benchmark state will be initialized, and the function returns zero. In both cases, -the timer becomes owned by the benchmark state: +the timer reference becomes owned by the benchmark state: calling .B bench_destroy -on the benchmark state will destroy the timer. +on the benchmark state will decrement the timer's reference count, +and destroy it unless it has additional outstanding references. If .B bench_init is called with a null timer pointer, and its attempt to create a timer for itself fails, then .B bench_init -returns \-1; +returns \-1: the benchmark state is not initialized -and can safely be discarded; -calling -safe to call -.B bench_destroy -on the unsuccessfully benchmark state is safe and has no effect. +and can safely be discarded. .PP Calling .B bench_destroy on a benchmark state releases any resources it holds, most notably its timer, if any. +Calling +.B bench_destroy +on an unsuccessfully initialized benchmark state +is safe but has no effect. .PP Although .B struct bench_state @@ -510,9 +689,35 @@ Either or both of these calibrations can succeed or fail; if passage-of-time calibration fails, then cycle count calibration is impossible. .PP +The benchmarking state must be calibrated differently +for different kinds of timing loop; +this is controlled by the flags passed as the +.I f +argument to +.BR bench_calibrate . +The main difference lies in whether the code to be measured +is called +.IR indirectly , +i.e., via a function pointer. +Set +.B BTF_INDIRECT +if the code is to be called indirectly; +leave this flag clear if the code is called directly. +The +.B bench_measure +function always makes indirect calls; +the +.B BENCH_MEASURE +macro does not itself make indirect calls. +Usually, a program needs only one or the other; +if both are necessary for some reason, +the best approach is just to set up two benchmarking states +sharing the same timer, +and calibrate them separately. +.PP When it completes, .B bench_calibrate -sets flag in the benchmark state's +sets flags in the benchmark state's .B f member: if passage-of-time calibration succeeded, @@ -537,11 +742,96 @@ is called for a second or subsequent time on the same benchmark state, it returns immediately, either returning 0 or \-1 according to whether passage-of-time had previously been calibrated. -. -.SS Timing functions -A +.PP +The +.B BENCH_MEASURE +macro measures the performance of a computation. +It requires the declarations made by +.B BENCH_MEASURE_DECLS +to be in scope, +ideally within an enclosing block +(rather than at top-level, +where they'll have static storage duration, +and take longer to access). +The macro's expansion is syntactically a statement head; +see +.BR control (3) +for details about the underlying machinery. +In more detail, the macro is invoked as +.IP +.nf +.ta 2n +.BI "BENCH_MEASURE(" b ", " rc ", " t_out ", " base ) +.BI " " stmt +.fi +.PP +The +.I stmt +can be any C statement; +it should perform +.B _bench_n +iterations of the computation to be measured. +(The variable +.B _bench_n +is declarared as part of +.B BENCH_MEASURE_DECLS +and has type +.BR "unsigned long" . +Before commencing measurement proper, +the macro calls +.BR bench_preflight , +described below, +to check that everything is set up properly +for measurements on the current thread; +if this fails, then the macro sets +.I rc +to \-1. +Otherwise, the macro executes +.I stmt +one or more times, +with the objective of finding an iteration count +.I n +such that +.I n +iterations of the computation take more than +.IB b ->target_s "" \fR/\(sr2 +seconds. +If measurement fails, +then +.I rc +is set to \-1; +otherwise, +.I rc +is set to zero, and +.BI * t_out +is filled in with the measurement; +.IB t_out ->n +is set to +.IR n "\ \(mu\ " base . +.PP +The +.B BENCH_MEASURE_TAG +macro works just like +.B BENCH_MEASURE +except that it takes an additional +.I tag +argument used to distinguish the internal labels +used by the macro's implementation; +this makes it possible to use +.B BENCH_MEASURE_TAG +as a component in more complex macros. +See +.BR control (3) +for details about control-structure macros +and the meaning and format of tags. +.PP +The function +.B bench_measure +is similar, +except that it calls a .I benchmark function -has the signature +to perform the computation. +A benchmark function has the signature .IP .BI "void " fn "(unsigned long " n ", void *" ctx ); .PP @@ -553,61 +843,153 @@ The argument is a pointer passed into .B bench_measure for the benchmark function's own purposes. -.PP -The function +The .B bench_measure -receives five arguments. -.TP -.I b -points to the benchmark state to be used. -.TP -.I t_out -is the address of a -.BR struct bench_timing -in which the measurement should be left. -This structure is described below. -.TP -.I base -is a count of the number of operations performed -by each iteration of the benchmark function. -.TP -.I fn -is a benchmark function, described above. -.TP -.I ctx -is a pointer to be passed to the benchmark function. +function returns zero on success, +or \-1 on failure. +Note that .B bench_measure -does not interpret this pointer in any way. +invokes the benchmark indirectly, +so the benchmark state should have been calibrated with +.BR BTF_INDIRECT . +. +.SS Measurement utilities +The following functions are primarily exported for the benefit of the +.B BENCH_MEASURE +macro, +but are documented here in case they are useful. .PP The -.B bench_measure -function calls its benchark function repeatedly -with different iteration counts -.IR n , -with the objective that the call take approximately -.B target_s -seconds, as established in the benchmark state. -(Currently, if -.B target_s -holds the value -.IR t , -then -.B bench_measure -is satisfied when a call takes at least -.IR t /\(sr2\*,s.) -Once the function finds a satisfactory number of iterations, -it stores the results in -.BI * t_out \fR. -If measurement succeeds, then -.B bench_measure -returns zero. -If it fails \(en -most likely because the timer failed \(en -then it returns \-1. +.B bench_preflight +function prepares a benchmarking state for use. +It checks that the timer is calibrated +and suitable for measuring passage-of-time; +it also calls the timer's +.B preflight +function to prepare it for measurements on the current thread. +If these checks succeed, then +.B bench_preflight +returns zero; +otherwise it returns \-1 +and the caller should not proceed with measurements. +.PP +The +.B bench_adapt +function is used to determine iteration counts. +It is used in a loop such as the following. +.IP +.nf +.ta 2n +2n +.B "BENCH_TIMELOOP_DECLS;" +.B "struct bench_timer *tm;" +.B "struct bench_timing t;" +.B "double n = 1.0, target_s = 1.0;" +.IP +.B "do {" +.B " BENCH_TIMELOOP_TAG(time, tm, &t, n, { break; })" +.BI " " "(do " _bench_n " iterations of some computation)" ; +.B "} while (!bench_adapt(&n, target_s, &t));" +.fi +.PP +On entry, +.BI *n_inout +should be the number of iterations performed by the previous step, +and +.BI * t +the resulting time; +the +.B BTF_TIMEOK +flag must be set in +.IB t ->f \fR. +If the timing is sufficient \(en if +.IR t\fB->t "\ \*(>=\ " target_s /\(sr2 +\(en then +.B bench_adapt +returns a nonzero value to indicate that measurement is complete. +Otherwise, it sets +.BI * n_inout +to a new, larger iteration count +and returns zero to indicate that a further pass is necessary. +.PP +The +.B bench_adjust +function adjusts a raw timing, +as captured by +.BR BENCH_TIMELOOP_TAG , +according to the calibration data captured in +.IR b . +On exit, the timing data is updated, +and +.IB t ->n +is set to the product +.IR n "\ \(mu\ " base . +. +.SS Reporting results +The +.B bench_report +function formats a measurement result +into a human-readable string. +The function writes its output using the +generalized output formatting operations +.I gops +and output pointer +.IR go ; +see +.BR gprintf (3) +for details on generalized output formatting. +The +.I unit +argument describes the unit of activity being measured: +.TP +.B BTU_OP +counts operations of some unspecified nature, while +.TP +.B BTU_BYTE +counts a number of bytes processed. +.PP +These are presented differently +\(em in particular, +quantities bytes are expressed using binary scaling rather than decimal. +The timing to report is given by the +.I t +argument; +.IB t ->n +gives the number of units processed. +. +.\"-------------------------------------------------------------------------- +.SH EXAMPLE . +The following macros offer a fairly simple example of +how the benchmarking functions and macros can be used. +.VS +.ta 2n +2n +2n 2n+\w'\fBBENCH_MEASURE_TAG('u \n(.lu-\n(.iu-4n +#define BENCHMARK_DECLS \e + struct bench_timing _bmark_t; \e + int _bmark_rc; \e + BENCH_MEASURE_DECLS +.VP +#define BENCHMARK_TAG(tag, b, unit, base) \e + MC_BEFORE(tag##__benchmark_before, { fflush(stdout); }) \e + MC_AFTER(tag##__benchmark_after, { \e + if (_bmark_rc) \e + printf(": FAILED\en"); \e + else { \e + fputs(": ", stdout); \e + bench_report(&file_printops, stdout, (unit), &_bmark_tm);\ \e + putchar('\n'); \e + } \e + }) \e + BENCH_MEASURE_TAG(tag##__bmarkmark_measure, \e + (b), _bmark_rc, &_bmark_t, (base)) +#define BENCHMARK(b, unit, base) \e + BENCHMARK_TAG(bench, b, unit, base) +.VE + .\"-------------------------------------------------------------------------- .SH "SEE ALSO" . +.BR control (3), +.BR macros (3), .BR tvec-bench (3), .BR mLib (3). . diff --git a/test/bench.c b/test/bench.c index 2ac63b4..493f227 100644 --- a/test/bench.c +++ b/test/bench.c @@ -963,7 +963,7 @@ struct bench_timer *bench_createtimer(const char *config) if (select_timer(t, i, tmconf[i].p, tmconf[i].sz)) goto end; /* All is done. */ - t->_t.ops = &timer_ops; ret = &t->_t; t = 0; + t->_t.ops = &timer_ops; t->_t.ref = 1; ret = &t->_t; t = 0; end: if (t) timer_destroy(&t->_t); return (ret); @@ -1017,7 +1017,7 @@ end: */ void bench_destroy(struct bench_state *b) - { if (b->tm) { b->tm->ops->destroy(b->tm); b->tm = 0; } } + { if (b->tm && !--b->tm->ref) { b->tm->ops->destroy(b->tm); b->tm = 0; } } /* --- @spin@ --- * * @@ -1080,7 +1080,7 @@ int bench_calibrate(struct bench_state *b, unsigned f) */ if (f&BTF_INDIRECT) { for (i = 0; i < 50; i++) - BENCH_TIMELOOP_TAG(setup, b, &delta, 10000) + BENCH_TIMELOOP_TAG(setup, b->tm, &delta, 10000, ;) LAUNDER(&spin)(_bench_n, 0); } @@ -1093,10 +1093,10 @@ int bench_calibrate(struct bench_state *b, unsigned f) /* Measure @n@ iterations of the idle loop. */ if (f&BTF_INDIRECT) - BENCH_TIMELOOP_TAG(calibrate, b, &delta, n) + BENCH_TIMELOOP_TAG(calibrate, b->tm, &delta, n, ;) LAUNDER(&spin)(_bench_n, 0); else - BENCH_TIMELOOP_TAG(calibrate, b, &delta, n) + BENCH_TIMELOOP_TAG(calibrate, b->tm, &delta, n, ;) while (_bench_n--) RELAX; tf &= delta.f; if (!(tf&BTF_TIMEOK)) { rc = -1; goto end; } @@ -1162,8 +1162,8 @@ int bench_preflight(struct bench_state *b) /* --- @bench_adapt@ --- * * - * Arguments: @struct bench_state *b@ = benchmark state - * @double *n_inout@ = number of iterations, updated + * Arguments: @double *n_inout@ = number of iterations, updated + * @double target_s@ = target time in seconds * @const struct bench_timing *t@ = timing from the previous run * * Returns: Nonzero if the measurement is sufficient; zero to run again. @@ -1182,15 +1182,15 @@ int bench_preflight(struct bench_state *b) * On entry, @*n_inout@ should be the number of iterations * performed by the previous pass, and @*t@ the resulting time; * the @BTF_TIMEOK@ flag must be set @t->f@. If the timing is - * sufficient -- @t->t@ is sufficiently close to @b->target_s@ + * sufficient -- @t->t@ is sufficiently close to @target_s@ * -- then the function returns nonzero to indicate that * measurement is complete. Otherwise, it sets @*n_inout@ to a * new, larger iteration count and returns zero to indicate that * a further pass is necessary. */ -int bench_adapt(struct bench_state *b, - double *n_inout, const struct bench_timing *t) +int bench_adapt(double *n_inout, double target_s, + const struct bench_timing *t) { double n = *n_inout, nn; @@ -1208,8 +1208,8 @@ int bench_adapt(struct bench_state *b, * hand, if %$T/t < 1 + 1/n$% then %$t (n + 1)/n > T$%, so just trying * again with %$n' = n + 1$% iterations will very likely work. */ - if (t->t >= 0.707*b->target_s) return (1); - nn = n*b->target_s/t->t; modf(nn, &nn); + if (t->t >= 0.707*target_s) return (1); + nn = n*target_s/t->t; modf(nn, &nn); *n_inout = nn > n ? nn : n + 1; return (0); } @@ -1279,8 +1279,7 @@ int bench_measure(struct bench_state *b, struct bench_timing *t_out, BENCH_MEASURE_DECLS; int rc; - BENCH_MEASURE_TAG(bench_measure, b, rc, t_out, base) - fn(_bench_n, ctx); + BENCH_MEASURE(b, rc, t_out, base) fn(_bench_n, ctx); return (rc); } diff --git a/test/bench.h b/test/bench.h index 3756247..05c1eee 100644 --- a/test/bench.h +++ b/test/bench.h @@ -49,11 +49,15 @@ # include "dstr.h" #endif +#ifndef MLIB_MACROS_H +# include "macros.h" +#endif + #ifndef MLIB_GPRINTF_H # include "gprintf.h" #endif -/*----- Data structures ---------------------------------------------------*/ +/*----- Timers ------------------------------------------------------------*/ struct bench_time { unsigned f; /* flags */ @@ -73,7 +77,10 @@ struct bench_timing { double n, t, cy; /* count, time, and cycles */ }; -struct bench_timer { const struct bench_timerops *ops; }; +struct bench_timer { + const struct bench_timerops *ops; + unsigned ref; +}; struct bench_timerops { void (*describe)(struct bench_timer */*bt*/, dstr */*d*/); @@ -105,24 +112,22 @@ struct bench_timerops { /* Release the timer and any resources it holds. */ }; -struct bench_state { - struct bench_timer *tm; /* a timer */ - double target_s; /* target time to run benchmarks */ - unsigned f; /* calibration flags (@BTF_...@) */ -#define BTF_CLB 0x0100 /* tried calibrating */ - struct { double m, c; } clk, cy; /* calculated overheads */ -}; - -typedef void bench_fn(unsigned long /*n*/, void */*ctx*/); - /* Run the benchmark @n@ times, given a context pointer @ctx@. */ - -enum { - BTU_OP, /* counting operations of some kind */ - BTU_BYTE, /* counting bytes (@rbuf >= 0@) */ - BTU_LIMIT /* (number of units) */ -}; +/* --- @bench_createtimer@ --- * + * + * Arguments: @const char *config@ = user-supplied configuration string + * + * Returns: A freshly constructed standard timer object, or null on + * failure. + * + * Use: Allocate a timer. Dispose of it by calling + * @tm->ops->destroy(tm)@ when you're done. + * + * Applications should not set configuration strings except as + * established by user action, e.g., from a command-line option, + * environment variable, or configuration file. + */ -/*----- Macros ------------------------------------------------------------*/ +extern struct bench_timer *bench_createtimer(const char *config); /* --- @BENCH_TIMELOOP_DECLS@ --- * * @@ -140,28 +145,28 @@ enum { /* --- @BENCH_TIMELOOP_TAG@ --- * * * Arguments: @tag@ = control structure tag - * @struct bench_state *b@ = benchmark state + * @struct bench_state *tm@ = benchmark timer * @struct bench_timing *delta_out@ = where to put the result * @double n@ = number of iterations * * Returns: --- * - * Use: @BENCH_TIMELOOP_TAG(tag, b, delta_out, n) stmt@ runs @stmt@ + * Use: @BENCH_TIMELOOP_TAG(tag, tm, delta_out, n) stmt@ runs @stmt@ * @n@ times, capturing the time and/or CPU cycles taken in * @*delta_out@. The iteration count must be no more than the * %%\emph{square}%% of @ULONG_MAX@. If @stmt@ executes a free - * @break@ statement, then the containing loop -- which ust + * @break@ statement, then the containing loop -- which must * exist -- is exited. * * This macro is not intended to be used directly by users: it * is used internally by @bench_calibrate@ and @BENCH_MEASURE@. */ -#define BENCH_TIMELOOP_TAG(tag, b, delta_out, n) \ +#define BENCH_TIMELOOP_TAG(tag, tm, delta_out, n, onbreak) \ MC_BEFORE(tag##__benchtl_setup, { \ double _R = ULONG_MAX; double _n = (n); \ \ - _bench_tm = (b)->tm; \ + _bench_tm = (tm); \ if (_n <= _R) { _bench_n1 = 0; _bench_n = _n; } \ else { _bench_n1 = _n/_R; _bench_n = _n - _R*_bench_n1; } \ }) \ @@ -178,131 +183,18 @@ enum { { ; }, \ { MC_GOTARGET(tag##__benchtl_break); }) -/* --- @BENCH_MEASURE_DECLS@ --- * - * - * Arguments: --- - * - * Use: Expands to the declarations needed by @BENCH_MEASURE@. - * These should be at block scope, not at toplevel. - */ - -#define BENCH_MEASURE_DECLS \ - struct bench_state *_bench_b; \ - struct bench_timing *_bench_t; \ - double _bench_nn; \ - BENCH_TIMELOOP_DECLS - -/* --- @BENCH_MEASURE@, @BENCH_MEASURE_TAG@ --- * - * - * Arguments: @tag@ = control structure tag - * @struct bench_state *b@ = benchmark state - * @int &rc@ = where to put the result code (zero on success, - * %$-1$% on failure) - * @struct bench_timing *t_out@ = where to put the result - * @double base@ = number of operations per external iteration - * - * Returns: --- - * - * Use: @BENCH_MEASURE(b, rc, delta_out, n) stmt@ measures the - * execution of @stmt@. - * - * The statement should perform @_bench_n@ iterations of some - * operation. It will be invoked multiple times, with varying - * iteration counts, so as to run for approximately - * @b->target_s@ seconds. - * - * On success, the resulting timing is left in @*t_out@, with - * @t_out->n@ holding the product of the final iteration count - * and @base@, and @rc@ is set to zero. If the timer fails, or - * if @stmt@ invokes a free @break@ statement, then @rc@ is set - * to %$-1$%. - * - * The macro @BENCH_MEASURE_TAG@ is the same, except that it - * allows an explicit control-structure tag to be set so that it - * can be used within larger control structure macros. - */ - -#define BENCH_MEASURE_TAG(tag, b, rc, t_out, base) \ - MC_BEFORE(tag##__benchmsr_setup, { \ - _bench_b = (b); _bench_t = (t_out); _bench_nn = 1.0; \ - if (bench_preflight(_bench_b)) MC_GOTARGET(tag##__benchmsr_fail); \ - }) \ - MC_TARGET(tag##__benchmsr_done, \ - { bench_adjust(_bench_b, _bench_t, _bench_nn, (base)); (rc) = 0; }) \ - MC_TARGET(tag##__benchmsr_fail, { (rc) = -1; }) \ - for (;;) \ - MC_WRAP(tag##__benchmsr_loop, \ - { ; }, \ - { _bench_t->f &= _bench_b->f; \ - if (!(_bench_t->f&BTF_TIMEOK)) MC_GOTARGET(tag##__benchmsr_fail); \ - if (bench_adapt(_bench_b, &_bench_nn, _bench_t)) \ - MC_GOTARGET(tag##__benchmsr_done); \ - }, \ - { MC_GOTARGET(tag##__benchmsr_fail); }) \ - BENCH_TIMELOOP_TAG(tag##__benchmsr_time, _bench_b, _bench_t, _bench_nn) - -#define BENCH_MEASURE(b, rc, t_out, base) \ - BENCH_MEASURE_TAG(bench_measure, b, rc, t_out, base) - -/* --- @BENCHMARK_DECLS@ --- * - * - * Arguments: --- - * - * Use: Expands to the declarations needed by @BENCHMARK_TAG@. - * These should be at block scope, not at toplevel. - */ - -#define BENCHMARK_DECLS \ - struct bench_timing _bench_tm; \ - int _bench_rc; \ - BENCH_MEASURE_DECLS - -/* --- @BENCHMARK_TAG@ --- * - * - * Arguments: @tag@ = control structure tag - * @struct bench_state *b@ = benchmark state - * @struct gprintf_ops *gops, void *go@ = output formatter - * @unsigned unit@ = unit being measured (@BTU_...@ code) - * @double base@ = number of units per external iteration - * - * Returns: --- - * - * Use: @BENCHMARK_TAG(tag, b, gops, go, unit, base) stmt@ measures - * the execution of @stmt@ and writes a report to an output - * formatter. The @stmt@ should run @_bench_n@ iterations of - * the operation to be measured. - * - * No tagless version of this macro is provided, because it is - * expected to be useful primarily in the construction of - * higher-level macros. - */ - -#define BENCHMARK_TAG(tag, b, gops, go, unit, base) \ - MC_AFTER(tag##__benchmark_after, { \ - if (_bench_rc) gprintf((gops), (go), "FAILED"); \ - else bench_report((gops), (go), _bench_b, (unit), &_bench_tm); \ - }) \ - BENCH_MEASURE_TAG(tag##__benchmark_measure, \ - (b), _bench_rc, &_bench_tm, (base)) - -/*----- Functions provided ------------------------------------------------*/ +/*----- Measuring ---------------------------------------------------------*/ -/* --- @bench_createtimer@ --- * - * - * Arguments: @const char *config@ = user-supplied configuration string - * - * Returns: A freshly constructed standard timer object, or null on - * failure. - * - * Use: Allocate a timer. Dispose of it by calling - * @tm->ops->destroy(tm)@ when you're done. - * - * Applications should not set configuration strings except as - * established by user action, e.g., from a command-line option, - * environment variable, or configuration file. - */ +struct bench_state { + struct bench_timer *tm; /* a timer */ + double target_s; /* target time to run benchmarks */ + unsigned f; /* calibration flags (@BTF_...@) */ +#define BTF_CLB 0x0100 /* tried calibrating */ + struct { double m, c; } clk, cy; /* calculated overheads */ +}; -extern struct bench_timer *bench_createtimer(const char *config); +typedef void bench_fn(unsigned long /*n*/, void */*ctx*/); + /* Run the benchmark @n@ times, given a context pointer @ctx@. */ /* --- @bench_init@ --- * * @@ -377,8 +269,8 @@ extern int bench_preflight(struct bench_state */*b*/); /* --- @bench_adapt@ --- * * - * Arguments: @struct bench_state *b@ = benchmark state - * @double *n_inout@ = number of iterations, updated + * Arguments: @double *n_inout@ = number of iterations, updated + * @double target_s@ = target time in seconds * @const struct bench_timing *t@ = timing from the previous run * * Returns: Nonzero if the measurement is sufficient; zero to run again. @@ -387,17 +279,20 @@ extern int bench_preflight(struct bench_state */*b*/); * benchmark function to perform next. It is used in a loop * such as the following. * - * @double n = 1.0;@ + * @BENCH_TIMELOOP_DECLS;@ + * @struct bench_timer *tm;@ * @struct bench_timing t;@ + * @double n = 1.0, target_s = 1.0;@ * * @do {@ - * (run @n@ iterations; set @t@ to the timing) - * @} while (!bench_adapt(b, &n, &t));@ + * @BENCH_TIMELOOP_TAG(time, tm, &t, n, { break; })@ + * (do @_bench_n@ iterations of some computation)@;@ + * @} while (!bench_adapt(&n, target_s, &t));@ * * On entry, @*n_inout@ should be the number of iterations * performed by the previous pass, and @*t@ the resulting time; - * the @BTF_TIMEOK@ flag must be set @t->f@. If the timing is - * sufficient -- @t->t@ is sufficiently close to @b->target_s@ + * the @BTF_TIMEOK@ flag must be set in @t->f@. If the timing + * is sufficient -- @t->t@ is sufficiently close to @target_s@ * -- then the function returns nonzero to indicate that * measurement is complete. Otherwise, it sets @*n_inout@ to a * new, larger iteration count and returns zero to indicate that @@ -408,7 +303,7 @@ extern int bench_preflight(struct bench_state */*b*/); * @bench_measure@ function. */ -extern int bench_adapt(struct bench_state */*b*/, double */*n_inout*/, +extern int bench_adapt(double */*n_inout*/, double /*target_s*/, const struct bench_timing */*t*/); /* --- @bench_adjust@ --- * @@ -421,7 +316,7 @@ extern int bench_adapt(struct bench_state */*b*/, double */*n_inout*/, * * Returns: --- * - * Use: Adjusts a raw timing, as captured by @BENCH_TIMELOOP@, + * Use: Adjusts a raw timing, as captured by @BENCH_TIMELOOP_TAG@, * according to the calibration data captured in @b@. * On exit, the timing data is updated, and @t->n@ is set to the * product @n*base@. @@ -435,6 +330,73 @@ extern void bench_adjust(struct bench_state */*b*/, struct bench_timing */*t_inout*/, double /*n*/, double /*base*/); +/* --- @BENCH_MEASURE_DECLS@ --- * + * + * Arguments: --- + * + * Use: Expands to the declarations needed by @BENCH_MEASURE@. + * These should be at block scope, not at toplevel. + */ + +#define BENCH_MEASURE_DECLS \ + struct bench_state *_bench_b; \ + struct bench_timing *_bench_t; \ + double _bench_nn; \ + BENCH_TIMELOOP_DECLS + +/* --- @BENCH_MEASURE@, @BENCH_MEASURE_TAG@ --- * + * + * Arguments: @tag@ = control structure tag + * @struct bench_state *b@ = benchmark state + * @int &rc@ = where to put the result code (zero on success, + * %$-1$% on failure) + * @struct bench_timing *t_out@ = where to put the result + * @double base@ = number of operations per external iteration + * + * Returns: --- + * + * Use: @BENCH_MEASURE(b, rc, delta_out, n) stmt@ measures the + * execution of @stmt@. + * + * The statement should perform @_bench_n@ iterations of some + * operation. It will be invoked multiple times, with varying + * iteration counts, so as to run for approximately + * @b->target_s@ seconds. + * + * On success, the resulting timing is left in @*t_out@, with + * @t_out->n@ holding the product of the final iteration count + * and @base@, and @rc@ is set to zero. If the timer fails, or + * if @stmt@ invokes a free @break@ statement, then @rc@ is set + * to %$-1$%. + * + * The macro @BENCH_MEASURE_TAG@ is the same, except that it + * allows an explicit control-structure tag to be set so that it + * can be used within larger control structure macros. + */ + +#define BENCH_MEASURE_TAG(tag, b, rc, t_out, base) \ + MC_BEFORE(tag##__benchmsr_setup, { \ + _bench_b = (b); _bench_t = (t_out); _bench_nn = 1.0; \ + if (bench_preflight(_bench_b)) MC_GOTARGET(tag##__benchmsr_fail); \ + }) \ + MC_TARGET(tag##__benchmsr_done, \ + { bench_adjust(_bench_b, _bench_t, _bench_nn, (base)); (rc) = 0; }) \ + MC_TARGET(tag##__benchmsr_fail, { (rc) = -1; }) \ + for (;;) \ + MC_WRAP(tag##__benchmsr_loop, \ + { ; }, \ + { _bench_t->f &= _bench_b->f; \ + if (!(_bench_t->f&BTF_TIMEOK)) MC_GOTARGET(tag##__benchmsr_fail); \ + if (bench_adapt(&_bench_nn, _bench_b->target_s, _bench_t)) \ + MC_GOTARGET(tag##__benchmsr_done); \ + }, \ + { MC_GOTARGET(tag##__benchmsr_fail); }) \ + BENCH_TIMELOOP_TAG(tag##__benchmsr_time, \ + _bench_b->tm, _bench_t, _bench_nn, { break; }) + +#define BENCH_MEASURE(b, rc, t_out, base) \ + BENCH_MEASURE_TAG(bench_measure, b, rc, t_out, base) + /* --- @bench_measure@ --- * * * Arguments: @struct bench_state *b@ = benchmark state @@ -460,6 +422,12 @@ extern int bench_measure(struct bench_state */*b*/, /*----- Reporting ---------------------------------------------------------*/ +enum { + BTU_OP, /* counting operations of some kind */ + BTU_BYTE, /* counting bytes (@rbuf >= 0@) */ + BTU_LIMIT /* (number of units) */ +}; + /* --- @bench_report@ --- * * * Arguments: @const struct gprintf_ops *gops, void *gp@ = output formatter diff --git a/test/example/Makefile.am b/test/example/Makefile.am index eae66f8..663efbe 100644 --- a/test/example/Makefile.am +++ b/test/example/Makefile.am @@ -42,5 +42,14 @@ noinst_PROGRAMS += adhoc adhoc_SOURCES = adhoc_SOURCES += adhoc.c +adhoc_SOURCES += fib.c +adhoc_SOURCES += example.h + +noinst_PROGRAMS += bench +bench_SOURCES = + +bench_SOURCES += bench.c +bench_SOURCES += fib.c +bench_SOURCES += example.h ###----- That's all, folks -------------------------------------------------- diff --git a/test/example/adhoc.c b/test/example/adhoc.c index fbaa69f..d1d549e 100644 --- a/test/example/adhoc.c +++ b/test/example/adhoc.c @@ -27,7 +27,6 @@ /*----- Header files ------------------------------------------------------*/ -#include #include #include "macros.h" @@ -37,64 +36,21 @@ #include "tvec-bench.h" #include "tvec-types.h" -/*----- Main code ---------------------------------------------------------*/ - -/* Stupid but traditional recursive Fibonacci. */ -static unsigned long recfib(unsigned n) - { return (n <= 1 ? n : recfib(n - 1) + recfib(n - 2)); } - -/* Slightly less stupid but still traditional iterative Fibonacci. */ -static unsigned long iterfib(unsigned n) -{ - unsigned long u, v, t; - - for (u = 0, v = 1; n--; t = v, v = u, u += t); - return (u); -} +#include "example.h" -/* Sadly nontraditional intelligent Fibonacci. */ -static unsigned long expfib(unsigned n) -{ - unsigned long a, b, u, v, t; - - /* We work in %$\Q(\phi)$%, where %$\phi^2 = \phi + 1$%. I claim that - * %$\phi^k = F_k \phi + F_{k-1} \pmod f(\phi))$%. Proof by induction: - * note that * %$F_{-1} = F_1 - F_0 = 1$%, so %$\phi^0 = 1 = {}$% - * %$F_0 \phi + F_{-1}$%; and %$\phi^{k+1} = F_k \phi^2 + {}$% - * %$F_{k-1} \phi = F_k (\phi + 1) + F_{k-1} \phi = (F_k + {}$% - * %$F_{k-1} \phi + F_k = F_{k+1} \phi + F_k$% as claimed. - * - * Now, notice that %$(a \phi + b) (c \phi + d) = a c \phi^2 + {}$% - * $%(a d + b c) \phi + b d = a c (\phi + 1) + (a d + b c) \phi + {}$% - * %$b d = (a c + a d + b c) \phi + (a c + b d)$%. In particular, - * %$(u \phi + v)^2 \equiv (u^2 + 2 u v) \phi + (u^2 + v^2)$%. - */ - a = 0, b = 1; u = 1, v = 0; - if (n) - for (;;) { - if (n%2) { t = a*u; a = t + a*v + b*u; b = t + b*v; } - n /= 2; if (!n) break; - t = u*u; u = t + 2*u*v; v = t + v*v; - } - return (a); -} +/*----- Main code ---------------------------------------------------------*/ int main(int argc, char *argv[]) { struct tvec_state tvstate; int argpos; unsigned long i, a, b, t; + dstr d = DSTR_INIT; tvec_parseargs(argc, argv, &tvstate, &argpos, &tvec_adhocconfig); if (argpos < argc) die(2, "no input files expected"); tvec_adhoc(&tvstate); -#if (ULONG_MAX/65536 >> 16) >= 0xffffffff -# define FIBLIMIT 94 /* F_94 = 19740274219868223167 > 2^64 */ -#else -# define FIBLIMIT 48 /* F_48 = 4807526976 > 2^32 */ -#endif - TVEC_TESTGROUP(&tvstate, "fib-test") { for (i = 0, a = 0, b = 1; i < FIBLIMIT; i++, t = b, b = a, a += t) TVEC_TEST(&tvstate) { @@ -109,16 +65,20 @@ int main(int argc, char *argv[]) if (tvec_benchprep(&tvstate, &tvec_benchstate, 0)) break; - TVEC_BENCHMARK("recfib, n = 40", &tvstate, tvec_benchstate, BTU_OP, 1) - while (_bench_n--) ADMIRE(recfib(40)); - TVEC_BENCHMARK("iterfib, n = " STR(FIBLIMIT), - &tvstate, tvec_benchstate, BTU_OP, 1) + dstr_reset(&d); dstr_putf(&d, "recfib, n = %u", RECFIBLIMIT); + TVEC_BENCHMARK(d.buf, &tvstate, tvec_benchstate, BTU_OP, 1) + while (_bench_n--) ADMIRE(recfib(RECFIBLIMIT)); + + dstr_reset(&d); dstr_putf(&d, "iterfib, n = %u", FIBLIMIT); + TVEC_BENCHMARK(d.buf, &tvstate, tvec_benchstate, BTU_OP, 1) while (_bench_n--) ADMIRE(iterfib(FIBLIMIT)); - TVEC_BENCHMARK("expfib, n = " STR(FIBLIMIT), - &tvstate, tvec_benchstate, BTU_OP, 1) + + dstr_reset(&d); dstr_putf(&d, "expfib, n = %u", FIBLIMIT); + TVEC_BENCHMARK(d.buf, &tvstate, tvec_benchstate, BTU_OP, 1) while (_bench_n--) ADMIRE(expfib(FIBLIMIT)); } + dstr_destroy(&d); return (tvec_end(&tvstate)); } diff --git a/test/example/bench.c b/test/example/bench.c new file mode 100644 index 0000000..144cf77 --- /dev/null +++ b/test/example/bench.c @@ -0,0 +1,91 @@ +/* -*-c-*- + * + * Demonstration of standalone benchmarking + * + * (c) 2024 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of the mLib utilities library. + * + * mLib is free software: you can redistribute it and/or modify it under + * the terms of the GNU Library General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * mLib is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + * License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with mLib. If not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + */ + +/*----- Header files ------------------------------------------------------*/ + +#include +#include + +#include "macros.h" +#include "report.h" +#include "tvec.h" +#include "tvec-adhoc.h" +#include "tvec-bench.h" +#include "tvec-types.h" + +#include "example.h" + +/*----- Macro from the manpage --------------------------------------------*/ + +#define BENCHMARK_DECLS \ + struct bench_timing _bmark_t; \ + int _bmark_rc; \ + BENCH_MEASURE_DECLS + +#define BENCHMARK_TAG(tag, b, unit, base) \ + MC_BEFORE(tag##__benchmark_before, { fflush(stdout); }) \ + MC_AFTER(tag##__benchmark_after, { \ + if (_bmark_rc) \ + printf(": FAILED\n"); \ + else { \ + fputs(": ", stdout); \ + bench_report(&file_printops, stdout, (unit), &_bmark_t); \ + putchar('\n'); \ + } \ + }) \ + BENCH_MEASURE_TAG(tag##__benchmark_measure, \ + (b), _bmark_rc, &_bmark_t, (base)) +#define BENCHMARK(b, unit, base) BENCHMARK_TAG(bench, b, unit, base) + +/*----- Main code ---------------------------------------------------------*/ + +int main(void) +{ + struct bench_state b; + BENCHMARK_DECLS; + + if (bench_init(&b, 0)) + { fprintf(stderr, "timer setup failed\n"); exit(2); } + if (bench_calibrate(&b, 0)) + { fprintf(stderr, "timer calibration failed\n"); exit(2); } + + printf("recfib, n = %u", RECFIBLIMIT); + BENCHMARK(&b, BTU_OP, 1) + while (_bench_n--) ADMIRE(recfib(RECFIBLIMIT)); + + printf("iterfib, n = %u", FIBLIMIT); + BENCHMARK(&b, BTU_OP, 1) + while (_bench_n--) ADMIRE(iterfib(FIBLIMIT)); + + printf("expfib, n = %u", FIBLIMIT); + BENCHMARK(&b, BTU_OP, 1) + while (_bench_n--) ADMIRE(expfib(FIBLIMIT)); + + return (0); +} + +/*----- That's all, folks -------------------------------------------------*/ diff --git a/test/example/example.h b/test/example/example.h index b3cdee8..ecbaa6b 100644 --- a/test/example/example.h +++ b/test/example/example.h @@ -34,6 +34,7 @@ /*----- Header files ------------------------------------------------------*/ +#include #include /*----- Functions provided ------------------------------------------------*/ @@ -47,6 +48,22 @@ extern int greet(char */*buf*/, size_t /*sz*/, const char */*name*/); * Return zero on success, or -1 on error. */ +extern unsigned long recfib(unsigned /*n*/); + /* Stupid but traditional recursive Fibonacci. */ + +extern unsigned long iterfib(unsigned /*n*/); + /* Slightly less stupid but still traditional iterative Fibonacci. */ + +extern unsigned long expfib(unsigned /*n*/); + /* Sadly nontraditional intelligent Fibonacci. */ + +#define RECFIBLIMIT 40 /* too slow beyond this */ +#if (ULONG_MAX/65536 >> 16) >= 0xffffffff +# define FIBLIMIT 94 /* F_94 = 19740274219868223167 > 2^64 */ +#else +# define FIBLIMIT 48 /* F_48 = 4807526976 > 2^32 */ +#endif + /*----- That's all, folks -------------------------------------------------*/ #ifdef __cplusplus diff --git a/test/example/fib.c b/test/example/fib.c new file mode 100644 index 0000000..1639aa4 --- /dev/null +++ b/test/example/fib.c @@ -0,0 +1,71 @@ +/* -*-c-*- + * + * Compute elements of the Fibonacci sequence + * + * (c) 2024 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of the mLib utilities library. + * + * mLib is free software: you can redistribute it and/or modify it under + * the terms of the GNU Library General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * mLib is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + * License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with mLib. If not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + */ + +/*----- Header files ------------------------------------------------------*/ + +#include "example.h" + +/*----- Main code ---------------------------------------------------------*/ + +unsigned long recfib(unsigned n) + { return (n <= 1 ? n : recfib(n - 1) + recfib(n - 2)); } + +unsigned long iterfib(unsigned n) +{ + unsigned long u, v, t; + + for (u = 0, v = 1; n--; t = v, v = u, u += t); + return (u); +} + +unsigned long expfib(unsigned n) +{ + unsigned long a, b, u, v, t; + + /* We work in %$\Q(\phi)$%, where %$\phi^2 = \phi + 1$%. I claim that + * %$\phi^k = F_k \phi + F_{k-1} \pmod f(\phi))$%. Proof by induction: + * note that * %$F_{-1} = F_1 - F_0 = 1$%, so %$\phi^0 = 1 = {}$% + * %$F_0 \phi + F_{-1}$%; and %$\phi^{k+1} = F_k \phi^2 + {}$% + * %$F_{k-1} \phi = F_k (\phi + 1) + F_{k-1} \phi = (F_k + {}$% + * %$F_{k-1} \phi + F_k = F_{k+1} \phi + F_k$% as claimed. + * + * Now, notice that %$(a \phi + b) (c \phi + d) = a c \phi^2 + {}$% + * $%(a d + b c) \phi + b d = a c (\phi + 1) + (a d + b c) \phi + {}$% + * %$b d = (a c + a d + b c) \phi + (a c + b d)$%. In particular, + * %$(u \phi + v)^2 \equiv (u^2 + 2 u v) \phi + (u^2 + v^2)$%. + */ + a = 0, b = 1; u = 1, v = 0; + if (n) + for (;;) { + if (n%2) { t = a*u; a = t + a*v + b*u; b = t + b*v; } + n /= 2; if (!n) break; + t = u*u; u = t + 2*u*v; v = t + v*v; + } + return (a); +} + +/*----- That's all, folks -------------------------------------------------*/ diff --git a/utils/align.3.in b/utils/align.3.in index b6e2a9c..a9b9893 100644 --- a/utils/align.3.in +++ b/utils/align.3.in @@ -30,6 +30,7 @@ .\"-------------------------------------------------------------------------- .TH align 3mLib "4 January 2009" "Straylight/Edgeware" "mLib utilities library" .\" @ALIGN +.\" @ALIGNOF . .\"-------------------------------------------------------------------------- .SH NAME @@ -41,21 +42,52 @@ align \- alignment utilities .nf .B "#include " .PP -.BI "size_t ALIGN(size_t " sz ");" +.B "union align { ...\& };" +.PP +.BI "void ALIGN(size_t &" sz ");" +.BI "size_t ALIGNOF(" type ); .fi . .\"-------------------------------------------------------------------------- .SH DESCRIPTION +. +The +.B "union align" +type is (a reasonable guess at) a type with the +implementation's most restrictive alignment requirements; +i.e., if a pointer is suitably aligned for +.B "union align" +value, +then it should be suitably aligned for any other value too. +.PP +The +.B ALIGNOF +macro returns the alignment required for the given +.IR type ; +i.e., if an address is suitably aligned for a +.I type +value if and only if it is a multiple of +.BR ALIGNOF( type ) \fR. +.PP The .B ALIGN -macro returns the value of its argument +macro rounds its argument +.I sz +up to the next multiple of +.BR "ALIGNOF(union align)" : +so, if +.I sz +is an offset from the start of a suitably aligned chunk of memory, +e.g., returned by +.BR malloc (3), +then +.BI ALIGN( sz ) +adjusts .I sz -rounded up to the next multiple of the size of -.BR "union align" , -which is defined as a union containing a selection of built-in types. -The intent is to write fairly portable memory allocators, which must -return correctly-aligned memory. -.IR array . +in place, including additional padding +so that the new offset is also suitably aligned. +The intent is to assist with writing fairly portable memory allocators, +which must return correctly-aligned memory. . .\"-------------------------------------------------------------------------- .SH "SEE ALSO" diff --git a/utils/fltfmt.c b/utils/fltfmt.c index 291557b..89c705d 100644 --- a/utils/fltfmt.c +++ b/utils/fltfmt.c @@ -56,8 +56,8 @@ /*----- Some useful constants ---------------------------------------------*/ -#define B31 0x80000000 /* just bit 31 */ -#define B30 0x40000000 /* just bit 30 */ +#define B31 0x80000000u /* just bit 31 */ +#define B30 0x40000000u /* just bit 30 */ #define SH32 4294967296.0 /* 2^32 as floating-point */ diff --git a/utils/macros.h b/utils/macros.h index bc83629..5b77225 100644 --- a/utils/macros.h +++ b/utils/macros.h @@ -61,16 +61,18 @@ #define MLIB__STR(x) #x #define STR(x) MLIB__STR(x) -/* --- @GLUE@ --- * +/* --- @GLUE@, @GLUE3@ --- * * * Arguments: @x, y@ = two sequences of tokens + * @z@ = a third sequence of tokens * * Returns: A single token formed by gluing together the macro-expansions - * of @x@ and @y@. + * of @x@ and @y@, and @z@ for @GLUE3@. */ #define MLIB__GLUE(x, y) x##y #define GLUE(x, y) MLIB__GLUE(x, y) +#define GLUE3(x, y, z) GLUE(x, MLIB__GLUE(y, z)) /* --- @STATIC_ASSERT@ --- * * @@ -144,6 +146,15 @@ #define UNQUALIFY(type, p) \ CONVERT_CAREFULLY(type *, const volatile type *, p) +/* --- @EMPTY@ --- * + * + * Arguments: --- + * + * Returns: The empty token sequence. + */ + +#define EMPTY + /* --- @COMMA@ --- * * * Arguments: --- diff --git a/utils/maths.h b/utils/maths.h index fe33e8b..37227e9 100644 --- a/utils/maths.h +++ b/utils/maths.h @@ -36,10 +36,6 @@ #include -#ifndef MLIB_BITS_H -# include "bits.h" -#endif - /*----- Macros provided ---------------------------------------------------*/ /* --- @NANP@ --- * @@ -86,8 +82,6 @@ # define NEGP(x) ((x) < 0) #endif -/*----- Floating-point encoding and decoding ------------------------------*/ - /*----- That's all, folks -------------------------------------------------*/ #ifdef __cplusplus diff --git a/utils/t/fltfmt-test.c b/utils/t/fltfmt-test.c index 76e1d41..5b60607 100644 --- a/utils/t/fltfmt-test.c +++ b/utils/t/fltfmt-test.c @@ -79,18 +79,18 @@ static const struct tvec_flaginfo flterrmask_flaginfo = static const struct tvec_flag fltrnd_flags[] = { /* Standard rounding modes. */ - { "zero", 0xffff, FLTRND_ZERO }, - { "projinf", 0xffff, FLTRND_PROJINF }, - { "posinf", 0xffff, FLTRND_POSINF }, - { "neginf", 0xffff, FLTRND_NEGINF }, - { "odd", 0xffff, FLTRND_ODD }, - { "even", 0xffff, FLTRND_EVEN }, - { "nearest-even", 0xffff, FLTRND_NEAREVEN }, - { "nearest-odd", 0xffff, FLTRND_NEARODD }, - { "nearest-zero", 0xffff, FLTRND_NEARZERO }, - { "nearest-projinf", 0xffff, FLTRND_NEARINF }, - { "nearest-neginf", 0xffff, FLTRND_NEARNEG }, - { "nearest-posinf", 0xffff, FLTRND_NEARPOS }, + { "zero", 0xffffu, FLTRND_ZERO }, + { "projinf", 0xffffu, FLTRND_PROJINF }, + { "posinf", 0xffffu, FLTRND_POSINF }, + { "neginf", 0xffffu, FLTRND_NEGINF }, + { "odd", 0xffffu, FLTRND_ODD }, + { "even", 0xffffu, FLTRND_EVEN }, + { "nearest-even", 0xffffu, FLTRND_NEAREVEN }, + { "nearest-odd", 0xffffu, FLTRND_NEARODD }, + { "nearest-zero", 0xffffu, FLTRND_NEARZERO }, + { "nearest-projinf", 0xffffu, FLTRND_NEARINF }, + { "nearest-neginf", 0xffffu, FLTRND_NEARNEG }, + { "nearest-posinf", 0xffffu, FLTRND_NEARPOS }, /* Rounding mode bits: rounds away from zero in the listed conditions. * The notation corresponds to rounding predicates as follows. The syntax @@ -102,22 +102,22 @@ static const struct tvec_flag fltrnd_flags[] = { * @HALF@ %|0|% %|1|% * @LOW@ %|0|% %|1|% */ - { "+0.00", 0x0001, 0x0001 }, - { "+0.01", 0x0002, 0x0002 }, - { "+0.10", 0x0004, 0x0004 }, - { "+0.11", 0x0008, 0x0008 }, - { "+1.00", 0x0010, 0x0010 }, - { "+1.01", 0x0020, 0x0020 }, - { "+1.10", 0x0040, 0x0040 }, - { "+1.11", 0x0080, 0x0080 }, - { "-0.00", 0x0100, 0x0100 }, - { "-0.01", 0x0200, 0x0200 }, - { "-0.10", 0x0400, 0x0400 }, - { "-0.11", 0x0800, 0x0800 }, - { "-1.00", 0x1000, 0x1000 }, - { "-1.01", 0x2000, 0x2000 }, - { "-1.10", 0x4000, 0x4000 }, - { "-1.11", 0x8000, 0x8000 }, + { "+0.00", 0x0001u, 0x0001u }, + { "+0.01", 0x0002u, 0x0002u }, + { "+0.10", 0x0004u, 0x0004u }, + { "+0.11", 0x0008u, 0x0008u }, + { "+1.00", 0x0010u, 0x0010u }, + { "+1.01", 0x0020u, 0x0020u }, + { "+1.10", 0x0040u, 0x0040u }, + { "+1.11", 0x0080u, 0x0080u }, + { "-0.00", 0x0100u, 0x0100u }, + { "-0.01", 0x0200u, 0x0200u }, + { "-0.10", 0x0400u, 0x0400u }, + { "-0.11", 0x0800u, 0x0800u }, + { "-1.00", 0x1000u, 0x1000u }, + { "-1.01", 0x2000u, 0x2000u }, + { "-1.10", 0x4000u, 0x4000u }, + { "-1.11", 0x8000u, 0x8000u }, /* Phew! */ TVEC_ENDFLAGS -- [mdw]