chiark / gitweb /
@@@ pool upgrade
[mLib] / test / bench.h
1 /* -*-c-*-
2  *
3  * Benchmarking support
4  *
5  * (c) 2023 Straylight/Edgeware
6  */
7
8 /*----- Licensing notice --------------------------------------------------*
9  *
10  * This file is part of the mLib utilities library.
11  *
12  * mLib is free software: you can redistribute it and/or modify it under
13  * the terms of the GNU Library General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or (at
15  * your option) any later version.
16  *
17  * mLib is distributed in the hope that it will be useful, but WITHOUT
18  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
20  * License for more details.
21  *
22  * You should have received a copy of the GNU Library General Public
23  * License along with mLib.  If not, write to the Free Software
24  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25  * USA.
26  */
27
28 #ifndef MLIB_BENCH_H
29 #define MLIB_BENCH_H
30
31 #ifdef __cplusplus
32   extern "C" {
33 #endif
34
35 /*----- Header files ------------------------------------------------------*/
36
37 #include <limits.h>
38 #include <time.h>
39
40 #ifndef MLIB_BITS_H
41 #  include "bits.h"
42 #endif
43
44 #ifndef MLIB_CONTROL_H
45 #  include "control.h"
46 #endif
47
48 #ifndef MLIB_DSTR_H
49 #  include "dstr.h"
50 #endif
51
52 #ifndef MLIB_MACROS_H
53 #  include "macros.h"
54 #endif
55
56 #ifndef MLIB_GPRINTF_H
57 #  include "gprintf.h"
58 #endif
59
60 /*----- Timers ------------------------------------------------------------*/
61
62 struct bench_time {
63   unsigned f;                           /* flags */
64 #define BTF_TIMEOK 1u                   /*   @s@ ad @ns@ slots are value */
65 #define BTF_CYOK 2u                     /*   @cy@ slot is valid */
66 #define BTF_ANY (BTF_TIMEOK | BTF_CYOK) /*   some part is useful */
67   union {
68     struct { kludge64 s; uint32 ns; } ts; /* @struct timespec@-ish */
69     clock_t clk;                        /* @clock@ */
70     kludge64 rawns;                     /* raw nanosecond count */
71   } t;                                  /* time */
72   kludge64 cy;                          /* count of CPU cycles */
73 };
74
75 struct bench_timing {
76   unsigned f;                           /* flags (@BTF_...@) */
77   double n, t, cy;                      /* count, time, and cycles */
78 };
79
80 struct bench_timer {
81   const struct bench_timerops *ops;
82   unsigned ref;
83 };
84
85 struct bench_timerops {
86   void (*describe)(struct bench_timer */*bt*/, dstr */*d*/);
87     /* Write a description of the timer to @d@. */
88
89   int (*preflight)(struct bench_timer */*bt*/);
90     /* Return zero if timer is ready to go, or %$-1$% otherwise.  The @now@
91      * function will only be called following a successful @preflight@ on the
92      * same thread.
93      */
94
95   int (*now)(struct bench_timer */*bt*/, struct bench_time */*t_out*/,
96               unsigned /*f*/);
97 #define BTF_T0 0u                       /* fetching first time of a pair */
98 #define BTF_T1 1u                       /* fetching second time of a pair */
99     /* Fill in @*t_out@ with the current time.  Return zero on success
100      * %%\emph{or} permanent failure; return %$-1$% on temporary failure.
101      */
102
103   void (*diff)(struct bench_timer */*bt*/,
104                struct bench_timing */*delta_out*/,
105                const struct bench_time */*t0*/,
106                const struct bench_time */*t1*/);
107     /* Subtract the time @t0@ from the time @t1@, leaving the result in
108      * @*delta_out@, setting flags as appropriate.
109      */
110
111   void (*destroy)(struct bench_timer */*bt*/);
112     /* Release the timer and any resources it holds. */
113 };
114
115 /* --- @bench_createtimer@ --- *
116  *
117  * Arguments:   @const char *config@ = user-supplied configuration string
118  *
119  * Returns:     A freshly constructed standard timer object, or null on
120  *              failure.
121  *
122  * Use:         Allocate a timer.  Dispose of it by calling
123  *              @tm->ops->destroy(tm)@ when you're done.
124  *
125  *              Applications should not set configuration strings except as
126  *              established by user action, e.g., from a command-line option,
127  *              environment variable, or configuration file.
128  */
129
130 extern struct bench_timer *bench_createtimer(const char *config);
131
132 /* --- @BENCH_TIMELOOP_DECLS@ --- *
133  *
134  * Arguments:   ---
135  *
136  * Use:         Expands to the declarations needed by @BENCH_TIMELOOP_TAG@.
137  *              These should be at block scope, not at toplevel.
138  */
139
140 #define BENCH_TIMELOOP_DECLS                                            \
141   struct bench_timer *_bench_tm;                                        \
142   struct bench_time _bench_t0, _bench_t1;                               \
143   unsigned long _bench_n, _bench_n1
144
145 /* --- @BENCH_TIMELOOP_TAG@ --- *
146  *
147  * Arguments:   @tag@ = control structure tag
148  *              @struct bench_state *tm@ = benchmark timer
149  *              @struct bench_timing *delta_out@ = where to put the result
150  *              @double n@ = number of iterations
151  *
152  * Returns:     ---
153  *
154  * Use:         @BENCH_TIMELOOP_TAG(tag, tm, delta_out, n) stmt@ runs @stmt@
155  *              @n@ times, capturing the time and/or CPU cycles taken in
156  *              @*delta_out@.  The iteration count must be no more than the
157  *              %%\emph{square}%% of @ULONG_MAX@.  If @stmt@ executes a free
158  *              @break@ statement, then the containing loop -- which must
159  *              exist -- is exited.
160  *
161  *              This macro is not intended to be used directly by users: it
162  *              is used internally by @bench_calibrate@ and @BENCH_MEASURE@.
163  */
164
165 #define BENCH_TIMELOOP_TAG(tag, tm, delta_out, n, onbreak)              \
166   MC_BEFORE(tag##__benchtl_setup, {                                     \
167     double _R = ULONG_MAX; double _n = (n);                             \
168                                                                         \
169     _bench_tm = (tm);                                                   \
170     if (_n <= _R) { _bench_n1 = 0; _bench_n = _n; }                     \
171     else { _bench_n1 = _n/_R; _bench_n = _n - _R*_bench_n1; }           \
172   })                                                                    \
173   MC_TARGET(tag##__benchtl_break, { break; })                           \
174   MC_AFTER(tag##__benchtl_end, {                                        \
175     _bench_tm->ops->diff(_bench_tm, (delta_out), &_bench_t0, &_bench_t1); \
176   })                                                                    \
177   MC_DOWHILE(tag##__benchtl_countdown, _bench_n1--)                     \
178     MC_AFTER(tag##__benchtl_post, { _bench_n = ULONG_MAX; })            \
179     MC_DOWHILE(tag##__benchtl_t1,                                       \
180                _bench_tm->ops->now(_bench_tm, &_bench_t1, BTF_T1))      \
181       MC_WRAP(tag##__benchtl_t0,                                        \
182         { while (_bench_tm->ops->now(_bench_tm, &_bench_t0, BTF_T0)) ; }, \
183         { ; },                                                          \
184         { MC_GOTARGET(tag##__benchtl_break); })
185
186 /*----- Measuring ---------------------------------------------------------*/
187
188 struct bench_state {
189   struct bench_timer *tm;               /* a timer */
190   double target_s;                      /* target time to run benchmarks */
191   unsigned f;                           /* calibration flags (@BTF_...@) */
192 #define BTF_CLB 0x0100                  /*   tried calibrating */
193   struct { double m, c; } clk, cy;      /* calculated overheads */
194 };
195
196 typedef void bench_fn(unsigned long /*n*/, void */*ctx*/);
197   /* Run the benchmark @n@ times, given a context pointer @ctx@. */
198
199 /* --- @bench_init@ --- *
200  *
201  * Arguments:   @struct bench_state *b@ = bench state to initialize
202  *              @struct bench_timer *tm@ = timer to attach, or null
203  *
204  * Returns:     Zero on success, %$-1$% on failure.
205  *
206  * Use:         Initialize the benchmark state.  On success, the timer state
207  *              still needs to be calibrated (use @bench_calibrate@) before
208  *              it can be used, but this will be done automatically by
209  *              @bench_measure@ if it's not done by hand earlier.  The timer
210  *              is now owned by the benchmark state and will be destroyed by
211  *              @bench_destroy@.
212  *
213  *              The only reason for failure is if @tm@ was null on entry,
214  *              and automatic construction of a timer failed.  The state is
215  *              safe to discard, but calling @bench_destroy@ is safe too.
216  */
217
218 extern int bench_init(struct bench_state */*b*/, struct bench_timer */*tm*/);
219
220 /* --- @bench_destroy@ --- *
221  *
222  * Arguments:   @struct bench_state *b@ = bench state
223  *
224  * Returns:     ---
225  *
226  * Use:         Destroy the benchmark state, releasing the resources that it
227  *              holds.
228  */
229
230 extern void bench_destroy(struct bench_state */*b*/);
231
232 /* --- @bench_calibrate@ --- *
233  *
234  * Arguments:   @struct bench_state *b@ = bench state
235  *              @unsigned f@ = calibration flags
236  *
237  * Returns:     Zero on success, %$-1$% if calibration failed.
238  *
239  * Use:         Calibrate the benchmark state, so that it can be used to
240  *              measure performance reasonably accurately.
241  *
242  *              Calibration will take into account how the subject code is
243  *              going to be located.  If you're going to use @BENCH_MEASURE@
244  *              to measure a piece of literal code, then leave @f@ zero.  If
245  *              the code to be measured is going to be executed via an
246  *              indirect branch, e.g., through the @measure@ function, then
247  *              set @BTF_INDIRECT@.
248  */
249
250 #define BTF_INDIRECT 1u
251 extern int bench_calibrate(struct bench_state */*b*/, unsigned /*f*/);
252
253 /* --- @bench_preflight@ --- *
254  *
255  * Arguments:   @struct bench_state *b@ = benchmark state
256  *
257  * Returns:     Zero on success, %$-1$% on failure.
258  *
259  * Use:         Prepares for benchmarking on the current thread.  Current
260  *              checks are that the timer is calibrated and that it can
261  *              successfully measure time; the timer preflight is also run.
262  *
263  *              Users are unlikely to find this function useful: it's called
264  *              automatically by the @BENCH_MEASURE@ macro and the
265  *              @bench_measure@ function.
266  */
267
268 extern int bench_preflight(struct bench_state */*b*/);
269
270 /* --- @bench_adapt@ --- *
271  *
272  * Arguments:   @double *n_inout@ = number of iterations, updated
273  *              @double target_s@ = target time in seconds
274  *              @const struct bench_timing *t@ = timing from the previous run
275  *
276  * Returns:     Nonzero if the measurement is sufficient; zero to run again.
277  *
278  * Use:         This function determines a suitable number of iterations of a
279  *              benchmark function to perform next.  It is used in a loop
280  *              such as the following.
281  *
282  *                      @BENCH_TIMELOOP_DECLS;@
283  *                      @struct bench_timer *tm;@
284  *                      @struct bench_timing t;@
285  *                      @double n = 1.0, target_s = 1.0;@
286  *
287  *                      @do {@
288  *                        @BENCH_TIMELOOP_TAG(time, tm, &t, n, { break; })@
289  *                          (do @_bench_n@ iterations of some computation)@;@
290  *                      @} while (!bench_adapt(&n, target_s, &t));@
291  *
292  *              On entry, @*n_inout@ should be the number of iterations
293  *              performed by the previous pass, and @*t@ the resulting time;
294  *              the @BTF_TIMEOK@ flag must be set in @t->f@.  If the timing
295  *              is sufficient -- @t->t@ is sufficiently close to @target_s@
296  *              -- then the function returns nonzero to indicate that
297  *              measurement is complete.  Otherwise, it sets @*n_inout@ to a
298  *              new, larger iteration count and returns zero to indicate that
299  *              a further pass is necessary.
300  *
301  *              Users are unlikely to find this function useful: it's called
302  *              automatically by the @BENCH_MEASURE@ macro and the
303  *              @bench_measure@ function.
304  */
305
306 extern int bench_adapt(double */*n_inout*/, double /*target_s*/,
307                        const struct bench_timing */*t*/);
308
309 /* --- @bench_adjust@ --- *
310  *
311  * Arguments:   @struct bench_state *b@ = benchmark state
312  *              @struct bench_timing *t_inout@ = timing to adjust
313  *              @double n@ = number of external iterations performed
314  *              @double base@ = number of internal operations per external
315  *                      iteration
316  *
317  * Returns:     ---
318  *
319  * Use:         Adjusts a raw timing, as captured by @BENCH_TIMELOOP_TAG@,
320  *              according to the calibration data captured in @b@.
321  *              On exit, the timing data is updated, and @t->n@ is set to the
322  *              product @n*base@.
323  *
324  *              Users are unlikely to find this function useful: it's called
325  *              automatically by the @BENCH_MEASURE@ macro and the
326  *              @bench_measure@ function.
327  */
328
329 extern void bench_adjust(struct bench_state */*b*/,
330                          struct bench_timing */*t_inout*/,
331                          double /*n*/, double /*base*/);
332
333 /* --- @BENCH_MEASURE_DECLS@ --- *
334  *
335  * Arguments:   ---
336  *
337  * Use:         Expands to the declarations needed by @BENCH_MEASURE@.
338  *              These should be at block scope, not at toplevel.
339  */
340
341 #define BENCH_MEASURE_DECLS                                             \
342   struct bench_state *_bench_b;                                         \
343   struct bench_timing *_bench_t;                                        \
344   double _bench_nn;                                                     \
345   BENCH_TIMELOOP_DECLS
346
347 /* --- @BENCH_MEASURE@, @BENCH_MEASURE_TAG@ --- *
348  *
349  * Arguments:   @tag@ = control structure tag
350  *              @struct bench_state *b@ = benchmark state
351  *              @int &rc@ = where to put the result code (zero on success,
352  *                      %$-1$% on failure)
353  *              @struct bench_timing *t_out@ = where to put the result
354  *              @double base@ = number of operations per external iteration
355  *
356  * Returns:     ---
357  *
358  * Use:         @BENCH_MEASURE(b, rc, delta_out, n) stmt@ measures the
359  *              execution of @stmt@.
360  *
361  *              The statement should perform @_bench_n@ iterations of some
362  *              operation.  It will be invoked multiple times, with varying
363  *              iteration counts, so as to run for approximately
364  *              @b->target_s@ seconds.
365  *
366  *              On success, the resulting timing is left in @*t_out@, with
367  *              @t_out->n@ holding the product of the final iteration count
368  *              and @base@, and @rc@ is set to zero.  If the timer fails, or
369  *              if @stmt@ invokes a free @break@ statement, then @rc@ is set
370  *              to %$-1$%.
371  *
372  *              The macro @BENCH_MEASURE_TAG@ is the same, except that it
373  *              allows an explicit control-structure tag to be set so that it
374  *              can be used within larger control structure macros.
375  */
376
377 #define BENCH_MEASURE_TAG(tag, b, rc, t_out, base)                      \
378   MC_BEFORE(tag##__benchmsr_setup, {                                    \
379     _bench_b = (b); _bench_t = (t_out); _bench_nn = 1.0;                \
380     if (bench_preflight(_bench_b)) MC_GOTARGET(tag##__benchmsr_fail);   \
381   })                                                                    \
382   MC_TARGET(tag##__benchmsr_done,                                       \
383     { bench_adjust(_bench_b, _bench_t, _bench_nn, (base)); (rc) = 0; }) \
384   MC_TARGET(tag##__benchmsr_fail, { (rc) = -1; })                       \
385   for (;;)                                                              \
386     MC_WRAP(tag##__benchmsr_loop,                                       \
387       { ; },                                                            \
388       { _bench_t->f &= _bench_b->f;                                     \
389         if (!(_bench_t->f&BTF_TIMEOK)) MC_GOTARGET(tag##__benchmsr_fail); \
390         if (bench_adapt(&_bench_nn, _bench_b->target_s, _bench_t))      \
391           MC_GOTARGET(tag##__benchmsr_done);                            \
392       },                                                                \
393       { MC_GOTARGET(tag##__benchmsr_fail); })                           \
394     BENCH_TIMELOOP_TAG(tag##__benchmsr_time,                            \
395                        _bench_b->tm, _bench_t, _bench_nn, { break; })
396
397 #define BENCH_MEASURE(b, rc, t_out, base)                               \
398   BENCH_MEASURE_TAG(bench_measure, b, rc, t_out, base)
399
400 /* --- @bench_measure@ --- *
401  *
402  * Arguments:   @struct bench_state *b@ = benchmark state
403  *              @struct bench_timing *t_out@ = where to leave the timing
404  *              @double base@ = number of internal units per call
405  *              @bench_fn *fn@, @void *ctx@ = benchmark function to run
406  *
407  * Returns:     Zero on success, %$-1$% if timing failed.
408  *
409  * Use:         Measure a function.  The function @fn@ is called adaptively
410  *              with an iteration count @n@ set so as to run for
411  *              approximately @b->target_s@ seconds.
412  *
413  *              The result is left in @*t_out@, with @t_out->n@ counting the
414  *              final product of the iteration count and @base@ (which might,
415  *              e.g., reflect the number of inner iterations the function
416  *              performs, or the number of bytes it processes per iteration).
417  */
418
419 extern int bench_measure(struct bench_state */*b*/,
420                          struct bench_timing */*t_out*/,
421                          double /*base*/, bench_fn */*fn*/, void */*ctx*/);
422
423 /*----- Reporting ---------------------------------------------------------*/
424
425 enum {
426   BTU_OP,                              /* counting operations of some kind */
427   BTU_BYTE,                             /* counting bytes (@rbuf >= 0@) */
428   BTU_LIMIT                             /* (number of units) */
429 };
430
431 /* --- @bench_report@ --- *
432  *
433  * Arguments:   @const struct gprintf_ops *gops, void *gp@ = output formatter
434  *              @unsigned unit@ = unit processed by the benchmark function
435  *              @const struct bench_timing *t@ = benchmark result
436  *
437  * Returns:     ---
438  *
439  * Use:         Format, to the output identified by @gops@ and @go@, a
440  *              human-readable report of the benchmarking result @t@.  No
441  *              newline is appended.
442  *
443  *              The output format is subject to change in later versions.
444  */
445
446 extern void bench_report(const struct gprintf_ops */*gops*/, void */*go*/,
447                          unsigned /*unit*/,
448                          const struct bench_timing */*t*/);
449
450 /*----- That's all, folks -------------------------------------------------*/
451
452 #ifdef __cplusplus
453   }
454 #endif
455
456 #endif