chiark / gitweb /
@@@ fltfmt mess
[mLib] / test / bench.h
1 /* -*-c-*-
2  *
3  * Benchmarking support
4  *
5  * (c) 2023 Straylight/Edgeware
6  */
7
8 /*----- Licensing notice --------------------------------------------------*
9  *
10  * This file is part of the mLib utilities library.
11  *
12  * mLib is free software: you can redistribute it and/or modify it under
13  * the terms of the GNU Library General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or (at
15  * your option) any later version.
16  *
17  * mLib is distributed in the hope that it will be useful, but WITHOUT
18  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
20  * License for more details.
21  *
22  * You should have received a copy of the GNU Library General Public
23  * License along with mLib.  If not, write to the Free Software
24  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25  * USA.
26  */
27
28 #ifndef MLIB_BENCH_H
29 #define MLIB_BENCH_H
30
31 #ifdef __cplusplus
32   extern "C" {
33 #endif
34
35 /*----- Header files ------------------------------------------------------*/
36
37 #include <limits.h>
38 #include <time.h>
39
40 #ifndef MLIB_BITS_H
41 #  include "bits.h"
42 #endif
43
44 #ifndef MLIB_CONTROL_H
45 #  include "control.h"
46 #endif
47
48 #ifndef MLIB_DSTR_H
49 #  include "dstr.h"
50 #endif
51
52 #ifndef MLIB_GPRINTF_H
53 #  include "gprintf.h"
54 #endif
55
56 /*----- Data structures ---------------------------------------------------*/
57
58 struct bench_time {
59   unsigned f;                           /* flags */
60 #define BTF_TIMEOK 1u                   /*   @s@ ad @ns@ slots are value */
61 #define BTF_CYOK 2u                     /*   @cy@ slot is valid */
62 #define BTF_ANY (BTF_TIMEOK | BTF_CYOK) /*   some part is useful */
63   union {
64     struct { kludge64 s; uint32 ns; } ts; /* @struct timespec@-ish */
65     clock_t clk;                        /* @clock@ */
66     kludge64 rawns;                     /* raw nanosecond count */
67   } t;                                  /* time */
68   kludge64 cy;                          /* count of CPU cycles */
69 };
70
71 struct bench_timing {
72   unsigned f;                           /* flags (@BTF_...@) */
73   double n, t, cy;                      /* count, time, and cycles */
74 };
75
76 struct bench_timer { const struct bench_timerops *ops; };
77
78 struct bench_timerops {
79   void (*describe)(struct bench_timer */*bt*/, dstr */*d*/);
80     /* Write a description of the timer to @d@. */
81
82   int (*preflight)(struct bench_timer */*bt*/);
83     /* Return zero if timer is ready to go, or %$-1$% otherwise.  The @now@
84      * function will only be called following a successful @preflight@ on the
85      * same thread.
86      */
87
88   int (*now)(struct bench_timer */*bt*/, struct bench_time */*t_out*/,
89               unsigned /*f*/);
90 #define BTF_T0 0u                       /* fetching first time of a pair */
91 #define BTF_T1 1u                       /* fetching second time of a pair */
92     /* Fill in @*t_out@ with the current time.  Return zero on success
93      * %%\emph{or} permanent failure; return %$-1$% on temporary failure.
94      */
95
96   void (*diff)(struct bench_timer */*bt*/,
97                struct bench_timing */*delta_out*/,
98                const struct bench_time */*t0*/,
99                const struct bench_time */*t1*/);
100     /* Subtract the time @t0@ from the time @t1@, leaving the result in
101      * @*delta_out@, setting flags as appropriate.
102      */
103
104   void (*destroy)(struct bench_timer */*bt*/);
105     /* Release the timer and any resources it holds. */
106 };
107
108 struct bench_state {
109   struct bench_timer *tm;               /* a timer */
110   double target_s;                      /* target time to run benchmarks */
111   unsigned f;                           /* calibration flags (@BTF_...@) */
112 #define BTF_CLB 0x0100                  /*   tried calibrating */
113   struct { double m, c; } clk, cy;      /* calculated overheads */
114 };
115
116 typedef void bench_fn(unsigned long /*n*/, void */*ctx*/);
117   /* Run the benchmark @n@ times, given a context pointer @ctx@. */
118
119 enum {
120   BTU_OP,                              /* counting operations of some kind */
121   BTU_BYTE,                             /* counting bytes (@rbuf >= 0@) */
122   BTU_LIMIT                             /* (number of units) */
123 };
124
125 /*----- Macros ------------------------------------------------------------*/
126
127 /* --- @BENCH_TIMELOOP_DECLS@ --- *
128  *
129  * Arguments:   ---
130  *
131  * Use:         Expands to the declarations needed by @BENCH_TIMELOOP_TAG@.
132  *              These should be at block scope, not at toplevel.
133  */
134
135 #define BENCH_TIMELOOP_DECLS                                            \
136   struct bench_timer *_bench_tm;                                        \
137   struct bench_time _bench_t0, _bench_t1;                               \
138   unsigned long _bench_n, _bench_n1
139
140 /* --- @BENCH_TIMELOOP_TAG@ --- *
141  *
142  * Arguments:   @tag@ = control structure tag
143  *              @struct bench_state *b@ = benchmark state
144  *              @struct bench_timing *delta_out@ = where to put the result
145  *              @double n@ = number of iterations
146  *
147  * Returns:     ---
148  *
149  * Use:         @BENCH_TIMELOOP_TAG(tag, b, delta_out, n) stmt@ runs @stmt@
150  *              @n@ times, capturing the time and/or CPU cycles taken in
151  *              @*delta_out@.  The iteration count must be no more than the
152  *              %%\emph{square}%% of @ULONG_MAX@.  If @stmt@ executes a free
153  *              @break@ statement, then the containing loop -- which ust
154  *              exist -- is exited.
155  *
156  *              This macro is not intended to be used directly by users: it
157  *              is used internally by @bench_calibrate@ and @BENCH_MEASURE@.
158  */
159
160 #define BENCH_TIMELOOP_TAG(tag, b, delta_out, n)                        \
161   MC_BEFORE(tag##__benchtl_setup, {                                     \
162     double _R = ULONG_MAX; double _n = (n);                             \
163                                                                         \
164     _bench_tm = (b)->tm;                                                \
165     if (_n <= _R) { _bench_n1 = 0; _bench_n = _n; }                     \
166     else { _bench_n1 = _n/_R; _bench_n = _n - _R*_bench_n1; }           \
167   })                                                                    \
168   MC_TARGET(tag##__benchtl_break, { break; })                           \
169   MC_AFTER(tag##__benchtl_end, {                                        \
170     _bench_tm->ops->diff(_bench_tm, (delta_out), &_bench_t0, &_bench_t1); \
171   })                                                                    \
172   MC_DOWHILE(tag##__benchtl_countdown, _bench_n1--)                     \
173     MC_AFTER(tag##__benchtl_post, { _bench_n = ULONG_MAX; })            \
174     MC_DOWHILE(tag##__benchtl_t1,                                       \
175                _bench_tm->ops->now(_bench_tm, &_bench_t1, BTF_T1))      \
176       MC_WRAP(tag##__benchtl_t0,                                        \
177         { while (_bench_tm->ops->now(_bench_tm, &_bench_t0, BTF_T0)) ; }, \
178         { ; },                                                          \
179         { MC_GOTARGET(tag##__benchtl_break); })
180
181 /* --- @BENCH_MEASURE_DECLS@ --- *
182  *
183  * Arguments:   ---
184  *
185  * Use:         Expands to the declarations needed by @BENCH_MEASURE@.
186  *              These should be at block scope, not at toplevel.
187  */
188
189 #define BENCH_MEASURE_DECLS                                             \
190   struct bench_state *_bench_b;                                         \
191   struct bench_timing *_bench_t;                                        \
192   double _bench_nn;                                                     \
193   BENCH_TIMELOOP_DECLS
194
195 /* --- @BENCH_MEASURE@, @BENCH_MEASURE_TAG@ --- *
196  *
197  * Arguments:   @tag@ = control structure tag
198  *              @struct bench_state *b@ = benchmark state
199  *              @int &rc@ = where to put the result code (zero on success,
200  *                      %$-1$% on failure)
201  *              @struct bench_timing *t_out@ = where to put the result
202  *              @double base@ = number of operations per external iteration
203  *
204  * Returns:     ---
205  *
206  * Use:         @BENCH_MEASURE(b, rc, delta_out, n) stmt@ measures the
207  *              execution of @stmt@.
208  *
209  *              The statement should perform @_bench_n@ iterations of some
210  *              operation.  It will be invoked multiple times, with varying
211  *              iteration counts, so as to run for approximately
212  *              @b->target_s@ seconds.
213  *
214  *              On success, the resulting timing is left in @*t_out@, with
215  *              @t_out->n@ holding the product of the final iteration count
216  *              and @base@, and @rc@ is set to zero.  If the timer fails, or
217  *              if @stmt@ invokes a free @break@ statement, then @rc@ is set
218  *              to %$-1$%.
219  *
220  *              The macro @BENCH_MEASURE_TAG@ is the same, except that it
221  *              allows an explicit control-structure tag to be set so that it
222  *              can be used within larger control structure macros.
223  */
224
225 #define BENCH_MEASURE_TAG(tag, b, rc, t_out, base)                      \
226   MC_BEFORE(tag##__benchmsr_setup, {                                    \
227     _bench_b = (b); _bench_t = (t_out); _bench_nn = 1.0;                \
228     if (bench_preflight(_bench_b)) MC_GOTARGET(tag##__benchmsr_fail);   \
229   })                                                                    \
230   MC_TARGET(tag##__benchmsr_done,                                       \
231     { bench_adjust(_bench_b, _bench_t, _bench_nn, (base)); (rc) = 0; }) \
232   MC_TARGET(tag##__benchmsr_fail, { (rc) = -1; })                       \
233   for (;;)                                                              \
234     MC_WRAP(tag##__benchmsr_loop,                                       \
235       { ; },                                                            \
236       { _bench_t->f &= _bench_b->f;                                     \
237         if (!(_bench_t->f&BTF_TIMEOK)) MC_GOTARGET(tag##__benchmsr_fail); \
238         if (bench_adapt(_bench_b, &_bench_nn, _bench_t))                \
239           MC_GOTARGET(tag##__benchmsr_done);                            \
240       },                                                                \
241       { MC_GOTARGET(tag##__benchmsr_fail); })                           \
242     BENCH_TIMELOOP_TAG(tag##__benchmsr_time, _bench_b, _bench_t, _bench_nn)
243
244 #define BENCH_MEASURE(b, rc, t_out, base)                               \
245   BENCH_MEASURE_TAG(bench_measure, b, rc, t_out, base)
246
247 /* --- @BENCHMARK_DECLS@ --- *
248  *
249  * Arguments:   ---
250  *
251  * Use:         Expands to the declarations needed by @BENCHMARK_TAG@.
252  *              These should be at block scope, not at toplevel.
253  */
254
255 #define BENCHMARK_DECLS                                                 \
256   struct bench_timing _bench_tm;                                        \
257   int _bench_rc;                                                        \
258   BENCH_MEASURE_DECLS
259
260 /* --- @BENCHMARK_TAG@ --- *
261  *
262  * Arguments:   @tag@ = control structure tag
263  *              @struct bench_state *b@ = benchmark state
264  *              @struct gprintf_ops *gops, void *go@ = output formatter
265  *              @unsigned unit@ = unit being measured (@BTU_...@ code)
266  *              @double base@ = number of units per external iteration
267  *
268  * Returns:     ---
269  *
270  * Use:         @BENCHMARK_TAG(tag, b, gops, go, unit, base) stmt@ measures
271  *              the execution of @stmt@ and writes a report to an output
272  *              formatter.  The @stmt@ should run @_bench_n@ iterations of
273  *              the operation to be measured.
274  *
275  *              No tagless version of this macro is provided, because it is
276  *              expected to be useful primarily in the construction of
277  *              higher-level macros.
278  */
279
280 #define BENCHMARK_TAG(tag, b, gops, go, unit, base)                     \
281   MC_AFTER(tag##__benchmark_after, {                                    \
282     if (_bench_rc) gprintf((gops), (go), "FAILED");                     \
283     else bench_report((gops), (go), _bench_b, (unit), &_bench_tm);      \
284   })                                                                    \
285   BENCH_MEASURE_TAG(tag##__benchmark_measure,                           \
286                     (b), _bench_rc, &_bench_tm, (base))
287
288 /*----- Functions provided ------------------------------------------------*/
289
290 /* --- @bench_createtimer@ --- *
291  *
292  * Arguments:   @const char *config@ = user-supplied configuration string
293  *
294  * Returns:     A freshly constructed standard timer object, or null on
295  *              failure.
296  *
297  * Use:         Allocate a timer.  Dispose of it by calling
298  *              @tm->ops->destroy(tm)@ when you're done.
299  *
300  *              Applications should not set configuration strings except as
301  *              established by user action, e.g., from a command-line option,
302  *              environment variable, or configuration file.
303  */
304
305 extern struct bench_timer *bench_createtimer(const char *config);
306
307 /* --- @bench_init@ --- *
308  *
309  * Arguments:   @struct bench_state *b@ = bench state to initialize
310  *              @struct bench_timer *tm@ = timer to attach, or null
311  *
312  * Returns:     Zero on success, %$-1$% on failure.
313  *
314  * Use:         Initialize the benchmark state.  On success, the timer state
315  *              still needs to be calibrated (use @bench_calibrate@) before
316  *              it can be used, but this will be done automatically by
317  *              @bench_measure@ if it's not done by hand earlier.  The timer
318  *              is now owned by the benchmark state and will be destroyed by
319  *              @bench_destroy@.
320  *
321  *              The only reason for failure is if @tm@ was null on entry,
322  *              and automatic construction of a timer failed.  The state is
323  *              safe to discard, but calling @bench_destroy@ is safe too.
324  */
325
326 extern int bench_init(struct bench_state */*b*/, struct bench_timer */*tm*/);
327
328 /* --- @bench_destroy@ --- *
329  *
330  * Arguments:   @struct bench_state *b@ = bench state
331  *
332  * Returns:     ---
333  *
334  * Use:         Destroy the benchmark state, releasing the resources that it
335  *              holds.
336  */
337
338 extern void bench_destroy(struct bench_state */*b*/);
339
340 /* --- @bench_calibrate@ --- *
341  *
342  * Arguments:   @struct bench_state *b@ = bench state
343  *              @unsigned f@ = calibration flags
344  *
345  * Returns:     Zero on success, %$-1$% if calibration failed.
346  *
347  * Use:         Calibrate the benchmark state, so that it can be used to
348  *              measure performance reasonably accurately.
349  *
350  *              Calibration will take into account how the subject code is
351  *              going to be located.  If you're going to use @BENCH_MEASURE@
352  *              to measure a piece of literal code, then leave @f@ zero.  If
353  *              the code to be measured is going to be executed via an
354  *              indirect branch, e.g., through the @measure@ function, then
355  *              set @BTF_INDIRECT@.
356  */
357
358 #define BTF_INDIRECT 1u
359 extern int bench_calibrate(struct bench_state */*b*/, unsigned /*f*/);
360
361 /* --- @bench_preflight@ --- *
362  *
363  * Arguments:   @struct bench_state *b@ = benchmark state
364  *
365  * Returns:     Zero on success, %$-1$% on failure.
366  *
367  * Use:         Prepares for benchmarking on the current thread.  Current
368  *              checks are that the timer is calibrated and that it can
369  *              successfully measure time; the timer preflight is also run.
370  *
371  *              Users are unlikely to find this function useful: it's called
372  *              automatically by the @BENCH_MEASURE@ macro and the
373  *              @bench_measure@ function.
374  */
375
376 extern int bench_preflight(struct bench_state */*b*/);
377
378 /* --- @bench_adapt@ --- *
379  *
380  * Arguments:   @struct bench_state *b@ = benchmark state
381  *              @double *n_inout@ = number of iterations, updated
382  *              @const struct bench_timing *t@ = timing from the previous run
383  *
384  * Returns:     Nonzero if the measurement is sufficient; zero to run again.
385  *
386  * Use:         This function determines a suitable number of iterations of a
387  *              benchmark function to perform next.  It is used in a loop
388  *              such as the following.
389  *
390  *                      @double n = 1.0;@
391  *                      @struct bench_timing t;@
392  *
393  *                      @do {@
394  *                        (run @n@ iterations; set @t@ to the timing)
395  *                      @} while (!bench_adapt(b, &n, &t));@
396  *
397  *              On entry, @*n_inout@ should be the number of iterations
398  *              performed by the previous pass, and @*t@ the resulting time;
399  *              the @BTF_TIMEOK@ flag must be set @t->f@.  If the timing is
400  *              sufficient -- @t->t@ is sufficiently close to @b->target_s@
401  *              -- then the function returns nonzero to indicate that
402  *              measurement is complete.  Otherwise, it sets @*n_inout@ to a
403  *              new, larger iteration count and returns zero to indicate that
404  *              a further pass is necessary.
405  *
406  *              Users are unlikely to find this function useful: it's called
407  *              automatically by the @BENCH_MEASURE@ macro and the
408  *              @bench_measure@ function.
409  */
410
411 extern int bench_adapt(struct bench_state */*b*/, double */*n_inout*/,
412                        const struct bench_timing */*t*/);
413
414 /* --- @bench_adjust@ --- *
415  *
416  * Arguments:   @struct bench_state *b@ = benchmark state
417  *              @struct bench_timing *t_inout@ = timing to adjust
418  *              @double n@ = number of external iterations performed
419  *              @double base@ = number of internal operations per external
420  *                      iteration
421  *
422  * Returns:     ---
423  *
424  * Use:         Adjusts a raw timing, as captured by @BENCH_TIMELOOP@,
425  *              according to the calibration data captured in @b@.
426  *              On exit, the timing data is updated, and @t->n@ is set to the
427  *              product @n*base@.
428  *
429  *              Users are unlikely to find this function useful: it's called
430  *              automatically by the @BENCH_MEASURE@ macro and the
431  *              @bench_measure@ function.
432  */
433
434 extern void bench_adjust(struct bench_state */*b*/,
435                          struct bench_timing */*t_inout*/,
436                          double /*n*/, double /*base*/);
437
438 /* --- @bench_measure@ --- *
439  *
440  * Arguments:   @struct bench_state *b@ = benchmark state
441  *              @struct bench_timing *t_out@ = where to leave the timing
442  *              @double base@ = number of internal units per call
443  *              @bench_fn *fn@, @void *ctx@ = benchmark function to run
444  *
445  * Returns:     Zero on success, %$-1$% if timing failed.
446  *
447  * Use:         Measure a function.  The function @fn@ is called adaptively
448  *              with an iteration count @n@ set so as to run for
449  *              approximately @b->target_s@ seconds.
450  *
451  *              The result is left in @*t_out@, with @t_out->n@ counting the
452  *              final product of the iteration count and @base@ (which might,
453  *              e.g., reflect the number of inner iterations the function
454  *              performs, or the number of bytes it processes per iteration).
455  */
456
457 extern int bench_measure(struct bench_state */*b*/,
458                          struct bench_timing */*t_out*/,
459                          double /*base*/, bench_fn */*fn*/, void */*ctx*/);
460
461 /*----- Reporting ---------------------------------------------------------*/
462
463 /* --- @bench_report@ --- *
464  *
465  * Arguments:   @const struct gprintf_ops *gops, void *gp@ = output formatter
466  *              @unsigned unit@ = unit processed by the benchmark function
467  *              @const struct bench_timing *t@ = benchmark result
468  *
469  * Returns:     ---
470  *
471  * Use:         Format, to the output identified by @gops@ and @go@, a
472  *              human-readable report of the benchmarking result @t@.  No
473  *              newline is appended.
474  *
475  *              The output format is subject to change in later versions.
476  */
477
478 extern void bench_report(const struct gprintf_ops */*gops*/, void */*go*/,
479                          unsigned /*unit*/,
480                          const struct bench_timing */*t*/);
481
482 /*----- That's all, folks -------------------------------------------------*/
483
484 #ifdef __cplusplus
485   }
486 #endif
487
488 #endif