Commit | Line | Data |
---|---|---|
b64eb60f MW |
1 | /* -*-c-*- |
2 | * | |
3 | * Benchmarking support | |
4 | * | |
5 | * (c) 2023 Straylight/Edgeware | |
6 | */ | |
7 | ||
8 | /*----- Licensing notice --------------------------------------------------* | |
9 | * | |
10 | * This file is part of the mLib utilities library. | |
11 | * | |
12 | * mLib is free software: you can redistribute it and/or modify it under | |
13 | * the terms of the GNU Library General Public License as published by | |
14 | * the Free Software Foundation; either version 2 of the License, or (at | |
15 | * your option) any later version. | |
16 | * | |
17 | * mLib is distributed in the hope that it will be useful, but WITHOUT | |
18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
19 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public | |
20 | * License for more details. | |
21 | * | |
22 | * You should have received a copy of the GNU Library General Public | |
23 | * License along with mLib. If not, write to the Free Software | |
24 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, | |
25 | * USA. | |
26 | */ | |
27 | ||
28 | #ifndef MLIB_BENCH_H | |
29 | #define MLIB_BENCH_H | |
30 | ||
31 | #ifdef __cplusplus | |
32 | extern "C" { | |
33 | #endif | |
34 | ||
35 | /*----- Header files ------------------------------------------------------*/ | |
36 | ||
b1a20bee | 37 | #include <limits.h> |
6e683a79 MW |
38 | #include <time.h> |
39 | ||
b64eb60f MW |
40 | #ifndef MLIB_BITS_H |
41 | # include "bits.h" | |
42 | #endif | |
43 | ||
b1a20bee MW |
44 | #ifndef MLIB_CONTROL_H |
45 | # include "control.h" | |
46 | #endif | |
47 | ||
13ee7406 MW |
48 | #ifndef MLIB_DSTR_H |
49 | # include "dstr.h" | |
50 | #endif | |
51 | ||
b1a20bee MW |
52 | #ifndef MLIB_GPRINTF_H |
53 | # include "gprintf.h" | |
54 | #endif | |
55 | ||
b64eb60f MW |
56 | /*----- Data structures ---------------------------------------------------*/ |
57 | ||
58 | struct bench_time { | |
67b5031e MW |
59 | unsigned f; /* flags */ |
60 | #define BTF_TIMEOK 1u /* @s@ ad @ns@ slots are value */ | |
61 | #define BTF_CYOK 2u /* @cy@ slot is valid */ | |
62 | #define BTF_ANY (BTF_TIMEOK | BTF_CYOK) /* some part is useful */ | |
6e683a79 MW |
63 | union { |
64 | struct { kludge64 s; uint32 ns; } ts; /* @struct timespec@-ish */ | |
65 | clock_t clk; /* @clock@ */ | |
66 | kludge64 rawns; /* raw nanosecond count */ | |
67 | } t; /* time */ | |
c91413e6 | 68 | kludge64 cy; /* count of CPU cycles */ |
b64eb60f MW |
69 | }; |
70 | ||
71 | struct bench_timing { | |
c91413e6 | 72 | unsigned f; /* flags (@BTF_...@) */ |
67b5031e | 73 | double n, t, cy; /* count, time, and cycles */ |
b64eb60f MW |
74 | }; |
75 | ||
76 | struct bench_timer { const struct bench_timerops *ops; }; | |
77 | ||
78 | struct bench_timerops { | |
13ee7406 MW |
79 | void (*describe)(struct bench_timer */*bt*/, dstr */*d*/); |
80 | /* Write a description of the timer to @d@. */ | |
81 | ||
b1a20bee MW |
82 | int (*preflight)(struct bench_timer */*bt*/); |
83 | /* Return zero if timer is ready to go, or %$-1$% otherwise. The @now@ | |
84 | * function will only be called following a successful @preflight@ on the | |
85 | * same thread. | |
86 | */ | |
87 | ||
6e683a79 MW |
88 | int (*now)(struct bench_timer */*bt*/, struct bench_time */*t_out*/, |
89 | unsigned /*f*/); | |
90 | #define BTF_T0 0u /* fetching first time of a pair */ | |
91 | #define BTF_T1 1u /* fetching second time of a pair */ | |
92 | /* Fill in @*t_out@ with the current time. Return zero on success | |
93 | * %%\emph{or} permanent failure; return %$-1$% on temporary failure. | |
94 | */ | |
95 | ||
96 | void (*diff)(struct bench_timer */*bt*/, | |
97 | struct bench_timing */*delta_out*/, | |
98 | const struct bench_time */*t0*/, | |
99 | const struct bench_time */*t1*/); | |
100 | /* Subtract the time @t0@ from the time @t1@, leaving the result in | |
101 | * @*delta_out@, setting flags as appropriate. | |
102 | */ | |
67b5031e | 103 | |
b64eb60f | 104 | void (*destroy)(struct bench_timer */*bt*/); |
67b5031e | 105 | /* Release the timer and any resources it holds. */ |
b64eb60f MW |
106 | }; |
107 | ||
108 | struct bench_state { | |
67b5031e MW |
109 | struct bench_timer *tm; /* a timer */ |
110 | double target_s; /* target time to run benchmarks */ | |
c91413e6 | 111 | unsigned f; /* calibration flags (@BTF_...@) */ |
13ee7406 | 112 | #define BTF_CLB 0x0100 /* tried calibrating */ |
67b5031e | 113 | struct { double m, c; } clk, cy; /* calculated overheads */ |
b64eb60f MW |
114 | }; |
115 | ||
67b5031e | 116 | typedef void bench_fn(unsigned long /*n*/, void */*ctx*/); |
c91413e6 | 117 | /* Run the benchmark @n@ times, given a context pointer @ctx@. */ |
b64eb60f | 118 | |
b1a20bee MW |
119 | enum { |
120 | BTU_OP, /* counting operations of some kind */ | |
121 | BTU_BYTE, /* counting bytes (@rbuf >= 0@) */ | |
122 | BTU_LIMIT /* (number of units) */ | |
123 | }; | |
124 | ||
125 | /*----- Macros ------------------------------------------------------------*/ | |
126 | ||
127 | /* --- @BENCH_TIMELOOP_DECLS@ --- * | |
128 | * | |
129 | * Arguments: --- | |
130 | * | |
131 | * Use: Expands to the declarations needed by @BENCH_TIMELOOP_TAG@. | |
132 | * These should be at block scope, not at toplevel. | |
133 | */ | |
134 | ||
135 | #define BENCH_TIMELOOP_DECLS \ | |
136 | struct bench_timer *_bench_tm; \ | |
137 | struct bench_time _bench_t0, _bench_t1; \ | |
138 | unsigned long _bench_n, _bench_n1 | |
139 | ||
140 | /* --- @BENCH_TIMELOOP_TAG@ --- * | |
141 | * | |
142 | * Arguments: @tag@ = control structure tag | |
143 | * @struct bench_state *b@ = benchmark state | |
144 | * @struct bench_timing *delta_out@ = where to put the result | |
145 | * @double n@ = number of iterations | |
146 | * | |
147 | * Returns: --- | |
148 | * | |
149 | * Use: @BENCH_TIMELOOP_TAG(tag, b, delta_out, n) stmt@ runs @stmt@ | |
150 | * @n@ times, capturing the time and/or CPU cycles taken in | |
151 | * @*delta_out@. The iteration count must be no more than the | |
152 | * %%\emph{square}%% of @ULONG_MAX@. If @stmt@ executes a free | |
153 | * @break@ statement, then the containing loop -- which ust | |
154 | * exist -- is exited. | |
155 | * | |
156 | * This macro is not intended to be used directly by users: it | |
157 | * is used internally by @bench_calibrate@ and @BENCH_MEASURE@. | |
158 | */ | |
159 | ||
160 | #define BENCH_TIMELOOP_TAG(tag, b, delta_out, n) \ | |
161 | MC_BEFORE(tag##__benchtl_setup, { \ | |
162 | double _R = ULONG_MAX; double _n = (n); \ | |
163 | \ | |
164 | _bench_tm = (b)->tm; \ | |
165 | if (_n <= _R) { _bench_n1 = 0; _bench_n = _n; } \ | |
166 | else { _bench_n1 = _n/_R; _bench_n = _n - _R*_bench_n1; } \ | |
167 | }) \ | |
168 | MC_TARGET(tag##__benchtl_break, { break; }) \ | |
169 | MC_AFTER(tag##__benchtl_end, { \ | |
170 | _bench_tm->ops->diff(_bench_tm, (delta_out), &_bench_t0, &_bench_t1); \ | |
171 | }) \ | |
172 | MC_DOWHILE(tag##__benchtl_countdown, _bench_n1--) \ | |
173 | MC_AFTER(tag##__benchtl_post, { _bench_n = ULONG_MAX; }) \ | |
174 | MC_DOWHILE(tag##__benchtl_t1, \ | |
175 | _bench_tm->ops->now(_bench_tm, &_bench_t1, BTF_T1)) \ | |
176 | MC_WRAP(tag##__benchtl_t0, \ | |
177 | { while (_bench_tm->ops->now(_bench_tm, &_bench_t0, BTF_T0)) ; }, \ | |
178 | { ; }, \ | |
179 | { MC_GOTARGET(tag##__benchtl_break); }) | |
180 | ||
181 | /* --- @BENCH_MEASURE_DECLS@ --- * | |
182 | * | |
183 | * Arguments: --- | |
184 | * | |
185 | * Use: Expands to the declarations needed by @BENCH_MEASURE@. | |
186 | * These should be at block scope, not at toplevel. | |
187 | */ | |
188 | ||
189 | #define BENCH_MEASURE_DECLS \ | |
190 | struct bench_state *_bench_b; \ | |
191 | struct bench_timing *_bench_t; \ | |
192 | double _bench_nn; \ | |
193 | BENCH_TIMELOOP_DECLS | |
194 | ||
195 | /* --- @BENCH_MEASURE@, @BENCH_MEASURE_TAG@ --- * | |
196 | * | |
197 | * Arguments: @tag@ = control structure tag | |
198 | * @struct bench_state *b@ = benchmark state | |
199 | * @int &rc@ = where to put the result code (zero on success, | |
200 | * %$-1$% on failure) | |
201 | * @struct bench_timing *t_out@ = where to put the result | |
202 | * @double base@ = number of operations per external iteration | |
203 | * | |
204 | * Returns: --- | |
205 | * | |
206 | * Use: @BENCH_MEASURE(b, rc, delta_out, n) stmt@ measures the | |
207 | * execution of @stmt@. | |
208 | * | |
209 | * The statement should perform @_bench_n@ iterations of some | |
210 | * operation. It will be invoked multiple times, with varying | |
211 | * iteration counts, so as to run for approximately | |
212 | * @b->target_s@ seconds. | |
213 | * | |
214 | * On success, the resulting timing is left in @*t_out@, with | |
215 | * @t_out->n@ holding the product of the final iteration count | |
216 | * and @base@, and @rc@ is set to zero. If the timer fails, or | |
217 | * if @stmt@ invokes a free @break@ statement, then @rc@ is set | |
218 | * to %$-1$%. | |
219 | * | |
220 | * The macro @BENCH_MEASURE_TAG@ is the same, except that it | |
221 | * allows an explicit control-structure tag to be set so that it | |
222 | * can be used within larger control structure macros. | |
223 | */ | |
224 | ||
225 | #define BENCH_MEASURE_TAG(tag, b, rc, t_out, base) \ | |
226 | MC_BEFORE(tag##__benchmsr_setup, { \ | |
227 | _bench_b = (b); _bench_t = (t_out); _bench_nn = 1.0; \ | |
228 | if (bench_preflight(_bench_b)) MC_GOTARGET(tag##__benchmsr_fail); \ | |
229 | }) \ | |
230 | MC_TARGET(tag##__benchmsr_done, \ | |
231 | { bench_adjust(_bench_b, _bench_t, _bench_nn, (base)); (rc) = 0; }) \ | |
232 | MC_TARGET(tag##__benchmsr_fail, { (rc) = -1; }) \ | |
233 | for (;;) \ | |
234 | MC_WRAP(tag##__benchmsr_loop, \ | |
235 | { ; }, \ | |
236 | { _bench_t->f &= _bench_b->f; \ | |
237 | if (!(_bench_t->f&BTF_TIMEOK)) MC_GOTARGET(tag##__benchmsr_fail); \ | |
238 | if (bench_adapt(_bench_b, &_bench_nn, _bench_t)) \ | |
239 | MC_GOTARGET(tag##__benchmsr_done); \ | |
240 | }, \ | |
241 | { MC_GOTARGET(tag##__benchmsr_fail); }) \ | |
242 | BENCH_TIMELOOP_TAG(tag##__benchmsr_time, _bench_b, _bench_t, _bench_nn) | |
243 | ||
244 | #define BENCH_MEASURE(b, rc, t_out, base) \ | |
245 | BENCH_MEASURE_TAG(bench_measure, b, rc, t_out, base) | |
246 | ||
247 | /* --- @BENCHMARK_DECLS@ --- * | |
248 | * | |
249 | * Arguments: --- | |
250 | * | |
251 | * Use: Expands to the declarations needed by @BENCHMARK_TAG@. | |
252 | * These should be at block scope, not at toplevel. | |
253 | */ | |
254 | ||
255 | #define BENCHMARK_DECLS \ | |
256 | struct bench_timing _bench_tm; \ | |
257 | int _bench_rc; \ | |
258 | BENCH_MEASURE_DECLS | |
259 | ||
260 | /* --- @BENCHMARK_TAG@ --- * | |
261 | * | |
262 | * Arguments: @tag@ = control structure tag | |
263 | * @struct bench_state *b@ = benchmark state | |
264 | * @struct gprintf_ops *gops, void *go@ = output formatter | |
265 | * @unsigned unit@ = unit being measured (@BTU_...@ code) | |
266 | * @double base@ = number of units per external iteration | |
267 | * | |
268 | * Returns: --- | |
269 | * | |
270 | * Use: @BENCHMARK_TAG(tag, b, gops, go, unit, base) stmt@ measures | |
271 | * the execution of @stmt@ and writes a report to an output | |
272 | * formatter. The @stmt@ should run @_bench_n@ iterations of | |
273 | * the operation to be measured. | |
274 | * | |
275 | * No tagless version of this macro is provided, because it is | |
276 | * expected to be useful primarily in the construction of | |
277 | * higher-level macros. | |
278 | */ | |
279 | ||
280 | #define BENCHMARK_TAG(tag, b, gops, go, unit, base) \ | |
281 | MC_AFTER(tag##__benchmark_after, { \ | |
282 | if (_bench_rc) gprintf((gops), (go), "FAILED"); \ | |
283 | else bench_report((gops), (go), _bench_b, (unit), &_bench_tm); \ | |
284 | }) \ | |
285 | BENCH_MEASURE_TAG(tag##__benchmark_measure, \ | |
286 | (b), _bench_rc, &_bench_tm, (base)) | |
287 | ||
b64eb60f MW |
288 | /*----- Functions provided ------------------------------------------------*/ |
289 | ||
67b5031e MW |
290 | /* --- @bench_createtimer@ --- * |
291 | * | |
13ee7406 | 292 | * Arguments: @const char *config@ = user-supplied configuration string |
67b5031e | 293 | * |
13ee7406 MW |
294 | * Returns: A freshly constructed standard timer object, or null on |
295 | * failure. | |
67b5031e MW |
296 | * |
297 | * Use: Allocate a timer. Dispose of it by calling | |
298 | * @tm->ops->destroy(tm)@ when you're done. | |
13ee7406 MW |
299 | * |
300 | * Applications should not set configuration strings except as | |
301 | * established by user action, e.g., from a command-line option, | |
302 | * environment variable, or configuration file. | |
67b5031e MW |
303 | */ |
304 | ||
13ee7406 | 305 | extern struct bench_timer *bench_createtimer(const char *config); |
b64eb60f | 306 | |
67b5031e MW |
307 | /* --- @bench_init@ --- * |
308 | * | |
309 | * Arguments: @struct bench_state *b@ = bench state to initialize | |
13ee7406 | 310 | * @struct bench_timer *tm@ = timer to attach, or null |
67b5031e | 311 | * |
13ee7406 MW |
312 | * Returns: Zero on success, %$-1$% on failure. |
313 | * | |
314 | * Use: Initialize the benchmark state. On success, the timer state | |
315 | * still needs to be calibrated (use @bench_calibrate@) before | |
316 | * it can be used, but this will be done automatically by | |
317 | * @bench_measure@ if it's not done by hand earlier. The timer | |
318 | * is now owned by the benchmark state and will be destroyed by | |
319 | * @bench_destroy@. | |
67b5031e | 320 | * |
13ee7406 MW |
321 | * The only reason for failure is if @tm@ was null on entry, |
322 | * and automatic construction of a timer failed. The state is | |
323 | * safe to discard, but calling @bench_destroy@ is safe too. | |
67b5031e MW |
324 | */ |
325 | ||
13ee7406 | 326 | extern int bench_init(struct bench_state */*b*/, struct bench_timer */*tm*/); |
b64eb60f | 327 | |
67b5031e MW |
328 | /* --- @bench_destroy@ --- * |
329 | * | |
330 | * Arguments: @struct bench_state *b@ = bench state | |
331 | * | |
332 | * Returns: --- | |
333 | * | |
334 | * Use: Destroy the benchmark state, releasing the resources that it | |
335 | * holds. | |
336 | */ | |
337 | ||
e63124bc | 338 | extern void bench_destroy(struct bench_state */*b*/); |
b64eb60f | 339 | |
67b5031e MW |
340 | /* --- @bench_calibrate@ --- * |
341 | * | |
342 | * Arguments: @struct bench_state *b@ = bench state | |
b1a20bee | 343 | * @unsigned f@ = calibration flags |
67b5031e | 344 | * |
13ee7406 | 345 | * Returns: Zero on success, %$-1$% if calibration failed. |
67b5031e MW |
346 | * |
347 | * Use: Calibrate the benchmark state, so that it can be used to | |
348 | * measure performance reasonably accurately. | |
b1a20bee MW |
349 | * |
350 | * Calibration will take into account how the subject code is | |
351 | * going to be located. If you're going to use @BENCH_MEASURE@ | |
352 | * to measure a piece of literal code, then leave @f@ zero. If | |
353 | * the code to be measured is going to be executed via an | |
354 | * indirect branch, e.g., through the @measure@ function, then | |
355 | * set @BTF_INDIRECT@. | |
67b5031e MW |
356 | */ |
357 | ||
b1a20bee MW |
358 | #define BTF_INDIRECT 1u |
359 | extern int bench_calibrate(struct bench_state */*b*/, unsigned /*f*/); | |
360 | ||
361 | /* --- @bench_preflight@ --- * | |
362 | * | |
363 | * Arguments: @struct bench_state *b@ = benchmark state | |
364 | * | |
365 | * Returns: Zero on success, %$-1$% on failure. | |
366 | * | |
367 | * Use: Prepares for benchmarking on the current thread. Current | |
368 | * checks are that the timer is calibrated and that it can | |
369 | * successfully measure time; the timer preflight is also run. | |
370 | * | |
371 | * Users are unlikely to find this function useful: it's called | |
372 | * automatically by the @BENCH_MEASURE@ macro and the | |
373 | * @bench_measure@ function. | |
374 | */ | |
375 | ||
376 | extern int bench_preflight(struct bench_state */*b*/); | |
377 | ||
378 | /* --- @bench_adapt@ --- * | |
379 | * | |
380 | * Arguments: @struct bench_state *b@ = benchmark state | |
381 | * @double *n_inout@ = number of iterations, updated | |
382 | * @const struct bench_timing *t@ = timing from the previous run | |
383 | * | |
384 | * Returns: Nonzero if the measurement is sufficient; zero to run again. | |
385 | * | |
386 | * Use: This function determines a suitable number of iterations of a | |
387 | * benchmark function to perform next. It is used in a loop | |
388 | * such as the following. | |
389 | * | |
390 | * @double n = 1.0;@ | |
391 | * @struct bench_timing t;@ | |
392 | * | |
393 | * @do {@ | |
394 | * (run @n@ iterations; set @t@ to the timing) | |
395 | * @} while (!bench_adapt(b, &n, &t));@ | |
396 | * | |
397 | * On entry, @*n_inout@ should be the number of iterations | |
398 | * performed by the previous pass, and @*t@ the resulting time; | |
399 | * the @BTF_TIMEOK@ flag must be set @t->f@. If the timing is | |
400 | * sufficient -- @t->t@ is sufficiently close to @b->target_s@ | |
401 | * -- then the function returns nonzero to indicate that | |
402 | * measurement is complete. Otherwise, it sets @*n_inout@ to a | |
403 | * new, larger iteration count and returns zero to indicate that | |
404 | * a further pass is necessary. | |
405 | * | |
406 | * Users are unlikely to find this function useful: it's called | |
407 | * automatically by the @BENCH_MEASURE@ macro and the | |
408 | * @bench_measure@ function. | |
409 | */ | |
410 | ||
411 | extern int bench_adapt(struct bench_state */*b*/, double */*n_inout*/, | |
412 | const struct bench_timing */*t*/); | |
413 | ||
414 | /* --- @bench_adjust@ --- * | |
415 | * | |
416 | * Arguments: @struct bench_state *b@ = benchmark state | |
417 | * @struct bench_timing *t_inout@ = timing to adjust | |
418 | * @double n@ = number of external iterations performed | |
419 | * @double base@ = number of internal operations per external | |
420 | * iteration | |
421 | * | |
422 | * Returns: --- | |
423 | * | |
424 | * Use: Adjusts a raw timing, as captured by @BENCH_TIMELOOP@, | |
425 | * according to the calibration data captured in @b@. | |
426 | * On exit, the timing data is updated, and @t->n@ is set to the | |
427 | * product @n*base@. | |
428 | * | |
429 | * Users are unlikely to find this function useful: it's called | |
430 | * automatically by the @BENCH_MEASURE@ macro and the | |
431 | * @bench_measure@ function. | |
432 | */ | |
433 | ||
434 | extern void bench_adjust(struct bench_state */*b*/, | |
435 | struct bench_timing */*t_inout*/, | |
436 | double /*n*/, double /*base*/); | |
b64eb60f | 437 | |
67b5031e MW |
438 | /* --- @bench_measure@ --- * |
439 | * | |
8d372122 MW |
440 | * Arguments: @struct bench_state *b@ = benchmark state |
441 | * @struct bench_timing *t_out@ = where to leave the timing | |
67b5031e MW |
442 | * @double base@ = number of internal units per call |
443 | * @bench_fn *fn@, @void *ctx@ = benchmark function to run | |
444 | * | |
13ee7406 | 445 | * Returns: Zero on success, %$-1$% if timing failed. |
67b5031e MW |
446 | * |
447 | * Use: Measure a function. The function @fn@ is called adaptively | |
448 | * with an iteration count @n@ set so as to run for | |
449 | * approximately @b->target_s@ seconds. | |
450 | * | |
451 | * The result is left in @*t_out@, with @t_out->n@ counting the | |
452 | * final product of the iteration count and @base@ (which might, | |
453 | * e.g., reflect the number of inner iterations the function | |
454 | * performs, or the number of bytes it processes per iteration). | |
455 | */ | |
456 | ||
8d372122 MW |
457 | extern int bench_measure(struct bench_state */*b*/, |
458 | struct bench_timing */*t_out*/, | |
67b5031e | 459 | double /*base*/, bench_fn */*fn*/, void */*ctx*/); |
b64eb60f | 460 | |
b1a20bee MW |
461 | /*----- Reporting ---------------------------------------------------------*/ |
462 | ||
463 | /* --- @bench_report@ --- * | |
464 | * | |
465 | * Arguments: @const struct gprintf_ops *gops, void *gp@ = output formatter | |
466 | * @unsigned unit@ = unit processed by the benchmark function | |
467 | * @const struct bench_timing *t@ = benchmark result | |
468 | * | |
469 | * Returns: --- | |
470 | * | |
471 | * Use: Format, to the output identified by @gops@ and @go@, a | |
472 | * human-readable report of the benchmarking result @t@. No | |
473 | * newline is appended. | |
474 | * | |
475 | * The output format is subject to change in later versions. | |
476 | */ | |
477 | ||
478 | extern void bench_report(const struct gprintf_ops */*gops*/, void */*go*/, | |
479 | unsigned /*unit*/, | |
480 | const struct bench_timing */*t*/); | |
481 | ||
b64eb60f MW |
482 | /*----- That's all, folks -------------------------------------------------*/ |
483 | ||
484 | #ifdef __cplusplus | |
485 | } | |
486 | #endif | |
487 | ||
488 | #endif |