3 .\" Manual for benchmarking core
5 .\" (c) 2024 Straylight/Edgeware
8 .\"----- Licensing notice ---------------------------------------------------
10 .\" This file is part of the mLib utilities library.
12 .\" mLib is free software: you can redistribute it and/or modify it under
13 .\" the terms of the GNU Library General Public License as published by
14 .\" the Free Software Foundation; either version 2 of the License, or (at
15 .\" your option) any later version.
17 .\" mLib is distributed in the hope that it will be useful, but WITHOUT
18 .\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 .\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20 .\" License for more details.
22 .\" You should have received a copy of the GNU Library General Public
23 .\" License along with mLib. If not, write to the Free Software
24 .\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
27 .\"--------------------------------------------------------------------------
28 .so ../defs.man \" @@@PRE@@@
30 .\"--------------------------------------------------------------------------
31 .TH bench 3mLib "9 March 2024" "Straylight/Edgeware" "mLib utilities library"
32 .\" @bench_createtimer
38 .\"--------------------------------------------------------------------------
40 bench \- low-level benchmarking tools
42 .\"--------------------------------------------------------------------------
46 .B "#include <mLib/bench.h>"
49 .B "struct bench_time {"
56 .B "struct bench_timing {"
63 .B "struct bench_timerops {"
64 .BI " void (*describe)(struct bench_timer *" bt ", dstr *" d );
65 .BI " void (*now)(struct bench_timer *" bt ", struct bench_time *" t_out );
66 .BI " void (*destroy)(struct bench_timer *" bt );
68 .B "struct bench_timer {"
69 .B " const struct bench_timerops *ops;"
72 .B "struct bench_state {"
74 .B " double target_s;"
78 .BI "typedef void bench_fn(unsigned long " n ", void *" ctx );
80 .B "#define BTF_TIMEOK ..."
81 .B "#define BTF_CYOK ..."
82 .B "#define BTF_CLB ..."
83 .B "#define BTF_ANY (BTF_TIMEOK | BTF_CYOK)"
85 .B "struct bench_timer *bench_createtimer(void);"
87 .BI "int bench_init(struct bench_state *" b ", struct bench_timer *" tm );
88 .BI "void bench_destroy(struct bench_state *" b );
89 .BI "int bench_calibrate(struct bench_state *" b );
90 .ta \w'\fBint bench_measure('u
91 .BI "int bench_measure(struct bench_state *" b ", struct bench_timing *" t_out ,
92 .BI " double " base ", bench_fn *" fn ", void *" ctx );
95 .\"--------------------------------------------------------------------------
100 provides declarations and definitions
101 for performing low-level benchmarks.
105 This function will be described in detail later,
107 it calls a caller-provided function,
108 instructing it to run adaptively chosen numbers of iterations,
109 in order to get a reasonably reliable measurement of its running time,
110 and then reports its results by filling in a structure.
112 With understanding this function as our objective,
113 we must examine all of the pieces involved in making it work.
115 .SS Timers in general
118 is a gadget which is capable of reporting the current time,
119 in seconds (ideally precise to tiny fractions of a second),
120 and/or in CPU cycles.
121 A timer is represented by a pointer to an object of type
122 .BR "struct bench_timer" .
123 This structure has a single member,
126 .BR "struct bench_timerops" ,
127 which is a table of function pointers;
128 typically, a timer has more data following this,
129 but this fact is not exposed to applications.
131 The function pointers in
132 .B "struct bench_timerops"
137 must always point to the timer object itself.
139 .IB tm ->ops->describe( tm ", " d )
140 Write a description of the timer to the dynamic string
143 .IB tm ->ops->now( tm ", " t_out )
144 Store the current time in
148 used to represent the time reported by a timer
149 is described in detail below.
151 .IB tm ->ops->destroy( tm )
153 releasing all of the resources that it holds.
155 A time, as reported by a timer, is represented by the
156 .BR "struct bench_time" .
157 A passage-of-time measurement is stored in the
161 members, holding seconds and nanoseconds respectively.
162 (A timer need not have nanosecond precision.
163 The exact interpretation of the time \(en
164 e.g., whether it measures wallclock time,
166 or total thread CPU time \(en
167 is a matter for the specific timer implementation.)
168 A cycle count is stored in the
175 is set if the passage-of-time measurement
181 is set if the cycle count
184 Neither the time nor the cycle count need be measured
185 relative to any particular origin.
196 if the timer returned any valid timing information.
198 .SS The built-in timer
201 constructs and returns a timer.
202 It takes a single argument,
205 from which it reads configuration information.
208 fails, it returns a null pointer.
212 pointer may safely be null,
213 in which case a default configuration will be used.
216 set this pointer to a value supplied by a user,
217 e.g., through a command-line argument,
218 environment variable, or
221 The built-in timer makes use of one or two
223 a `clock' subtimer to measure the passage of time,
224 and possibly a `cycle' subtimer to count CPU cycles.
226 The configuration string consists of a sequence of words
227 separated by whitespace.
228 There may be additional whitespace at the start and end of the string.
229 The words recognized are as follows.
232 Prints a list of the available clock and cycle subtimers
236 Use the first of the listed clock subtimers
237 to initialize successfully
238 as the clock subtimer.
239 If none of the subtimers can be initialized,
240 then construction of the timer as a whole fails.
243 Use the first of the listed subtimers
244 to initialize successfully
245 as the cycle subtimer.
246 If none of the subtimers can be initialized,
247 then construction of the timer as a whole fails.
249 The clock subtimers are as follows.
250 Not all of them will be available on every platform.
252 .B posix-thread-cputime
253 Measures the passage of time using
254 .BR clock_gettime (2),
256 .B CLOCK_\%THREAD_\%CPUTIME_\%ID
260 Measures the passage of time using
264 is part of the original ANSI\ C standard,
265 this subtimer should always be available.
266 However, it may produce unhelpful results
267 if other threads are running.
269 The cycle subtimers are as follows.
270 Not all of them will be available on every platform.
273 Counts CPU cycles using the Linux-specific
274 .BR perf_event_open (2)
276 .BR PERF_\%COUNT_\%HW_\%CPU_\%CYCLES
278 Only available on Linux.
279 It will fail to initialize
280 if access to performance counters is restricted,
282 .B /proc/sys/kernel/perf_event_paranoid
286 Counts CPU cycles using the x86
289 This instruction is not really suitable for performance measurement:
290 it gives misleading results on CPUs with variable clock frequency.
293 A dummy cycle counter,
294 which will initialize successfully
295 and then fail to report cycle counts.
296 This is a reasonable fallback in many situations.
298 The built-in preference order for clock subtimers,
299 from most to least preferred, is
300 .B posix-thread-cputime
303 The built-in preference order for cycle subtimers,
304 from most to least preferred, is
311 .SS The benchmark state
314 tracks the information needed to measure performance of functions.
315 It is represented by a
316 .B struct bench_state
319 The benchmark state is initialized by calling
321 passing the address of the state structure to be initialized,
322 and a pointer to a timer.
325 is called with a non-null timer pointer,
326 then it will not fail;
327 the benchmark state will be initialized,
328 and the function returns zero.
329 If the timer pointer is null,
332 attempts to construct a timer for itself
334 .BR bench_createtimer .
336 then the benchmark state will be initialized,
337 and the function returns zero.
339 the timer becomes owned by the benchmark state:
342 on the benchmark state will destroy the timer.
345 is called with a null timer pointer,
346 and its attempt to create a timer for itself fails,
350 the benchmark state is not initialized
351 and can safely be discarded;
355 on the unsuccessfully initialized benchmark state is safe and has no effect.
360 releases any resources it holds,
361 most notably its timer, if any.
364 .B struct bench_state
365 is defined in the header file,
366 only two members are available for use by applications.
369 A word containing flags.
372 The target time for which to try to run a benchmark, in seconds.
373 After initialization, this is set to 1.0,
374 though applications can override it.
376 Before the benchmark state can be used in measurements,
379 This is performed by calling
381 on the benchmark state.
382 Calibration takes a noticeable amount of time
383 (currently about 0.25\*,s),
384 so it makes sense to defer it until it's known to be necessary.
386 Calibration is carried out separately, but in parallel,
387 for the timer's passage-of-time measurement and cycle counter.
388 Either or both of these calibrations can succeed or fail;
389 if passage-of-time calibration fails,
390 then cycle count calibration is impossible.
394 sets flags in the benchmark state's
397 if passage-of-time calibration succeeded,
400 if cycle-count calibration succeeded,
405 is set unconditionally,
406 as a persistent indication that calibration has been attempted.
410 function returns zero if it successfully calibrated
411 at least the passage-of-time measurement;
412 otherwise, it returns \-1.
415 is called for a second or subsequent time on the same benchmark state,
416 it returns immediately,
417 either returning 0 or \-1
418 according to whether passage-of-time had previously been calibrated.
422 .I benchmark function
425 .BI "void " fn "(unsigned long " n ", void *" ctx );
427 When called, it should perform the operation to be measured
432 argument is a pointer passed into
434 for the benchmark function's own purposes.
438 receives five arguments.
441 points to the benchmark state to be used.
445 .B "struct bench_timing"
446 in which the measurement should be left.
447 This structure is described below.
450 is a count of the number of operations performed
451 by each iteration of the benchmark function.
454 is a benchmark function, described above.
457 is a pointer to be passed to the benchmark function.
459 does not interpret this pointer in any way.
463 function calls its benchmark function repeatedly
464 with different iteration counts
466 with the objective that the call take approximately
468 seconds, as established in the benchmark state.
475 is satisfied when a call takes at least
477 Once the function finds a satisfactory number of iterations,
478 it stores the results in
480 If measurement succeeds, then
484 most likely because the timer failed \(en
489 structure reports the outcome of a successful measurement.
495 is set if the passage-of-time measurement in
499 is set if the cycle count in
504 The number of iterations performed by the benchmark function
505 on its satisfactory run,
510 The time taken for the satisfactory run of the benchmark function,
518 The number of CPU cycles used
519 in the satisfactory run of the benchmark function,
526 .\"--------------------------------------------------------------------------
532 .\"--------------------------------------------------------------------------
535 Mark Wooding, <mdw@distorted.org.uk>
537 .\"----- That's all, folks --------------------------------------------------