chiark - git - mdw - mLib/blob - test/bench.h

   1 /* -*-c-*-
   2  *
   3  * Benchmarking support
   4  *
   5  * (c) 2023 Straylight/Edgeware
   6  */
   7
   8 /*----- Licensing notice --------------------------------------------------*
   9  *
  10  * This file is part of the mLib utilities library.
  11  *
  12  * mLib is free software: you can redistribute it and/or modify it under
  13  * the terms of the GNU Library General Public License as published by
  14  * the Free Software Foundation; either version 2 of the License, or (at
  15  * your option) any later version.
  16  *
  17  * mLib is distributed in the hope that it will be useful, but WITHOUT
  18  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  19  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
  20  * License for more details.
  21  *
  22  * You should have received a copy of the GNU Library General Public
  23  * License along with mLib.  If not, write to the Free Software
  24  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  25  * USA.
  26  */
  27
  28 #ifndef MLIB_BENCH_H
  29 #define MLIB_BENCH_H
  30
  31 #ifdef __cplusplus
  32   extern "C" {
  33 #endif
  34
  35 /*----- Header files ------------------------------------------------------*/
  36
  37 #include <limits.h>
  38 #include <time.h>
  39
  40 #ifndef MLIB_BITS_H
  41 #  include "bits.h"
  42 #endif
  43
  44 #ifndef MLIB_CONTROL_H
  45 #  include "control.h"
  46 #endif
  47
  48 #ifndef MLIB_DSTR_H
  49 #  include "dstr.h"
  50 #endif
  51
  52 #ifndef MLIB_MACROS_H
  53 #  include "macros.h"
  54 #endif
  55
  56 #ifndef MLIB_GPRINTF_H
  57 #  include "gprintf.h"
  58 #endif
  59
  60 /*----- Timers ------------------------------------------------------------*/
  61
  62 struct bench_time {
  63   unsigned f;                           /* flags */
  64 #define BTF_TIMEOK 1u                   /*   @s@ ad @ns@ slots are value */
  65 #define BTF_CYOK 2u                     /*   @cy@ slot is valid */
  66 #define BTF_ANY (BTF_TIMEOK | BTF_CYOK) /*   some part is useful */
  67   union {
  68     struct { kludge64 s; uint32 ns; } ts; /* @struct timespec@-ish */
  69     clock_t clk;                        /* @clock@ */
  70     kludge64 rawns;                     /* raw nanosecond count */
  71   } t;                                  /* time */
  72   kludge64 cy;                          /* count of CPU cycles */
  73 };
  74
  75 struct bench_timing {
  76   unsigned f;                           /* flags (@BTF_...@) */
  77   double n, t, cy;                      /* count, time, and cycles */
  78 };
  79
  80 struct bench_timer {
  81   const struct bench_timerops *ops;
  82   unsigned ref;
  83 };
  84
  85 struct bench_timerops {
  86   void (*describe)(struct bench_timer */*bt*/, dstr */*d*/);
  87     /* Write a description of the timer to @d@. */
  88
  89   int (*preflight)(struct bench_timer */*bt*/);
  90     /* Return zero if timer is ready to go, or %$-1$% otherwise.  The @now@
  91      * function will only be called following a successful @preflight@ on the
  92      * same thread.
  93      */
  94
  95   int (*now)(struct bench_timer */*bt*/, struct bench_time */*t_out*/,
  96               unsigned /*f*/);
  97 #define BTF_T0 0u                       /* fetching first time of a pair */
  98 #define BTF_T1 1u                       /* fetching second time of a pair */
  99     /* Fill in @*t_out@ with the current time.  Return zero on success
 100      * %%\emph{or} permanent failure; return %$-1$% on temporary failure.
 101      */
 102
 103   void (*diff)(struct bench_timer */*bt*/,
 104                struct bench_timing */*delta_out*/,
 105                const struct bench_time */*t0*/,
 106                const struct bench_time */*t1*/);
 107     /* Subtract the time @t0@ from the time @t1@, leaving the result in
 108      * @*delta_out@, setting flags as appropriate.
 109      */
 110
 111   void (*destroy)(struct bench_timer */*bt*/);
 112     /* Release the timer and any resources it holds. */
 113 };
 114
 115 /* --- @bench_createtimer@ --- *
 116  *
 117  * Arguments:   @const char *config@ = user-supplied configuration string
 118  *
 119  * Returns:     A freshly constructed standard timer object, or null on
 120  *              failure.
 121  *
 122  * Use:         Allocate a timer.  Dispose of it by calling
 123  *              @tm->ops->destroy(tm)@ when you're done.
 124  *
 125  *              Applications should not set configuration strings except as
 126  *              established by user action, e.g., from a command-line option,
 127  *              environment variable, or configuration file.
 128  */
 129
 130 extern struct bench_timer *bench_createtimer(const char *config);
 131
 132 /* --- @BENCH_TIMELOOP_DECLS@ --- *
 133  *
 134  * Arguments:   ---
 135  *
 136  * Use:         Expands to the declarations needed by @BENCH_TIMELOOP_TAG@.
 137  *              These should be at block scope, not at toplevel.
 138  */
 139
 140 #define BENCH_TIMELOOP_DECLS                                            \
 141   struct bench_timer *_bench_tm;                                        \
 142   struct bench_time _bench_t0, _bench_t1;                               \
 143   unsigned long _bench_n, _bench_n1
 144
 145 /* --- @BENCH_TIMELOOP_TAG@ --- *
 146  *
 147  * Arguments:   @tag@ = control structure tag
 148  *              @struct bench_state *tm@ = benchmark timer
 149  *              @struct bench_timing *delta_out@ = where to put the result
 150  *              @double n@ = number of iterations
 151  *
 152  * Returns:     ---
 153  *
 154  * Use:         @BENCH_TIMELOOP_TAG(tag, tm, delta_out, n) stmt@ runs @stmt@
 155  *              @n@ times, capturing the time and/or CPU cycles taken in
 156  *              @*delta_out@.  The iteration count must be no more than the
 157  *              %%\emph{square}%% of @ULONG_MAX@.  If @stmt@ executes a free
 158  *              @break@ statement, then the containing loop -- which must
 159  *              exist -- is exited.
 160  *
 161  *              This macro is not intended to be used directly by users: it
 162  *              is used internally by @bench_calibrate@ and @BENCH_MEASURE@.
 163  */
 164
 165 #define BENCH_TIMELOOP_TAG(tag, tm, delta_out, n, onbreak)              \
 166   MC_BEFORE(tag##__benchtl_setup, {                                     \
 167     double _R = ULONG_MAX; double _n = (n);                             \
 168                                                                         \
 169     _bench_tm = (tm);                                                   \
 170     if (_n <= _R) { _bench_n1 = 0; _bench_n = _n; }                     \
 171     else { _bench_n1 = _n/_R; _bench_n = _n - _R*_bench_n1; }           \
 172   })                                                                    \
 173   MC_TARGET(tag##__benchtl_break, { break; })                           \
 174   MC_AFTER(tag##__benchtl_end, {                                        \
 175     _bench_tm->ops->diff(_bench_tm, (delta_out), &_bench_t0, &_bench_t1); \
 176   })                                                                    \
 177   MC_DOWHILE(tag##__benchtl_countdown, _bench_n1--)                     \
 178     MC_AFTER(tag##__benchtl_post, { _bench_n = ULONG_MAX; })            \
 179     MC_DOWHILE(tag##__benchtl_t1,                                       \
 180                _bench_tm->ops->now(_bench_tm, &_bench_t1, BTF_T1))      \
 181       MC_WRAP(tag##__benchtl_t0,                                        \
 182         { while (_bench_tm->ops->now(_bench_tm, &_bench_t0, BTF_T0)) ; }, \
 183         { ; },                                                          \
 184         { MC_GOTARGET(tag##__benchtl_break); })
 185
 186 /*----- Measuring ---------------------------------------------------------*/
 187
 188 struct bench_state {
 189   struct bench_timer *tm;               /* a timer */
 190   double target_s;                      /* target time to run benchmarks */
 191   unsigned f;                           /* calibration flags (@BTF_...@) */
 192 #define BTF_CLB 0x0100                  /*   tried calibrating */
 193   struct { double m, c; } clk, cy;      /* calculated overheads */
 194 };
 195
 196 typedef void bench_fn(unsigned long /*n*/, void */*ctx*/);
 197   /* Run the benchmark @n@ times, given a context pointer @ctx@. */
 198
 199 /* --- @bench_init@ --- *
 200  *
 201  * Arguments:   @struct bench_state *b@ = bench state to initialize
 202  *              @struct bench_timer *tm@ = timer to attach, or null
 203  *
 204  * Returns:     Zero on success, %$-1$% on failure.
 205  *
 206  * Use:         Initialize the benchmark state.  On success, the timer state
 207  *              still needs to be calibrated (use @bench_calibrate@) before
 208  *              it can be used, but this will be done automatically by
 209  *              @bench_measure@ if it's not done by hand earlier.  The timer
 210  *              is now owned by the benchmark state and will be destroyed by
 211  *              @bench_destroy@.
 212  *
 213  *              The only reason for failure is if @tm@ was null on entry,
 214  *              and automatic construction of a timer failed.  The state is
 215  *              safe to discard, but calling @bench_destroy@ is safe too.
 216  */
 217
 218 extern int bench_init(struct bench_state */*b*/, struct bench_timer */*tm*/);
 219
 220 /* --- @bench_destroy@ --- *
 221  *
 222  * Arguments:   @struct bench_state *b@ = bench state
 223  *
 224  * Returns:     ---
 225  *
 226  * Use:         Destroy the benchmark state, releasing the resources that it
 227  *              holds.
 228  */
 229
 230 extern void bench_destroy(struct bench_state */*b*/);
 231
 232 /* --- @bench_calibrate@ --- *
 233  *
 234  * Arguments:   @struct bench_state *b@ = bench state
 235  *              @unsigned f@ = calibration flags
 236  *
 237  * Returns:     Zero on success, %$-1$% if calibration failed.
 238  *
 239  * Use:         Calibrate the benchmark state, so that it can be used to
 240  *              measure performance reasonably accurately.
 241  *
 242  *              Calibration will take into account how the subject code is
 243  *              going to be located.  If you're going to use @BENCH_MEASURE@
 244  *              to measure a piece of literal code, then leave @f@ zero.  If
 245  *              the code to be measured is going to be executed via an
 246  *              indirect branch, e.g., through the @measure@ function, then
 247  *              set @BTF_INDIRECT@.
 248  */
 249
 250 #define BTF_INDIRECT 1u
 251 extern int bench_calibrate(struct bench_state */*b*/, unsigned /*f*/);
 252
 253 /* --- @bench_preflight@ --- *
 254  *
 255  * Arguments:   @struct bench_state *b@ = benchmark state
 256  *
 257  * Returns:     Zero on success, %$-1$% on failure.
 258  *
 259  * Use:         Prepares for benchmarking on the current thread.  Current
 260  *              checks are that the timer is calibrated and that it can
 261  *              successfully measure time; the timer preflight is also run.
 262  *
 263  *              Users are unlikely to find this function useful: it's called
 264  *              automatically by the @BENCH_MEASURE@ macro and the
 265  *              @bench_measure@ function.
 266  */
 267
 268 extern int bench_preflight(struct bench_state */*b*/);
 269
 270 /* --- @bench_adapt@ --- *
 271  *
 272  * Arguments:   @double *n_inout@ = number of iterations, updated
 273  *              @double target_s@ = target time in seconds
 274  *              @const struct bench_timing *t@ = timing from the previous run
 275  *
 276  * Returns:     Nonzero if the measurement is sufficient; zero to run again.
 277  *
 278  * Use:         This function determines a suitable number of iterations of a
 279  *              benchmark function to perform next.  It is used in a loop
 280  *              such as the following.
 281  *
 282  *                      @BENCH_TIMELOOP_DECLS;@
 283  *                      @struct bench_timer *tm;@
 284  *                      @struct bench_timing t;@
 285  *                      @double n = 1.0, target_s = 1.0;@
 286  *
 287  *                      @do {@
 288  *                        @BENCH_TIMELOOP_TAG(time, tm, &t, n, { break; })@
 289  *                          (do @_bench_n@ iterations of some computation)@;@
 290  *                      @} while (!bench_adapt(&n, target_s, &t));@
 291  *
 292  *              On entry, @*n_inout@ should be the number of iterations
 293  *              performed by the previous pass, and @*t@ the resulting time;
 294  *              the @BTF_TIMEOK@ flag must be set in @t->f@.  If the timing
 295  *              is sufficient -- @t->t@ is sufficiently close to @target_s@
 296  *              -- then the function returns nonzero to indicate that
 297  *              measurement is complete.  Otherwise, it sets @*n_inout@ to a
 298  *              new, larger iteration count and returns zero to indicate that
 299  *              a further pass is necessary.
 300  *
 301  *              Users are unlikely to find this function useful: it's called
 302  *              automatically by the @BENCH_MEASURE@ macro and the
 303  *              @bench_measure@ function.
 304  */
 305
 306 extern int bench_adapt(double */*n_inout*/, double /*target_s*/,
 307                        const struct bench_timing */*t*/);
 308
 309 /* --- @bench_adjust@ --- *
 310  *
 311  * Arguments:   @struct bench_state *b@ = benchmark state
 312  *              @struct bench_timing *t_inout@ = timing to adjust
 313  *              @double n@ = number of external iterations performed
 314  *              @double base@ = number of internal operations per external
 315  *                      iteration
 316  *
 317  * Returns:     ---
 318  *
 319  * Use:         Adjusts a raw timing, as captured by @BENCH_TIMELOOP_TAG@,
 320  *              according to the calibration data captured in @b@.
 321  *              On exit, the timing data is updated, and @t->n@ is set to the
 322  *              product @n*base@.
 323  *
 324  *              Users are unlikely to find this function useful: it's called
 325  *              automatically by the @BENCH_MEASURE@ macro and the
 326  *              @bench_measure@ function.
 327  */
 328
 329 extern void bench_adjust(struct bench_state */*b*/,
 330                          struct bench_timing */*t_inout*/,
 331                          double /*n*/, double /*base*/);
 332
 333 /* --- @BENCH_MEASURE_DECLS@ --- *
 334  *
 335  * Arguments:   ---
 336  *
 337  * Use:         Expands to the declarations needed by @BENCH_MEASURE@.
 338  *              These should be at block scope, not at toplevel.
 339  */
 340
 341 #define BENCH_MEASURE_DECLS                                             \
 342   struct bench_state *_bench_b;                                         \
 343   struct bench_timing *_bench_t;                                        \
 344   double _bench_nn;                                                     \
 345   BENCH_TIMELOOP_DECLS
 346
 347 /* --- @BENCH_MEASURE@, @BENCH_MEASURE_TAG@ --- *
 348  *
 349  * Arguments:   @tag@ = control structure tag
 350  *              @struct bench_state *b@ = benchmark state
 351  *              @int &rc@ = where to put the result code (zero on success,
 352  *                      %$-1$% on failure)
 353  *              @struct bench_timing *t_out@ = where to put the result
 354  *              @double base@ = number of operations per external iteration
 355  *
 356  * Returns:     ---
 357  *
 358  * Use:         @BENCH_MEASURE(b, rc, delta_out, n) stmt@ measures the
 359  *              execution of @stmt@.
 360  *
 361  *              The statement should perform @_bench_n@ iterations of some
 362  *              operation.  It will be invoked multiple times, with varying
 363  *              iteration counts, so as to run for approximately
 364  *              @b->target_s@ seconds.
 365  *
 366  *              On success, the resulting timing is left in @*t_out@, with
 367  *              @t_out->n@ holding the product of the final iteration count
 368  *              and @base@, and @rc@ is set to zero.  If the timer fails, or
 369  *              if @stmt@ invokes a free @break@ statement, then @rc@ is set
 370  *              to %$-1$%.
 371  *
 372  *              The macro @BENCH_MEASURE_TAG@ is the same, except that it
 373  *              allows an explicit control-structure tag to be set so that it
 374  *              can be used within larger control structure macros.
 375  */
 376
 377 #define BENCH_MEASURE_TAG(tag, b, rc, t_out, base)                      \
 378   MC_BEFORE(tag##__benchmsr_setup, {                                    \
 379     _bench_b = (b); _bench_t = (t_out); _bench_nn = 1.0;                \
 380     if (bench_preflight(_bench_b)) MC_GOTARGET(tag##__benchmsr_fail);   \
 381   })                                                                    \
 382   MC_TARGET(tag##__benchmsr_done,                                       \
 383     { bench_adjust(_bench_b, _bench_t, _bench_nn, (base)); (rc) = 0; }) \
 384   MC_TARGET(tag##__benchmsr_fail, { (rc) = -1; })                       \
 385   for (;;)                                                              \
 386     MC_WRAP(tag##__benchmsr_loop,                                       \
 387       { ; },                                                            \
 388       { _bench_t->f &= _bench_b->f;                                     \
 389         if (!(_bench_t->f&BTF_TIMEOK)) MC_GOTARGET(tag##__benchmsr_fail); \
 390         if (bench_adapt(&_bench_nn, _bench_b->target_s, _bench_t))      \
 391           MC_GOTARGET(tag##__benchmsr_done);                            \
 392       },                                                                \
 393       { MC_GOTARGET(tag##__benchmsr_fail); })                           \
 394     BENCH_TIMELOOP_TAG(tag##__benchmsr_time,                            \
 395                        _bench_b->tm, _bench_t, _bench_nn, { break; })
 396
 397 #define BENCH_MEASURE(b, rc, t_out, base)                               \
 398   BENCH_MEASURE_TAG(bench_measure, b, rc, t_out, base)
 399
 400 /* --- @bench_measure@ --- *
 401  *
 402  * Arguments:   @struct bench_state *b@ = benchmark state
 403  *              @struct bench_timing *t_out@ = where to leave the timing
 404  *              @double base@ = number of internal units per call
 405  *              @bench_fn *fn@, @void *ctx@ = benchmark function to run
 406  *
 407  * Returns:     Zero on success, %$-1$% if timing failed.
 408  *
 409  * Use:         Measure a function.  The function @fn@ is called adaptively
 410  *              with an iteration count @n@ set so as to run for
 411  *              approximately @b->target_s@ seconds.
 412  *
 413  *              The result is left in @*t_out@, with @t_out->n@ counting the
 414  *              final product of the iteration count and @base@ (which might,
 415  *              e.g., reflect the number of inner iterations the function
 416  *              performs, or the number of bytes it processes per iteration).
 417  */
 418
 419 extern int bench_measure(struct bench_state */*b*/,
 420                          struct bench_timing */*t_out*/,
 421                          double /*base*/, bench_fn */*fn*/, void */*ctx*/);
 422
 423 /*----- Reporting ---------------------------------------------------------*/
 424
 425 enum {
 426   BTU_OP,                              /* counting operations of some kind */
 427   BTU_BYTE,                             /* counting bytes (@rbuf >= 0@) */
 428   BTU_LIMIT                             /* (number of units) */
 429 };
 430
 431 /* --- @bench_report@ --- *
 432  *
 433  * Arguments:   @const struct gprintf_ops *gops, void *gp@ = output formatter
 434  *              @unsigned unit@ = unit processed by the benchmark function
 435  *              @const struct bench_timing *t@ = benchmark result
 436  *
 437  * Returns:     ---
 438  *
 439  * Use:         Format, to the output identified by @gops@ and @go@, a
 440  *              human-readable report of the benchmarking result @t@.  No
 441  *              newline is appended.
 442  *
 443  *              The output format is subject to change in later versions.
 444  */
 445
 446 extern void bench_report(const struct gprintf_ops */*gops*/, void */*go*/,
 447                          unsigned /*unit*/,
 448                          const struct bench_timing */*t*/);
 449
 450 /*----- That's all, folks -------------------------------------------------*/
 451
 452 #ifdef __cplusplus
 453   }
 454 #endif
 455
 456 #endif