.nf
.B "#include <mLib/bench.h>"
.PP
-.ta 2n
+.ta 2n +2n +2n
.B "struct bench_time {"
.B " unsigned f;"
-.B " kludge64 s;"
-.B " uint32 ns;"
+.B " union {"
+.B " struct { kludge64 s; uint32 ns; } ts;"
+.B " clock_t clk;"
+.B " kludge64 rawns;"
+.B " } t;"
.B " kludge64 cy;"
.B "};"
.PP
.B " double cy;"
.B "};"
.PP
+.B "#define BTF_T0 0u"
+.B "#define BTF_T1 ..."
.B "struct bench_timerops {"
.BI " void (*describe)(struct bench_timer *" bt ", dstr *" d );
-.BI " void (*now)(struct bench_timer *" bt ", struct bench_time *" t_out );
+.ta 2n +\w'\fBint (*now)('u
+.BI " int (*now)(struct bench_timer *" bt ,
+.BI " struct bench_time *" t_out ", unsigned " f );
+.ta 2n +\w'\fBvoid (*diff)('u
+.BI " void (*diff)(struct bench_timer *" bt ,
+.BI " struct bench_timing *" delta_out ,
+.BI " const struct bench_time *" t0 ,
+.BI " const struct bench_time *" t1 );
.BI " void (*destroy)(struct bench_timer *" bt );
.B "};"
.B "struct bench_timer {"
Write a description of the timer to the dynamic string
.IR d .
.TP
-.IB tm ->ops->now( tm ", " t_out)
+.IB tm ->ops->now( tm ", " t_out ", " f )
Store the current time in
-.IR t_out .
+.BI * t_out \fR.
The
-.B struct bench_time
-used to represent the time reported by a timer
-is described in detail below.
+.B BTF_T1
+flag in
+.I f
+to indicate that this is the second call in a pair;
+leave it clear for the first call.
+(A fake
+.B BTF_T0
+flag is defined to be zero for symmetry.)
+Return zero on success
+.I or
+permanent failure;
+return \-1 if timing failed but
+trying again immediately has a reasonable chance of success.
+.TP
+.IB tm ->ops->diff( tm ", " delta_out ", " t0 ", " t1 )
+Store in
+.BI * delta_out
+the difference between the two times
+.I t0
+and
+.IR t1 .
.TP
.IB tm ->ops->destroy( tm )
Destroy the timer,
releasing all of the resources that it holds.
.PP
-A time, a reported by a timer, is represented by the
-.BR "struct bench_time" .
-A passage-of-time measurement is stored in the
-.B s
-and
-.B ns
-members, holding seconds and nanoseconds respectively.
-(A timer need not have nanosecond precision.
-The exact interpretation of the time \(en
-e.g., whether it measures wallclock time,
-user-mode CPU time,
-or total thread CPU time \(en
-is a matter for the specific timer implementation.)
-A cycle count is stored in the
-.B cy
-member.
-The
+A
+.B bench_timing
+structure reports the difference between two times,
+as determined by a timer's
+.B diff
+function.
+It has four members.
+.TP
.B f
-member stores flags:
+A flags word.
.B BTF_TIMEOK
-is set if the passage-of-time measurement
-.B s
-and
-.B ns
-are valid; and
+is set if the passage-of-time measurement in
+.B t
+is valid;
.B BTF_CYOK
-is set if the cycle count
+is set if the cycle count in
.B cy
is valid.
-Neither the time nor the cycle count need be measured
-relative to any particular origin.
The mask
.B BTF_ANY
covers the
.B BTF_CYOK
bits:
hence,
-.IB f &BTF_ANY
+.B f&BTF_ANY
is nonzero (true)
if the timer returned any valid timing information.
+.TP
+.B n
+The number of iterations performed by the benchmark function
+on its satisfactory run,
+multiplied by
+.IR base .
+.TP
+.B t
+The time taken for the satisfactory run of the benchmark function,
+in seconds.
+Only valid if
+.B BTF_TIMEOK
+is set in
+.BR f .
+.TP
+.B cy
+The number of CPU cycles used
+in the satisfactory run of the benchmark function.
+Only valid if
+.B BTF_CYOK
+is set in
+.BR f .
+.PP
+A
+.B "struct bench_time"
+represents a single instant in time,
+as captured by a timer's
+.B now
+function.
+The use of this structure is a private matter for the timer:
+the only hard requirement is that the
+.B diff
+function should be able to compute the difference between two times.
+However, the intent is that
+a passage-of-time measurement is stored in the
+.B t
+union,
+a cycle count is stored in the
+.B cy
+member, and
+the
+.B f
+member stores flags
+.B BTF_TIMEOK
+and/or
+.B BTF_CYOK
+if the passage-of-time or cycle count respectively are valid.
.
.SS The built-in timer
The function
The clock subtimers are as follows.
Not all of them will be available on every platform.
.TP
+.B linux-x86-perf-rdpmc-hw-cycles
+This is a dummy companion to the similarly named cycle subtimer;
+see its description below.
+.TP
.B posix-thread-cputime
Measures the passage of time using
.BR clock_gettime (2),
The cycle subtimers are as follows.
Not all of them will be available on every platform.
.TP
-.B linux-perf-event
-Counts CPU cycles using the Linux-specific
+.B linux-perf-read-hw-cycles
+Counts CPU cycles using the Linux-specific
.BR perf_event_open (2)
function to read the
.BR PERF_\%COUNT_\%HW_\%CPU_\%CYCLES
.B /proc/sys/kernel/perf_event_paranoid
level is too high.
.TP
-.B x86-rdtsc
-Counts CPU cycles using the x86
+.B linux-perf-rdpmc-hw-cycles
+Counts CPU cycles using the Linux-specific
+.BR perf_event_open (2)
+function,
+as for
+.B linux-x86-perf-read-hw-cycles
+above,
+except that it additionally uses the i386/AMD64
.B rdtsc
+and
+.B rdpmc
+instructions,
+together with information provided by the kernel
+through a memory-mapped page
+to do its measurements without any system call overheads.
+It does passage-of-time and cycle counting in a single operation,
+so no separate clock subtimer is required:
+the similarly-named clock subtimer does nothing
+except check that the
+.B linux-x86-perf-rdpmc-hw-cycles
+cycle subtimer has been selected.
+This is almost certainly the best choice if it's available.
+.TP
+.B x86-rdtscp
+Counts CPU cycles using the x86
+.B rdtscp
instruction.
This instruction is not really suitable for performance measurement:
it gives misleading results on CPUs with variable clock frequency.
.TP
+.B x86-rdtsc
+Counts CPU cycles using the x86
+.B rdtsc
+instruction.
+This has the downsides of
+.B rdtscp
+above,
+but also fails to detect when the thread has been suspended
+or transferred to a different CPU core
+and gives misleading answers in this case.
+Not really recommended.
+.TP
.B null
A dummy cycle counter,
which will initialize successfully
.PP
The built-in preference order for clock subtimers,
from most to least preferred, is
-.B posix-thread-cputime
+.BR linux-x86-perf-rdpmc-hw-cycles ,
followed by
+.BR posix-thread-cputime ,
+and finally
.BR stdc-clock .
The built-in preference order for cycle subtimers,
from most to least preferred, is
-.B linux-perf-event
+.BR linux-x86-perf-rdpmc-hw-cycles
+then
+.BR linux-x86-perf-read-hw-cycles ,
followed by
+.BR x86-rdtscp ,
+and
.BR x86-rdtsc ,
-and then
+and finally
.BR null .
.
.SS The benchmark state
If it fails \(en
most likely because the timer failed \(en
then it returns \-1.
-.PP
-A
-.B bench_timing
-structure reports the outcome of a successful measurement.
-It has four members.
-.TP
-.B f
-A flags word.
-.B BTF_TIMEOK
-is set if the passage-of-time measurement in
-.B t
-is valid;
-.B BTF_CYOK
-is set if the cycle count in
-.B cy
-is valid.
-.TP
-.B n
-The number of iterations performed by the benchmark function
-on its satisfactory run,
-multiplied by
-.IR base .
-.TP
-.B t
-The time taken for the satisfactory run of the benchmark function,
-in seconds.
-Only valid if
-.B BTF_TIMEOK
-is set in
-.BR f .
-.TP
-.B cy
-The number of CPU cycles used
-in the satisfactory run of the benchmark function,
-in seconds.
-Only valid if
-.B BTF_CYOK
-is set in
-.BR f .
.
.\"--------------------------------------------------------------------------
.SH "SEE ALSO"
#include <ctype.h>
#include <errno.h>
+#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include "linreg.h"
#include "macros.h"
+#if GCC_VERSION_P(4, 5) && (defined(__i386__) || defined(__x86_64__))
+# include <cpuid.h>
+# define CPUID_1D_TSC (1u << 4)
+# define CPUID_1xD_TSCP (1u << 27)
+#endif
+
+#if defined(HAVE_LINUX_PERF_EVENT_H) && defined(HAVE_UINT64)
+# include <sys/types.h>
+# include <unistd.h>
+# include <linux/perf_event.h>
+# include <asm/unistd.h>
+# if GCC_VERSION_P(4, 5) && (defined(__i386__) || defined(__x86_64__))
+# include <sys/mman.h>
+# endif
+#endif
+
/*----- Data structures ---------------------------------------------------*/
enum { CLK, CY, NTIMER };
struct timer {
  struct bench_timer _t;
  const struct timer_ops *ops[NTIMER]; /* subtimers for clock and cycles */
-  union { int fd; } u_cy; /* state for cycle measurement */
+  union {
+    unsigned tscaux; /* `ia32_tsc_aux' for `rdtscp' */
+    int fd; /* vanilla `perf_event_open' */
+    struct { const volatile void *map; size_t sz; } pmc; /* `perf_event_open'
+   * with `rdpmc' */
+  } u_cy; /* state for cycle measurement */
};
struct timer_ops {
unsigned f; /* flags */
#define TF_SECRET 1u /* don't try this automatically */
int (*init)(struct timer */*t*/); /* initialization function */
- void (*now)(struct bench_time *t_out, struct timer *t); /* read current */
- void (*teardown)(struct timer *t); /* release held resources */
+ int (*now)(struct timer */*t*/, /* read current */
+ struct bench_time */*t_out*/, unsigned /*f*/);
+ void (*diff)(struct timer */*t*/, /* difference */
+ struct bench_timing */*t_inout*/,
+ const struct bench_time */*t0*/,
+ const struct bench_time */*t1*/);
+ void (*teardown)(struct timer */*t*/); /* release held resources */
};
/*----- Preliminaries -----------------------------------------------------*/
}
}
-/* --- @timer_diff@ --- *
+/*----- Difference utilities ----------------------------------------------*/
+
+#ifdef HAVE_UINT64
+# define FLOATK64(k) ((double)(k).i)
+#else
+# define FLOATK64(k) ((double)(k).lo + 4294967296.0*(double)(k).hi)
+#endif
+
+/* --- @diff_ts@ --- *
*
- * Arguments: @struct bench_timing *delta_out@ = where to putt the result
- * @const struct bench_time *t0, *t1@ = two times captured by a
- * timer's @now@ function
+ * Arguments: @struct timer *t@ = timer structure
+ * @struct bench_timing *delta_inout@ = where to put the result
+ * @const struct time *t0, *t1@ = two input times
*
* Returns: ---
*
- * Use: Calculates the difference between two captured times. The
- * flags are set according to whether the differences are
- * meaningful; @delta_out->n@ is left unset.
+ * Use: Calculates a time difference for timers using the
+ * @struct timespec@-like time format.
*/
-static void timer_diff(struct bench_timing *delta_out,
- const struct bench_time *t0,
- const struct bench_time *t1)
+static void diff_ts(struct timer *t, struct bench_timing *delta_inout,
+ const struct bench_time *t0, const struct bench_time *t1)
{
unsigned f = t0->f&t1->f;
kludge64 k;
-#ifdef HAVE_UINT64
-# define FLOATK64(k) ((double)(k).i)
-#else
-# define FLOATK64(k) ((double)(k).lo + 4275123318.0*(double)(k).hi)
-#endif
+ if (f&BTF_TIMEOK) {
- if (!(f&BTF_TIMEOK))
- delta_out->t = 0.0;
- else {
- SUB64(k, t1->s, t0->s);
- delta_out->t = FLOATK64(k) - 1 +
- (t1->ns + NS_PER_S - t0->ns)/(double)NS_PER_S;
- }
+ /* Calculate the integer difference in seconds. */
+ SUB64(k, t1->t.ts.s, t0->t.ts.s);
- if (!(f&BTF_CYOK))
- delta_out->cy = 0.0;
- else {
- SUB64(k, t1->cy, t0->cy);
- delta_out->cy = FLOATK64(k);
+ /* And apply the nanoseconds difference. To prevent underflow,
+ * pre-emptively borrow one from the integer difference.
+ */
+ delta_inout->t =
+ FLOATK64(k) - 1.0 +
+ (t1->t.ts.ns + NS_PER_S - t0->t.ts.ns)/(double)NS_PER_S;
+
+ /* Done. */
+ delta_inout->f |= BTF_TIMEOK;
}
+}
- delta_out->f = f;
+/* --- @diff_cycles@ --- *
+ *
+ * Arguments: @struct timer *t@ = timer structure
+ * @struct bench_timing *delta_inout@ = where to put the result
+ * @const struct time *t0, *t1@ = two input times
+ *
+ * Returns: ---
+ *
+ * Use: Calculates a time difference for cycle-counting timers.
+ */
-#undef FLOATK64
+static void diff_cycles(struct timer *t, struct bench_timing *delta_inout,
+ const struct bench_time *t0,
+ const struct bench_time *t1)
+{
+ unsigned f = t0->f&t1->f;
+ kludge64 k;
+
+ if (f&BTF_CYOK) {
+ SUB64(k, t1->cy, t0->cy); delta_inout->cy = FLOATK64(k);
+ delta_inout->f |= BTF_CYOK;
+ }
}
+#undef FLOATK64
+
/*----- The null timer ----------------------------------------------------*/
/* This is a timer which does nothing, in case we don't have any better
*/
static int null_init(struct timer *t) { return (0); }
-static void null_now(struct bench_time *t_out, struct timer *t) { ; }
+static int null_now(struct timer *t, struct bench_time *t_out, unsigned f)
+ { return (0); }
+static void null_diff(struct timer *t, struct bench_timing *delta_inout,
+ const struct bench_time *t0,
+ const struct bench_time *t1)
+ { ; }
static void null_teardown(struct timer *t) { ; }
static const struct timer_ops null_ops =
- { "null", 0, null_init, null_now, null_teardown };
+ { "null", 0, null_init, null_now, null_diff, null_teardown };
#define NULL_ENT &null_ops,
/*----- The broken clock --------------------------------------------------*/
static int broken_init(struct timer *t) { return (-1); }
static const struct timer_ops broken_ops =
- { "broken", TF_SECRET, broken_init, null_now, null_teardown };
+ { "broken", TF_SECRET, broken_init, null_now, null_diff, null_teardown };
#define BROKEN_ENT &broken_ops,
/*----- Linux performance counters ----------------------------------------*/
#if defined(HAVE_LINUX_PERF_EVENT_H) && defined(HAVE_UINT64)
-#include <sys/types.h>
-#include <unistd.h>
+/* --- @perfevent_open@ --- *
+ *
+ * Arguments: ---
+ *
+ * Returns: File descriptor, or %$-1$%.
+ *
+ * Use: Open a performance measurement descriptor set up to count CPU
+ * cycles.
+ */
-#include <linux/perf_event.h>
-#include <asm/unistd.h>
+static int perfevent_open(void)
+{
+ struct perf_event_attr attr = { 0 };
+ int fd;
-static void perfevent_now(struct bench_time *t_out, struct timer *t)
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.size = sizeof(attr);
+ attr.config = PERF_COUNT_HW_CPU_CYCLES;
+ attr.disabled = 0;
+ attr.exclude_kernel = 1;
+ attr.exclude_hv = 1;
+
+ fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
+ if (fd < 0) {
+ debug("couldn't open perf event: %s", strerror(errno));
+ return (-1);
+ }
+
+ return (fd);
+}
+
+static int perfevent_now(struct timer *t,
+ struct bench_time *t_out, unsigned f)
{
ssize_t n;
n = read(t->u_cy.fd, &t_out->cy.i, sizeof(t_out->cy.i));
if (n != sizeof(t_out->cy.i)) {
debug("failed to read perf-event counter: %s", strerror(errno));
- return;
+ return (0);
}
- t_out->f |= BTF_CYOK;
+ t_out->f |= BTF_CYOK; return (0);
}
static void perfevent_teardown(struct timer *t)
static int perfevent_init(struct timer *t)
{
- struct perf_event_attr attr = { 0 };
struct bench_time tm;
+ int fd = -1, rc;
- attr.type = PERF_TYPE_HARDWARE;
- attr.size = sizeof(attr);
- attr.config = PERF_COUNT_HW_CPU_CYCLES;
- attr.disabled = 0;
- attr.exclude_kernel = 1;
- attr.exclude_hv = 1;
+ fd = perfevent_open(); if (!fd) { rc = -1; goto end; }
- t->u_cy.fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
- if (t->u_cy.fd < 0) {
- debug("couldn't open perf evvent: %s", strerror(errno));
- return (-1);
+ t->u_cy.fd = fd; tm.f = 0; perfevent_now(t, &tm, 0);
+ if (!(tm.f&BTF_CYOK)) { rc = -1; goto end; }
+ fd = -1; rc = 0;
+end:
+ if (fd != -1) close(fd);
+ return (rc);
+}
+
+static const struct timer_ops perfevent_ops =
+ { "linux-perf-read-hw-cycles", 0,
+ perfevent_init, perfevent_now, diff_cycles, perfevent_teardown };
+#define PERFEVENT_VANILLA_CYENT &perfevent_ops,
+
+# if GCC_VERSION_P(4, 5) && (defined(__i386__) || defined(__x86_64__))
+
+/* Special syscall-free version for x86 using `rdpmc' instruction. *
+ *
+ * This is a bit weird because it does both kinds of measurement in a single
+ * operation.
+ */
+
+static int perfevrdpmc_now(struct timer *t,
+ struct bench_time *t_out, unsigned f)
+{
+ const volatile struct perf_event_mmap_page *map = t->u_cy.pmc.map;
+ unsigned long long tsc = tsc, toff = toff, tenb = tenb;
+ unsigned long long cy = cy, cyoff = cyoff;
+ unsigned long long m, hi, lo;
+ unsigned tshift = tshift, tmult = tmult, q0, q1, ff;
+
+ /* Repeat until we can complete this job without the buffer changing in the
+ * middle.
+ */
+ q0 = map->lock;
+ __atomic_thread_fence(__ATOMIC_ACQ_REL);
+ for (;;) {
+ ff = 0;
+
+ /* Read the passage-of-time information. */
+ if (map->cap_user_time) {
+ tenb = map->time_enabled;
+ tsc = __builtin_ia32_rdtsc();
+ tshift = map->time_shift;
+ tmult = map->time_mult;
+ toff = map->time_offset;
+ ff |= BTF_TIMEOK;
+ }
+
+ /* Read the performance-counter information. */
+ if (map->cap_user_rdpmc) {
+ cy = __builtin_ia32_rdpmc(map->index - 1);
+ cyoff = map->offset;
+ ff |= BTF_CYOK;
+ }
+
+ /* Check the sequence number again. */
+ __atomic_thread_fence(__ATOMIC_ACQ_REL);
+ q1 = map->lock;
+ if (q0 == q1) break;
+ q0 = q1;
+ }
+
+ if (ff&BTF_TIMEOK) {
+ /* We have a raw reference-cycle count %$n$% (@tsc@), and parameters
+ * %$a$%, %$w$% and %$t_0$%, such that %$a n/2^w + t_0$% gives a time in
+ * nanoseconds.
+ */
+
+ m = (1ull << tshift) - 1;
+ hi = tsc >> tshift; lo = tsc&m;
+ t_out->t.rawns.i = hi*tmult + (lo*tmult >> tshift) + toff + tenb;
+ t_out->f |= BTF_TIMEOK;
}
- tm.f = 0; perfevent_now(&tm, t);
- if (!(tm.f&BTF_CYOK)) { close(t->u_cy.fd); return (-1); }
+ if (ff&BTF_CYOK) {
+ /* We have the cycle count. */
+ t_out->cy.i = cy + cyoff;
+ t_out->f |= BTF_CYOK;
+ }
return (0);
}
-static const struct timer_ops perfevent_ops =
- { "linux-perf-hw-cycles", 0,
- perfevent_init, perfevent_now, perfevent_teardown };
+static void perfevrdpmc_diff(struct timer *t,
+ struct bench_timing *delta_inout,
+ const struct bench_time *t0,
+ const struct bench_time *t1)
+{
+ unsigned f = t0->f&t1->f;
-# define PERFEVENT_CYENT &perfevent_ops,
+ if (f&BTF_TIMEOK) {
+ delta_inout->t = (t1->t.rawns.i - t0->t.rawns.i)/(double)NS_PER_S;
+ delta_inout->f |= BTF_TIMEOK;
+ }
+
+ if (f&BTF_CYOK) {
+ delta_inout->cy = t1->cy.i - t0->cy.i;
+ delta_inout->f |= BTF_CYOK;
+ }
+}
+
+static void perfevrdpmc_teardown(struct timer *t)
+ { munmap((/*unconst unvolatile*/ void *)t->u_cy.pmc.map, t->u_cy.pmc.sz); }
+
+/* Set up the syscall-free `rdpmc' cycle counter: check for `rdtsc', open a
+ * perf-event descriptor, map its information page, and confirm that the
+ * kernel grants userspace access to both the time and cycle readings.
+ * Returns zero on success, %$-1$% on failure.
+ */
+static int perfevrdpmc_cyinit(struct timer *t)
+{
+  const volatile struct perf_event_mmap_page *map = 0;
+  unsigned a, b, c, d, q0, q1, f;
+  int pgsz, mapsz, fd = -1, rc;
+
+  /* We need `rdtsc' to do the passage-of-time measurement. */
+  if (!__get_cpuid(1, &a, &b, &c, &d) || !(d&CPUID_1D_TSC))
+    { debug("no `rdtsc' instrunction"); return (-1); }
+
+  /* The rules say we must allocate %$1 + 2^n$% pages, so we need to know how
+   * big a page is.
+   */
+  pgsz = sysconf(_SC_PAGESIZE);
+  if (pgsz < 0) {
+    debug("failed to discover page size!: %s", strerror(errno));
+    rc = -1; goto end;
+  }
+
+  /* Open the measurement descriptor and map it.  (Failure from
+   * `perfevent_open' is %$-1$%: zero is a valid descriptor.)
+   */
+  fd = perfevent_open(); if (fd < 0) { rc = -1; goto end; }
+  mapsz = 2*pgsz;
+  map = mmap(0, mapsz, PROT_READ, MAP_SHARED, fd, 0);
+  if (map == MAP_FAILED) {
+    debug("failed to map perf event: %s", strerror(errno));
+    map = 0; rc = -1; goto end;
+  }
+
+  /* Check that it's revealed the necessary information. */
+  q0 = map->lock;
+  __atomic_thread_fence(__ATOMIC_ACQ_REL);
+  for (;;) {
+    f = 0;
+    if (map->cap_user_time) f |= BTF_TIMEOK;
+    if (map->cap_user_rdpmc) f |= BTF_CYOK;
+    __atomic_thread_fence(__ATOMIC_ACQ_REL);
+    q1 = map->lock;
+    if (q0 == q1) break;
+    q0 = q1;
+  }
+  if (!(f&BTF_TIMEOK))
+    { debug("kernel refused user time measurement"); rc = -1; goto end; }
+  if (!(f&BTF_CYOK))
+    { debug("kernel refused user cycle measurement"); rc = -1; goto end; }
+
+  /* All done. We can close the descriptor here: the mapping will keep the
+   * performance-measurement machinery alive.
+   */
+  t->u_cy.pmc.map = map; t->u_cy.pmc.sz = mapsz; map = 0; rc = 0;
+end:
+  if (fd != -1) close(fd);
+  if (map) munmap((/*unconst unvolatile*/ void *)map, mapsz);
+  return (rc);
+}
+
+static const struct timer_ops perfevrdpmc_cyops =
+ { "linux-x86-perf-rdpmc-hw-cycles", 0,
+ perfevrdpmc_cyinit, perfevrdpmc_now,
+ perfevrdpmc_diff, perfevrdpmc_teardown };
+
+static int perfevrdpmc_clkinit(struct timer *t)
+{
+ if (t->ops[CLK] != &perfevrdpmc_cyops) {
+ debug("linux-x86-perf-rdpmc-hw-cycles not set as cycle subtimer");
+ return(-1);
+ }
+ return (0);
+}
+
+static const struct timer_ops perfevrdpmc_clkops =
+ { "linux-x86-perf-rdpmc-hw-cycles", 0,
+ perfevrdpmc_clkinit, null_now,
+ null_diff, null_teardown };
+
+# define PERFEVENT_RDPMC_CLKENT &perfevrdpmc_clkops,
+# define PERFEVENT_RDPMC_CYENT &perfevrdpmc_cyops,
+
+# else
+# define PERFEVENT_RDPMC_CLKENT
+# define PERFEVENT_RDPMC_CYENT
+# endif
+
+# define PERFEVENT_CLKENT PERFEVENT_RDPMC_CLKENT
+# define PERFEVENT_CYENT PERFEVENT_RDPMC_CYENT PERFEVENT_VANILLA_CYENT
#else
+# define PERFEVENT_CLKENT
# define PERFEVENT_CYENT
#endif
* CPU frequency adjustments.
*/
-#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+#if GCC_VERSION_P(4, 5) && (defined(__i386__) || defined(__x86_64__))
-#include <cpuid.h>
-
-#define CPUID_1D_TSC (1u << 4)
-
-static void x86rdtsc_now(struct bench_time *t_out, struct timer *t)
- { t_out->cy.i = __builtin_ia32_rdtsc(); t_out->f |= BTF_CYOK; }
+static int x86rdtsc_now(struct timer *t,
+ struct bench_time *t_out, unsigned f)
+ { t_out->cy.i = __builtin_ia32_rdtsc(); t_out->f |= BTF_CYOK; return (0); }
static int x86rdtsc_init(struct timer *t)
{
if (!__get_cpuid(1, &a, &b, &c, &d) || !(d&CPUID_1D_TSC))
{ debug("no `rdtsc' instrunction"); return (-1); }
+ t->u_cy.tscaux = ~0u;
+ return (0);
+}
+
+/* Read the cycle counter using the `rdtscp' instruction.
+ *
+ * `rdtscp' also reports the current `ia32_tsc_aux' value, which the
+ * operating system conventionally loads with a CPU identifier.  On the
+ * first reading of a pair (@BTF_T1@ clear in @f@) we stash this value;
+ * on the second reading (@BTF_T1@ set), if the value has changed --
+ * suggesting that the thread was moved to a different CPU mid-measurement
+ * -- we return %$-1$% so that the caller retries the measurement.
+ */
+static int x86rdtscp_now(struct timer *t,
+                         struct bench_time *t_out, unsigned f)
+{
+  unsigned tscaux;
+  unsigned long long n;
+
+  n = __builtin_ia32_rdtscp(&tscaux);
+  if (!(f&BTF_T1))
+    t->u_cy.tscaux = tscaux;
+  else if (t->u_cy.tscaux != tscaux) {
+    debug("tscaux mismatch: new 0x%08x /= old 0x%08x",
+          tscaux, t->u_cy.tscaux);
+    return (-1);
+  }
+  t_out->cy.i = n; t_out->f |= BTF_CYOK; return (0);
+}
+
+static int x86rdtscp_init(struct timer *t)
+{
+ unsigned a, b, c, d;
+
+ if (!__get_cpuid(0x80000001, &a, &b, &c, &d) || !(d&CPUID_1xD_TSCP))
+ { debug("no `rdtscp' instrunction"); return (-1); }
return (0);
}
static const struct timer_ops x86rdtsc_ops =
- { "x86-rdtsc", 0, x86rdtsc_init, x86rdtsc_now, null_teardown };
+ { "x86-rdtsc", 0,
+ x86rdtsc_init, x86rdtsc_now, diff_cycles, null_teardown };
+static const struct timer_ops x86rdtscp_ops =
+ { "x86-rdtscp", 0,
+ x86rdtscp_init, x86rdtscp_now, diff_cycles, null_teardown };
-# define X86RDTSC_CYENT &x86rdtsc_ops,
+# define X86RDTSC_CYENT &x86rdtscp_ops, &x86rdtsc_ops,
#else
# define X86RDTSC_CYENT
#endif
#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_THREAD_CPUTIME_ID)
-static void gettime_now(struct bench_time *t_out, struct timer *t)
+static int gettime_now(struct timer *t, struct bench_time *t_out, unsigned f)
{
struct timespec now;
if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now))
- { debug("error reading POSIX clock: %s", strerror(errno)); return; }
- ASSIGN64(t_out->s, now.tv_sec); t_out->ns = now.tv_nsec;
- t_out->f |= BTF_TIMEOK;
+ { debug("error reading POSIX clock: %s", strerror(errno)); return (0); }
+ ASSIGN64(t_out->t.ts.s, now.tv_sec); t_out->t.ts.ns = now.tv_nsec;
+ t_out->f |= BTF_TIMEOK; return (0);
}
static int gettime_init(struct timer *t)
{
struct bench_time tm;
- tm.f = 0; gettime_now(&tm, t); if (!tm.f&BTF_TIMEOK) return (-1);
+ tm.f = 0; gettime_now(t, &tm, 0); if (!tm.f&BTF_TIMEOK) return (-1);
return (0);
}
static const struct timer_ops gettime_ops =
- { "posix-thread-cputime", 0, gettime_init, gettime_now, null_teardown };
+ { "posix-thread-cputime", 0,
+ gettime_init, gettime_now, diff_ts, null_teardown };
# define GETTIME_CLKENT &gettime_ops,
#else
* guaranteed to be available, though it's not likely to be very good.
*/
-static void clock_now(struct bench_time *t_out, struct timer *t)
+static int clock_now(struct timer *t, struct bench_time *t_out, unsigned f)
{
- clock_t now, x;
- unsigned long s; uint32 ns;
+ clock_t now;
now = clock();
if (now == (clock_t)-1) {
debug("error reading standard clock: %s", strerror(errno));
- return;
+ return (0);
}
- x = now/CLOCKS_PER_SEC;
- if (x > ULONG_MAX) { debug("standard clock out of range"); return; }
-
- s = x; x = now - CLOCKS_PER_SEC*s;
- if (!(NS_PER_S%CLOCKS_PER_SEC))
- ns = x*(NS_PER_S/CLOCKS_PER_SEC);
- else if (NS_PER_S <= ULONG_MAX/CLOCKS_PER_SEC)
- ns = (x*NS_PER_S)/CLOCKS_PER_SEC;
- else
- ns = x*((NS_PER_S + 0.0)/CLOCKS_PER_SEC);
- ASSIGN64(t_out->s, s); t_out->ns = ns; t_out->f |= BTF_TIMEOK;
+ t_out->t.clk = now; t_out->f |= BTF_TIMEOK; return (0);
+}
+
+static void clock_diff(struct timer *t, struct bench_timing *delta_inout,
+ const struct bench_time *t0,
+ const struct bench_time *t1)
+{
+ unsigned f = t0->f&t1->f;
+
+ if (f&BTF_TIMEOK) {
+ delta_inout->t = (t1->t.clk - t0->t.clk)/(double)CLOCKS_PER_SEC;
+ delta_inout->f |= BTF_TIMEOK;
+ }
}
static int clock_init(struct timer *t)
{
struct bench_time tm;
- tm.f = 0; clock_now(&tm, t); if (!tm.f&BTF_TIMEOK) return (-1);
+ tm.f = 0; clock_now(t, &tm, 0); if (!tm.f&BTF_TIMEOK) return (-1);
return (0);
}
static const struct timer_ops clock_ops =
- { "stdc-clock", 0, clock_init, clock_now, null_teardown };
+ { "stdc-clock", 0, clock_init, clock_now, clock_diff, null_teardown };
#define CLOCK_CLKENT &clock_ops,
/* Tables of timing sources. */
static const struct timer_ops
- *const clktab[] = { GETTIME_CLKENT CLOCK_CLKENT BROKEN_ENT 0 },
- *const cytab[] = { PERFEVENT_CYENT X86RDTSC_CYENT NULL_ENT BROKEN_ENT 0 };
+ *const clktab[] = { PERFEVENT_CLKENT
+ GETTIME_CLKENT
+ CLOCK_CLKENT
+ BROKEN_ENT
+ 0 },
+ *const cytab[] = { PERFEVENT_CYENT
+ X86RDTSC_CYENT
+ NULL_ENT
+ BROKEN_ENT
+ 0 };
static const struct timertab {
const char *what;
}
}
-static void timer_now(struct bench_timer *tm, struct bench_time *t_out)
+static int timer_now(struct bench_timer *tm,
+ struct bench_time *t_out, unsigned f)
{
struct timer *t = (struct timer *)tm;
unsigned i;
- for (i = 0; i < NTIMER; i++) t->ops[i]->now(t_out, t);
+ t_out->f = 0;
+ for (i = 0; i < NTIMER; i++) if (t->ops[i]->now(t, t_out, f)) return (-1);
+ return (0);
+}
+
+static void timer_diff(struct bench_timer *tm,
+ struct bench_timing *t_out,
+ const struct bench_time *t0,
+ const struct bench_time *t1)
+{
+ struct timer *t = (struct timer *)tm;
+ unsigned i;
+
+ t_out->f = 0;
+ for (i = 0; i < NTIMER; i++) t->ops[i]->diff(t, t_out, t0, t1);
}
static void timer_destroy(struct bench_timer *tm)
}
static const struct bench_timerops timer_ops =
- { timer_describe, timer_now, timer_destroy };
+ { timer_describe, timer_now, timer_diff, timer_destroy };
/* --- @bench_createtimer@ --- *
*
for (i = 0; i < NTIMER; i++) t->ops[i] = 0;
/* Try to set up the subtimers. */
- for (i = 0; i < NTIMER; i++)
+ for (i = NTIMER; i--; )
if (select_timer(t, i, tmconf[i].p, tmconf[i].sz)) goto end;
/* All is done. */
static void do_nothing(unsigned long n, void *ctx)
{ while (n--) RELAX; }
+/* --- @measure@ --- *
+ *
+ * Arguments: @struct bench_state *b@ = bench state
+ * @struct bench_timing *delta_out@ = where to leave the timing
+ * @bench_fn *fn@ = function to measure
+ * @void *ctx@ = context for the function
+ * @double n@ = number of iterations
+ *
+ * Returns: ---
+ *
+ * Use: Run the function @n@ times, and report how long it took.
+ *
+ * This function deals with retrying the measurements if the
+ * timer reports a temporary failure, and all of the
+ * difficulties if @n@ is too large to fit in a machine integer.
+ */
+
+static void measure(struct bench_state *b, struct bench_timing *delta_out,
+                    bench_fn *fn, void *ctx, double n)
+{
+  struct bench_timer *tm = b->tm;
+  struct bench_time t0, t1;
+  unsigned long n0, n1, m;
+  double R = ULONG_MAX;
+
+  if (n <= R) {
+    /* The count fits in an integer: run the function in a single call. */
+    n0 = n;
+    do {
+      while (tm->ops->now(tm, &t0, BTF_T0));
+      fn(n0, ctx);
+    } while (tm->ops->now(tm, &t1, BTF_T1));
+  } else {
+    /* Too big: split into @n1@ maximal chunks and a tail of @n0@.  Count
+     * down a fresh copy of @n1@ on each attempt: a transient failure from
+     * the closing `now' call means the whole job must be run again.
+     */
+    n1 = n/R; n0 = n - n1*R;
+    do {
+      while (tm->ops->now(tm, &t0, BTF_T0));
+      for (m = n1; m--; ) fn(ULONG_MAX, ctx);
+      fn(n0, ctx);
+    } while (tm->ops->now(tm, &t1, BTF_T1));
+  }
+  tm->ops->diff(tm, delta_out, &t0, &t1);
+}
+
/* --- @bench_calibrate@ --- *
*
* Arguments: @struct bench_state *b@ = bench state
int bench_calibrate(struct bench_state *b)
{
struct linreg lr_clk = LINREG_INIT, lr_cy = LINREG_INIT;
- unsigned long n;
- unsigned i;
- struct bench_timer *tm = b->tm;
- struct bench_time t0, t1;
struct bench_timing delta;
- double r;
+ double n, r;
bench_fn *fn = LAUNDER(&do_nothing);
- unsigned f = BTF_ANY;
+ unsigned i, f = BTF_ANY;
int rc;
/* The model here is that a timing loop has a fixed overhead as we enter
if (b->f&BTF_CLB) return (b->f&BTF_ANY ? 0 : -1);
/* Exercise the inner loop a few times to educate the branch predictor. */
- for (i = 0; i < 10; i++)
- { tm->ops->now(tm, &t0); fn(50, 0); tm->ops->now(tm, &t1); }
+ for (i = 0; i < 50; i++) measure(b, &delta, fn, 0, 10000);
/* Now we measure idle loops until they take sufficiently long -- or we run
* out of counter.
*/
debug("calibrating...");
- n = 1;
+ n = 1.0;
for (;;) {
/* Measure @n@ iterations of the idle loop. */
- tm->ops->now(tm, &t0); fn(n, 0); tm->ops->now(tm, &t1);
- timer_diff(&delta, &t0, &t1); f &= delta.f;
+ measure(b, &delta, fn, 0, n); f &= delta.f;
if (!(f&BTF_TIMEOK)) { rc = -1; goto end; }
/* Register the timings with the regression machinery. */
linreg_update(&lr_clk, n, delta.t);
if (!(f&BTF_CYOK))
- debug(" n = %10lu; t = %12g s", n, delta.t);
+ debug(" n = %10.0f; t = %12g s", n, delta.t);
else {
linreg_update(&lr_cy, n, delta.cy);
- debug(" n = %10lu; t = %12g s, cy = %10.0f", n, delta.t, delta.cy);
+ debug(" n = %10.0f; t = %12g s, cy = %10.0f", n, delta.t, delta.cy);
}
/* If we're done then stop. */
if (n >= ULONG_MAX - n/3) break;
/* Update the counter and continue. */
- n += n/3 + 1;
+ n += n/3.0 + 1.0;
}
/* Now run the linear regression to extract the constant and per-iteration
int bench_measure(struct bench_state *b, struct bench_timing *t_out,
double base, bench_fn *fn, void *ctx)
{
- struct bench_timer *tm = b->tm;
- struct bench_time t0, t1;
- unsigned long n, nn;
+ double n, nn;
/* Make sure the state is calibrated and usable. */
if (!(b->f&BTF_CLB) && bench_calibrate(b)) return (-1);
* hand, if %$T/t < 1 + 1/n$% then %$t (n + 1)/n > T$%, so just trying
* again with %$n' = n + 1$% iterations will very likely work.
*/
- debug("measuring..."); n = 1;
+ debug("measuring..."); n = 1.0;
for (;;) {
- tm->ops->now(tm, &t0); fn(n, ctx); tm->ops->now(tm, &t1);
- timer_diff(t_out, &t0, &t1);
+ measure(b, t_out, fn, ctx, n); t_out->f &= b->f;
if (!(t_out->f&BTF_TIMEOK)) return (-1);
- if (!(t_out->f&BTF_CYOK)) debug(" n = %10lu; t = %12g", n, t_out->t);
- else debug(" n = %10lu; t = %12g, cy = %10.0f", n, t_out->t, t_out->cy);
+ if (!(t_out->f&BTF_CYOK))
+ debug(" n = %10.0f; t = %12g", n, t_out->t);
+ else
+ debug(" n = %10.0f; t = %12g, cy = %10.0f", n, t_out->t, t_out->cy);
+
if (t_out->t >= 0.707*b->target_s) break;
nn = n*b->target_s/t_out->t;
- if (nn > n) n = nn;
+ if (n > ULONG_MAX || nn > (unsigned long)n + 1) n = nn;
else n++;
}
/*----- Miscellaneous utility macros --------------------------------------*/
+/* --- @N@ --- *
+ *
+ * Arguments: @type v[]@ = an actual array, not a pointer
+ *
+ * Returns: The number of elements in @v@.
+ */
+
#define N(v) (sizeof(v)/sizeof(*(v)))
+/* --- @STR@ --- *
+ *
+ * Arguments: @x@ = some tokens
+ *
+ * Returns: A string literal containing the macro-expanded text of @x@.
+ */
+
#define MLIB__STR(x) #x
#define STR(x) MLIB__STR(x)
+/* --- @GLUE@ --- *
+ *
+ * Arguments: @x, y@ = two sequences of tokens
+ *
+ * Returns: A single token formed by gluing together the macro-expansions
+ * of @x@ and @y@.
+ */
+
#define MLIB__GLUE(x, y) x##y
#define GLUE(x, y) MLIB__GLUE(x, y)
+/* --- @STATIC_ASSERT@ --- *
+ *
+ * Arguments: @int cond@ = a condition
+ * @msg@ = a string literal message
+ *
+ * Returns: ---
+ *
+ * Use: Fail at compile time unless @cond@ is nonzero. The failure
+ * might report @msg@.
+ */
+
#ifdef static_assert
# define STATIC_ASSERT(cond, msg) static_assert(!!(cond), msg)
#else
IGNORABLE extern char static_assert_failed[2*!!(cond) - 1]
#endif
+/* --- @COMMA@ --- *
+ *
+ * Arguments: ---
+ *
+ * Returns: A `%|,|%' token, which can be usefully passed to macros to
+ * avoid argument splitting.
+ */
+
#define COMMA ,
/*----- String and character hacks ----------------------------------------*/
+/* --- @IS...@ --- *
+ *
+ * Arguments: @int ch@ = a character code, but not @EOF@
+ *
+ * Returns: Nonzero if @ch@ is in the relevant @<ctype.h>@ category.
+ *
+ * Use: Classifies characters, but safely even if characters are
+ * signed.
+ *
+ * There is a macro for each of the @<ctype.h>@ @is...@
+ * functions.
+ */
+
#define CTYPE_HACK(func, ch) (func((unsigned char)(ch)))
#define ISALNUM(ch) CTYPE_HACK(isalnum, ch)
#define ISUPPER(ch) CTYPE_HACK(isupper, ch)
#define ISXDIGIT(ch) CTYPE_HACK(isxdigit, ch)
+/* --- @TO...@ --- *
+ *
+ * Arguments: @int ch@ = a character code, but not @EOF@
+ *
+ * Returns: The converted character code.
+ *
+ * Use: Converts characters, but safely even if characters are
+ * signed.
+ *
+ * There is a macro for each of the @<ctype.h>@ @to...@
+ * functions.
+ */
+
#define TOASCII(ch) CTYPE_HACK(toascii, ch)
#define TOLOWER(ch) CTYPE_HACK(tolower, ch)
#define TOUPPER(ch) CTYPE_HACK(toupper, ch)
+/* --- @MEMCMP@, @STRCMP@, @STRNCMP@ --- *
+ *
+ * Arguments: @const type *x, *y@ = pointers to strings
+ * @op@ = a relational operator symbol
+ * @size_t n@ = length of the strings
+ *
+ * Returns: Nonzero if the relationship between the strings satisfies the
+ * operator @op@, otherwise zero.
+ *
+ * Use: These macros mitigate the author's frequent error of failing
+ * to compare the result of the underlying standard functions
+ * against zero, effectively reversing the sense of an intended
+ * test for equality.
+ */
+
#define MEMCMP(x, op, y, n) (memcmp((x), (y), (n)) op 0)
#define STRCMP(x, op, y) (strcmp((x), (y)) op 0)
#define STRNCMP(x, op, y, n) (strncmp((x), (y), (n)) op 0)
-/*----- Compiler diagnostics ----------------------------------------------*/
+/*----- Compiler-specific definitions -------------------------------------*/
-/* --- Compiler-specific definitions --- */
+/* The descriptions of these are given below, with the fallback
+ * definitions.
+ */
#if GCC_VERSION_P(2, 5) || CLANG_VERSION_P(3, 3)
# define NORETURN __attribute__((__noreturn__))
/* --- Fallback definitions, mostly trivial --- */
-#ifndef DEPRECATED
-# define DEPRECATED(msg)
-#endif
-
-#ifndef EXECL_LIKE
-# define EXECL_LIKE(ntrail)
-#endif
+/* --- @DISCARD@ --- *
+ *
+ * Arguments: @x@ = a function call
+ *
+ * Returns: ---
+ *
+ * Use: Explicitly discard the result of @x@. This counteracts a
+ * @MUST_CHECK@ attribute on the called function.
+ */
#ifndef DISCARD
# define DISCARD(x) do if (x); while (0)
#endif
+/* --- @IGNORE@ --- *
+ *
+ * Arguments: @x@ = any expression
+ *
+ * Returns: ---
+ *
+ * Use: Ignore the value of @x@, overriding compiler warnings.
+ */
+
#ifndef IGNORE
# define IGNORE(x) ((void)(x))
#endif
-#ifndef MUFFLE_WARNINGS_DECL
-# define MUFFLE_WARNINGS_DECL(warns, body) body
-#endif
+/* --- @LAUNDER@ --- *
+ *
+ * Arguments: @x@ = some integer expression
+ *
+ * Returns: @x@.
+ *
+ * Use: Causes a compiler to know nothing about the value of @x@,
+ * even if it looks obvious, e.g., it's a constant.
+ */
-#ifndef MUFFLE_WARNINGS_EXPR
-# define MUFFLE_WARNINGS_EXPR(warns, body) (body)
+#ifndef LAUNDER
+# define LAUNDER(x) (x)
#endif
-#ifndef MUFFLE_WARNINGS_STMT
-# define MUFFLE_WARNINGS_STMT(warns, body) do { body } while (0)
-#endif
+/* --- @RELAX@ --- *
+ *
+ * Arguments: ---
+ *
+ * Returns: ---
+ *
+ * Use: Does nothing, but the compiler doesn't know that.
+ */
-#ifndef PRINTF_LIKE
-# define PRINF_LIKE(fmtix, argix)
+#ifndef RELAX
+# define RELAX
#endif
-#ifndef SCANF_LIKE
-# define SCANF_LIKE(fmtix, argix)
+/* --- @DEPRECATED@, @NORETURN@, @IGNORABLE@, @MUST_CHECK@ --- *
+ *
+ * Use: These are (mostly) function attributes; write them among the
+ * declaration specifiers for a function definition or
+ * declaration. These may not do anything, but the intended
+ * behaviour is as follows.
+ *
+ * * @DEPRECATED(msg)@ -- report a warning, quoting the string
+ * literal @msg@, if the function is called.
+ *
+ * * @NORETURN@ -- promise that the function doesn't return to
+ * its caller: either it kills the process, or it performs
+ * some nonlocal transfer.
+ *
+ * * @IGNORABLE@ -- the item (which might be data rather than
+ * a function) might not be referred to, but that's OK:
+ * don't warn about it.
+ *
+ *	  * @MUST_CHECK@ -- warn if the return value of a function is
+ * ignored. Use @DISCARD@ if you really don't care.
+ */
+
+#ifndef DEPRECATED
+# define DEPRECATED(msg)
#endif
#ifndef NORETURN
# define MUST_CHECK
#endif
-#ifndef LAUNDER
-# define LAUNDER
+/* --- @PRINTF_LIKE@, @SCANF_LIKE@, @EXECL_LIKE@ --- *
+ *
+ * Arguments: @int fmtix@ = format string argument index (starting from 1)
+ * @int argix@ = variable format argument tail index (starting
+ * from 1)
+ * @int ntrail@ = number of arguments following terminator
+ *
+ * Use: These are function attributes. Again, they might not do
+ * anything at all. By intention, they give the compiler
+ * information about a variadic function's arguments, so that it
+ * can warn about misuse.
+ *
+ * * @PRINTF_LIKE@ -- the function takes a @printf@-style
+ * format string as argument @fmtix@ and an argument tail
+ * (which may be empty) beginning with argument @argix@.
+ *
+ * * @SCANF_LIKE@ -- the function takes a @scanf@-style
+ * format string as argument @fmtix@ and an argument tail
+ * (which may be empty) beginning with argument @argix@.
+ *
+ * * @EXECL_LIKE@ -- the function takes a sequence of pointer
+ * arguments terminated by a null pointer, followed by
+ * @ntrail@ further arguments.
+ */
+
+#ifndef PRINTF_LIKE
+#  define PRINTF_LIKE(fmtix, argix)
 #endif
-#ifndef RELAX
-# define RELAX
+#ifndef SCANF_LIKE
+# define SCANF_LIKE(fmtix, argix)
#endif
+#ifndef EXECL_LIKE
+# define EXECL_LIKE(ntrail)
+#endif
+
+/* --- @MUFFLE_WARNINGS_...@ --- *
+ *
+ * Arguments: @warns@ = a sequence of @..._WARNING@ calls (see below)
+ * @body@ = some program text
+ *
+ * Use: Muffle specific warnings within the program text.
+ *
+ * For @MUFFLE_WARNINGS_DECL@, the program text is a
+ * declaration; for @MUFFLE_WARNINGS_EXPR@, it is an expression,
+ * and for @MUFFLE_WARNINGS_STMT@, it is a statement.
+ *
+ *		The warnings to be muffled are given as a list of
+ *		@..._WARNING@ macros, with no separators.  The list may
+ *		include warnings from multiple different compilers: entries
+ *		for irrelevant compilers will be ignored.
+ */
+
+#ifndef MUFFLE_WARNINGS_DECL
+# define MUFFLE_WARNINGS_DECL(warns, body) body
+#endif
+
+#ifndef MUFFLE_WARNINGS_EXPR
+# define MUFFLE_WARNINGS_EXPR(warns, body) (body)
+#endif
+
+#ifndef MUFFLE_WARNINGS_STMT
+# define MUFFLE_WARNINGS_STMT(warns, body) do { body } while (0)
+#endif
+
+/* --- @GCC_WARNING@ --- *
+ *
+ * Arguments: @warn@ = a string literal naming a warning, with `%|-W...|%'
+ * prefix
+ *
+ * Use: Names a GCC warning: use within @MUFFLE_WARNINGS_...@.
+ *
+ * Note that GCC's warning suppression is very buggy.
+ */
+
#ifndef GCC_WARNING
# define GCC_WARNING(warn)
#endif
+/* --- @CLANG_WARNING@ --- *
+ *
+ * Arguments: @warn@ = a string literal naming a warning, with `%|-W...|%'
+ * prefix
+ *
+ * Use: Names a Clang warning: use within @MUFFLE_WARNINGS_...@.
+ */
+
#ifndef CLANG_WARNING
# define CLANG_WARNING(warn)
#endif