. ds /= \(!=
. ds <= \(<=
. ds >= \(>=
+. ds mu \(mu
+. ds sr \(sr
. ds ' \(fm
. ds , \h'\w'\ 'u/2u'
. if \n(.g \{\
. ds se
. ds us _
. ds ue
+. ds mu *
+. ds sr sqrt
. ds ' \(aq
. ds *d \,\fIdelta\/\fP
. ds /= /=
The macro
.B ALLOCV_SAFE_P
returns nonzero if the product
-.IR n "\ \(mu\ " sz
+.IR n "\ \*(mu\ " sz
is representable in type
.B size_t
and zero otherwise;
i.e., it returns true if it would be safe to try to allocate
-.IR n "\ \(mu\ " sz
+.IR n "\ \*(mu\ " sz
bytes.
The macro
.BR A_ALLOCV
such that
.I n
iterations of the computation take more than
-.IB b ->target_s "" \fR/\(sr2
+.IB b ->target_s "" \fR/\*(sr2
seconds.
If measurement fails,
then
is filled in with the measurement;
.IB t_out ->n
is set to
-.IR n "\ \(mu\ " base .
+.IR n "\ \*(mu\ " base .
.PP
The
.B BENCH_MEASURE_TAG
flag must be set in
.IB t ->f \fR.
If the timing is sufficient \(en if
-.IR t\fB->t "\ \*(>=\ " target_s /\(sr2
+.IR t\fB->t "\ \*(>=\ " target_s /\*(sr2
\(en then
.B bench_adapt
returns a nonzero value to indicate that measurement is complete.
and
.IB t ->n
is set to the product
-.IR n "\ \(mu\ " base .
+.IR n "\ \*(mu\ " base .
.
.SS Reporting results
The
static int eqish_floating_p(double x, double y,
const struct tvec_floatinfo *fi)
{
- double t;
+ double t, u;
+ /* NaNs and infinities are equal only to each other. */
if (NANP(x)) return (NANP(y)); else if (NANP(y)) return (0);
if (INFP(x)) return (x == y); else if (INFP(y)) return (0);
+ /* Compare finite values. */
switch (fi ? fi->f&TVFF_EQMASK : TVFF_EXACT) {
case TVFF_EXACT:
return (x == y && NEGP(x) == NEGP(y));
case TVFF_ABSDELTA:
- t = x - y; if (t < 0) t = -t; return (t < fi->delta);
+ t = fabs(y - x); return (t < fi->delta);
case TVFF_RELDELTA:
- t = 1.0 - x/y; if (t < 0) t = -t; return (t < fi->delta);
+ t = fabs(y - x); u = fabs(y*fi->delta); if (u < DBL_MIN) u = DBL_MIN;
+ return (t <= u);
default:
abort();
}
/* Predefined floating-point ranges. */
const struct tvec_floatinfo
- tvflt_float = { TVFF_EXACT | TVFF_INFOK | TVFF_NANOK,
- -FLT_MAX, FLT_MAX, 0.0 },
+ tvflt_float = { TVFF_RELDELTA | TVFF_INFOK | TVFF_NANOK,
+ -FLT_MAX, FLT_MAX, FLT_EPSILON/2 },
tvflt_double = { TVFF_EXACT | TVFF_INFOK | TVFF_NANOK,
-DBL_MAX, DBL_MAX, 0.0 },
tvflt_finite = { TVFF_EXACT, -DBL_MAX, DBL_MAX, 0.0 },
/* -*-c-*-
*
- * Floating-point format conversions
+ * Direct floating-point format conversions
*
* (c) 2024 Straylight/Edgeware
*/
_(float, flt, f32) \
_(double, dbl, f64)
+#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008))
+# define FROB_NANS
+#endif
+
#define CONV_DECLS_flt_f32 uint32 t
#if (FLT_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) == FLTFMT_IEEE_F32
# if (FLT_FORMAT&FLTFMT_ENDMASK) == FLTFMT_BE
# else
# error "unimplemented byte order"
# endif
-# ifdef FLTFMT__MUST_FROB_NANS
-# define CONV_FROB_flt_f32 do { FLTFMT__FROB_NAN_F32(&t, rc); } while (0)
+# ifdef FROB_NANS
+/* Translate a Binary32 NaN held in `t' between the standard and the
+ * reversed quiet/signalling conventions by flipping the quiet bit.
+ * Values which aren't NaNs (finite numbers and infinities) are left alone.
+ * If the quiet bit is the only payload bit set, then flipping it would
+ * produce an infinity: instead, keep the sign and the (all-ones) exponent,
+ * force the least-significant payload bit on, and report inexactness in
+ * `rc'.
+ */
+# define CONV_FROBNANflt_f32 do { \
+ if ((t&0x7f800000) != 0x7f800000 || !(t&0x007fffff)) \
+ ; /* not a NaN: nothing to do */ \
+ else if (t&0x003fffff) \
+ t ^= 0x00400000; /* other payload bits survive the flip */ \
+ else { \
+ /* Preserve sign and exponent so that the result is still a NaN. */ \
+ t = (t&0xff800000) | 0x00000001; \
+ rc |= FLTERR_INEXACT; \
+ } \
+ } while (0)
# else
-# define CONV_FROB_flt_f32 do ; while (0)
+# define CONV_FROBNANflt_f32 do ; while (0)
# endif
#else
# define CONV_LOAD_flt_f32 do { \
rc |= fltfmt_encflt(z_out, &u, r); \
fltfmt_freebits(&u); \
} while (0)
-# define CONV_FROB_flt_f32 do ; while (0)
+# define CONV_FROBNANflt_f32 do ; while (0)
#endif
#define CONV_LOADB_flt_f32 do { t = LOAD32_B(p); } while (0)
#define CONV_LOADL_flt_f32 do { t = LOAD32_L(p); } while (0)
# else
# error "unimplemented byte order"
# endif
-# ifdef FLTFMT__MUST_FROB_NANS
-# define CONV_FROB_dbl_f64 do { \
- uint32 u[2]; \
- u[0] = HI64(t); u[1] = LO64(t); \
- FLTFMT__FROB_NAN_F64(&u, rc); \
- SET64(t, u[0], u[1]); \
+# ifdef FROB_NANS
+/* Translate a Binary64 NaN held in `t' between the standard and the
+ * reversed quiet/signalling conventions by flipping the quiet bit.
+ * Values whose exponent field isn't all-ones, and infinities, are left
+ * alone.  If the quiet bit is the only payload bit set, then flipping it
+ * would produce an infinity: instead, keep the sign and the (all-ones)
+ * exponent, force the least-significant payload bit on, and report
+ * inexactness in `rc'.
+ */
+# define CONV_FROBNANdbl_f64 do { \
+ kludge64 u, v; \
+ SET64(u, 0x7ff00000, 0x00000000); AND64(v, t, u); \
+ if (CMP64(v, ==, u)) { \
+ SET64(u, 0x000fffff, 0xffffffff); AND64(v, t, u); \
+ if (!ZERO64(v)) { \
+ SET64(u, 0x0007ffff, 0xffffffff); AND64(v, t, u); \
+ if (!ZERO64(v)) \
+ { SET64(u, 0x00080000, 0x00000000); XOR64(t, t, u); } \
+ else { \
+ /* Preserve sign and exponent so that the result is still a NaN. */ \
+ SET64(u, 0xfff00000, 0x00000000); AND64(t, t, u); \
+ SET64(u, 0x00000000, 0x00000001); OR64(t, t, u); \
+ rc |= FLTERR_INEXACT; \
+ } \
+ } \
+ } \
} while (0)
# else
-# define CONV_FROB_dbl_f64 do ; while (0)
+# define CONV_FROBNANdbl_f64 do ; while (0)
# endif
#else
# define CONV_LOAD_dbl_f64 do { \
rc |= fltfmt_encdbl(z_out, &u, r); \
fltfmt_freebits(&u); \
} while (0)
-# define CONV_FROB_dbl_f64 do ; while (0)
+# define CONV_FROBNANdbl_f64 do ; while (0)
#endif
#define CONV_LOADB_dbl_f64 do { LOAD64_B_(t, p); } while (0)
#define CONV_LOADL_dbl_f64 do { LOAD64_L_(t, p); } while (0)
unsigned rc = 0; CONV_DECLS_##cty##_##fty; \
\
CONV_LOAD_##cty##_##fty; \
- CONV_FROB_##cty##_##fty; \
+ CONV_FROBNAN##cty##_##fty; \
CONV_STOREL_##cty##_##fty; \
return (rc); \
} \
unsigned rc = 0; CONV_DECLS_##cty##_##fty; \
\
CONV_LOAD_##cty##_##fty; \
- CONV_FROB_##cty##_##fty; \
+ CONV_FROBNAN##cty##_##fty; \
CONV_STOREB_##cty##_##fty; \
return (rc); \
}
unsigned rc = 0; CONV_DECLS_##cty##_##fty; \
\
CONV_LOADL_##cty##_##fty; \
- CONV_FROB_##cty##_##fty; \
+ CONV_FROBNAN##cty##_##fty; \
CONV_STORE_##cty##_##fty; \
return (rc); \
} \
unsigned rc = 0; CONV_DECLS_##cty##_##fty; \
\
CONV_LOADB_##cty##_##fty; \
- CONV_FROB_##cty##_##fty; \
+ CONV_FROBNAN##cty##_##fty; \
CONV_STORE_##cty##_##fty; \
return (rc); \
}
.TP
.B FLTERR_UFLOW
The conversion underflowed:
-a nonzero input was too tiny (in asbolute value) to represent,
+a nonzero input was too tiny (in absolute value) to represent,
and a zero result was returned.
.TP
.B FLTERR_OFLOW
are masks with set bits corresponding to their respective predicates.
Bitwise boolean logic can be applied to these masks
in order to calculate the masks corresponding to
-the same logical expresssion applied to the individual predicates.
+the same logical expression applied to the individual predicates.
.B FRPMASK_INEXACT holds if
.B LOW
or
This is likely a good option
if there is no compelling argument for a different specific choice.
.
+.SS Direct conversions
+The functions
+.B fltfmt_flttof32l
+and
+.B fltfmt_flttof32b
+convert a
+.B float
+argument to an IEEE\ 754 Binary32 value
+in little- or big-endian byte order, respectively;
+similarly
+.B fltfmt_dbltof64l
+and
+.B fltfmt_dbltof64b
+convert a
+.B double
+argument to an IEEE\ 754 Binary64 value
+in little- or big-endian byte order, respectively.
+The value to convert is given as
+.I x
+and the result is written at the address
+.IR p .
+.PP
+The functions
+.B fltfmt_f32ltoflt
+and
+.B fltfmt_f32btoflt
+convert an IEEE\ 754 Binary32 value,
+in little- or big-endian byte order, respectively,
+to a
+.BR float ;
+similarly,
+.B fltfmt_f64ltodbl
+and
+.B fltfmt_f64btodbl
+convert an IEEE\ 754 Binary64 value,
+in little- or big-endian byte order, respectively,
+to a
+.BR double .
+The value to convert is read from address
+.I p
+and the result is written to
+.RI * z_out \fR.
+.PP
+All of these functions additionally take a rounding mode
+.I r
+which is applied if the conversion cannot be performed exactly,
+and return an error code as described above.
+.PP
+On many modern platforms, the
+.B float
+and
+.B double
+types are represented internally using the IEEE
+Binary32 and Binary64 formats,
+so these conversions are trivial, or nearly so.
+A complication arises on PA-RISC and older MIPS processors:
+see the descriptions of
+.B fltfmt_encdbl
+and
+.B fltfmt_decdbl
+below for the details.
+.PP
+On other platforms,
+the conversion is decidedly nontrivial,
+and makes use of the machinery described below;
+this may also be useful for more complex conversions.
+.
.SS The floatbits structure
In order to avoid a combinatorial explosion in conversion operations,
all the basic conversions involve,
.B FLTF_NEG
is clear,
then the number represented is
-.IR s "\ \(mu\ " m "\ \(mu\ 2\*(ss" e \*(se.
+.IR s "\ \*(mu\ " m "\ \*(mu\ 2\*(ss" e \*(se.
.PP
A
.B struct floatbits
leaving
.B frac
null.
+In this state, it is safe to modify the arena pointer
+.B a
+if the default initialization is unsatisfactory.
.PP
The
.B fltfmt_allocfrac
.IR e "\ =\ \-" e \*(us0\*(ue
then the value is zero or a subnormal,
with the value
-.RI (\-1)\*(ss s "\*(se\ \(mu\ " m "\ \(mu\ 2\*(ss" e +1\*(se.
+.RI (\-1)\*(ss s "\*(se\ \*(mu\ " m "\ \*(mu\ 2\*(ss" e +1\*(se.
In particular,
if
.IR m "\ =\ 0"
If
.RI "1\ \-\ " e "\*(us0\*(ue \*(<=\ " e "\ < 2" e "\*(us0\*(ue\ +\ 1"
then the value is a (supposedly) normal number
-.RI (\-1)\*(ss s "\*(se\ \(mu\ " m "\ \(mu\ 2\*(ss" e \*(se.
+.RI (\-1)\*(ss s "\*(se\ \*(mu\ " m "\ \*(mu\ 2\*(ss" e \*(se.
If
.I h
is zero then
The precision;
in the notation above this is
.IR p .
-.PP
-The following IEEE formats descriptions are already defined.
-.TP
-.B "fltfmt_f16"
-The IEEE\ 754 Binary16 format, with
-.IR w "\ =\ 5,"
-.IR p "\ =\ 11,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_f32"
-The IEEE\ 754 Binary32 (`single precision') format, with
-.IR w "\ =\ 8,"
-.IR p "\ =\ 24,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_f64"
-The IEEE\ 754 Binary64 (`double precision') format, with
-.IR w "\ =\ 11,"
-.IR p "\ =\ 53,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_f128"
-The IEEE\ 754 Binary128 (`quad precision') format, with
-.IR w "\ =\ 15,"
-.IR p "\ =\ 113,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_mini"
-An eight-bit `minifloat' format, with
-.IR w "\ =\ 4,"
-.IR p "\ =\ 4,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_bf16"
-The Google `BFloat16' format, with
-.IR w "\ =\ 8,"
-.IR p "\ =\ 8,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_idblext80"
-The Intel 8087 80-bit `double extended' format, with
-.IR w "\ =\ 15,"
-.IR p "\ =\ 64,"
-and
-.IR h "\ =\ 1."
-.PP
+.PP
The
.B fltfmt_encieee
and
if the input is a finite value,
then the significand is rounded to fit
according to the requested rounding mode.
+If a signalling NaN ends up with all of its payload bits zero,
+as a result of truncation or otherwise,
+then the least-significant bit of the output payload is forced on
+in order to distinguish the result from an infinity.
The possible errors are
.B FLTERR_UFLOW
if the value is unrepresentably tiny,
with the unit bit interpreted as encoded in finite numbers,
and discarded in infinities and NaNs.
.PP
-For each of the format
-
-
-
+A number of IEEE and IEEE-like formats are predefined:
+for format
+.IR fmt ,
+there is
+an IEEE format description, named
+.BI fltfmt_ fmt \fR,
+together with encoding and decoding functions, named
+.BI fltfmt_enc fmt
+and
+.BI fltfmt_dec fmt \fR;
+for the most part,
+these functions use more convenient types
+to hold encoded values.
+.TP
+.B "f16"
+The IEEE\ 754 Binary16 format, with
+.IR w "\ =\ 5,"
+.IR p "\ =\ 11,"
+and
+.IR h "\ =\ 0;"
+stored in a
+.BR uint16 .
+.TP
+.B "fltfmt_f32"
+The IEEE\ 754 Binary32 (`single precision') format, with
+.IR w "\ =\ 8,"
+.IR p "\ =\ 24,"
+and
+.IR h "\ =\ 0;"
+stored in a
+.BR uint32 .
+.TP
+.B "fltfmt_f64"
+The IEEE\ 754 Binary64 (`double precision') format, with
+.IR w "\ =\ 11,"
+.IR p "\ =\ 53,"
+and
+.IR h "\ =\ 0;"
+stored in a
+.B kludge64
+(see
+.BR bits (3)
+for details).
+.TP
+.B "fltfmt_f128"
+The IEEE\ 754 Binary128 (`quad precision') format, with
+.IR w "\ =\ 15,"
+.IR p "\ =\ 113,"
+and
+.IR h "\ =\ 0;"
+stored in a big-endian vector of
+.BR uint32 ,
+just as for the generic functions described above.
+.TP
+.B "fltfmt_mini"
+An eight-bit `minifloat' format, with
+.IR w "\ =\ 4,"
+.IR p "\ =\ 4,"
+and
+.IR h "\ =\ 0;"
+stored in an
+.BR octet .
+.TP
+.B "fltfmt_bf16"
+The Google `BFloat16' format, with
+.IR w "\ =\ 8,"
+.IR p "\ =\ 8,"
+and
+.IR h "\ =\ 0;"
+stored in a
+.BR uint16 .
+.TP
+.B "fltfmt_idblext80"
+The Intel 8087 80-bit `double extended' format, with
+.IR w "\ =\ 15,"
+.IR p "\ =\ 64,"
+and
+.IR h "\ =\ 1;"
+stored as a
+.B uint16
+holding the sign and exponent,
+and a
+.B kludge64
+holding the significand.
+.
+.SS Native formats
+There are also functions for converting between
+.B struct floatbits
+and the implementation's native floating-point types
+.B float
+(abbreviated
+.BR flt ),
+.B double
+(abbreviated
+.BR dbl ),
+and
+.B "long double"
+(abbreviated
+.BR ldbl ).
+.PP
+For each native type abbreviation
+.IR ty ,
+there are functions
+.BI fltfmt_enc ty
+and
+.BI fltfmt_dec ty \fR,
+which respectively convert the value held in
+.B struct floatbits
+to or from a value of the corresponding C type.
+(The functions acting on
+.B long double
+values are only available if the platform supports C99 or later.)
+.PP
+The
+.BI fltfmt_enc ty
+functions read an input value from a
+.B struct floatbits
+pointer
+.I x
+and store the encoded result through a pointer
+.I z_out
+to the appropriate C type;
+the function also receives a rounding mode
+.IR r ,
+but see below.
+The
+.BI fltfmt_dec ty
+functions are given an input value of the appropriate C type,
+and store the decoded result in a
+.B struct floatbits
+structure pointed to by
+.IR z_out ;
+again, the function also receives a rounding mode
+.IR r ,
+but see below.
+.PP
+These functions can use two different strategies for conversion.
+If the compile-time configuration step detects
+that the implementation is using
+a specific, supported format for a native type,
+then conversions involving the native type
+are performed using the existing machinery for that format.
+For example, if,
+as is in fact nearly universal on modern-ish systems,
+the
+.B double
+type uses the IEEE\ 754 Binary64 format,
+then
+.B fltfmt_encdbl
+and
+.B fltfmt_decdbl
+use the functions
+.B fltfmt_encf64
+and
+.B fltfmt_decf64
+described above for the conversion.
+This approach has the benefit that
+everything is done under the control of the
+.B fltfmt
+machinery,
+which can faithfully preserve signs of zero values,
+and NaN payloads.
+The error conditions are, for the most part, the same as for the
+.B fltfmt_encieee
+and
+.B fltfmt_decieee
+functions described above.
+The encoding functions have an additional source of inexactness
+on PA-RISC and older MIPS processors
+which use the reversed quiet/signalling NaN convention:
+a quiet NaN with an all-zero payload
+is not representable on such implementations
+(the encoding is an infinity instead);
+in this situation,
+the least significant payload bit is forced on,
+just as if the payload required truncation,
+and
+.B FLTERR_INEXACT
+is returned.
.
.\"--------------------------------------------------------------------------
.SH "SEE ALSO"
#include "bits.h"
#include "fltfmt.h"
#include "growbuf.h"
-#include "macros.h"
#include "maths.h"
+/*----- Preliminary hacking -----------------------------------------------*/
+
+/* The native-format conversions are -- at least if the format is
+ * unrecognized -- dependent on the implementation's rounding. Our own
+ * rounding mode specifications don't fit into the framework very well, but I
+ * still want to respect the prevailing rounding mode.
+ *
+ * The `proper' way to do this is with %|#pragma STDC FENV_ACCESS|%. But
+ * that doesn't actually work on GCC, or on Clang from not too long ago. So
+ * use compiler-specific hacking to support this.
+ */
#if GCC_VERSION_P(4, 4)
# pragma GCC optimize "-frounding-math"
#elif CLANG_VERSION_P(11, 0) && !CLANG_VERSION_P(12, 0)
return (rc);
}
-/*----- IEEE formats ------------------------------------------------------*/
+/*----- IEEE and related formats ------------------------------------------*/
/* IEEE (and related) format descriptions. */
const struct fltfmt_ieeefmt
/* Copy the payload.
*
* If the payload is all-zero and we're meant to set a signalling NaN
- * then report an exactness failure and set the low bit.
+ * then report an exactness failure and set the least-significant bit.
*/
mb = fmt->prec - 2; mw = (mb + 31)/32; sh = -mb%32;
- for (i = 0; i < nw - mw; i++) z[i] = 0;
- n = x->n; if (n > mw) n = nw;
- t = shr(z + i, x->frac, n, sh); i += n;
- if (i < nw) z[i++] = t;
- sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++;
- if (f&FLTF_QNAN) z0 |= B32(sh);
- else if (!fracwd) { ERR(FLTERR_INEXACT); z[nw - 1] |= 1; }
+ n = x->n;
+ if (n < mw) j = 0;
+ else { n = mw; j = sh; }
+ if ((f&FLTF_SNAN) && ms_set_bit(x->frac + n, j, 32*n) == ALLCLEAR) {
+ ERR(FLTERR_INEXACT);
+ n = nw - 1; for (i = 0; i < n; i++) z[i] = 0;
+ z[i++] = 1;
+ } else {
+ for (i = 0; i < nw - mw; i++) z[i] = 0;
+ n = x->n; if (n > mw) n = mw;
+ t = shr(z + i, x->frac, n, sh); i += n;
+ if (i < nw) z[i++] = t;
+ sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++;
+ if (f&FLTF_QNAN) z0 |= B32(sh);
+ }
/* Set the exponent and, for non-hidden-bit formats, the unit bit. */
z0 |= M32(fmt->expwd) << esh;
# define DIGIT_BITS 4
#endif
+/* Take note if we need to cope with the reversed quiet/signalling convention
+ * used by HP-PA and older MIPS processors.
+ */
+#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008))
+# define FROB_NANS
+#endif
+
/* --- @ENCFLT@ --- *
*
* Arguments: @ty@ = the C type to encode
#endif
+#ifdef FROB_NANS
+# define FROBNAN_ENCDECLS struct floatbits _y
+# define FROBNAN_ENC do { \
+ if (_x->f&FLTF_NANMASK) { \
+ _y.f = _x->f ^ FLTF_NANMASK; _y.frac = _x->frac; _y.n = _x->n; \
+ _x = &_y; \
+ } \
+ } while (0)
+#else
+# define FROBNAN_ENCDECLS
+# define FROBNAN_ENC do ; while (0)
+#endif
+
#define ENCFLT(ty, TY, ldexp, rc, z_out, x, r) do { \
+ const struct floatbits *_x = (x); \
unsigned _rc = 0; \
+ FROBNAN_ENCDECLS; \
\
/* See if the native format is one that we recognize. */ \
switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) { \
uint32 _t[1]; \
unsigned char *_z = (unsigned char *)(z_out); \
\
- (rc) = fltfmt_encieee(&fltfmt_f32, _t, (x), (r), FLTERR_ALLERRS); \
- FLTFMT__FROB_NAN_F32(_t, _rc); \
+ FROBNAN_ENC; \
+ (rc) = fltfmt_encieee(&fltfmt_f32, _t, _x, (r), FLTERR_ALLERRS); \
switch (TY##_FORMAT&FLTFMT_ENDMASK) { \
case FLTFMT_BE: STORE32_B(_z, _t[0]); break; \
case FLTFMT_LE: STORE32_L(_z, _t[0]); break; \
case FLTFMT_IEEE_F64: { \
uint32 _t[2]; \
unsigned char *_z = (unsigned char *)(z_out); \
- (rc) = fltfmt_encieee(&fltfmt_f64, _t, (x), (r), FLTERR_ALLERRS); \
- FLTFMT__FROB_NAN_F64(_t, _rc); \
+ \
+ FROBNAN_ENC; \
+ (rc) = fltfmt_encieee(&fltfmt_f64, _t, _x, (r), FLTERR_ALLERRS); \
switch (TY##_FORMAT&FLTFMT_ENDMASK) { \
case FLTFMT_BE: \
STORE32_B(_z + 0, _t[0]); STORE32_B(_z + 4, _t[1]); \
uint32 _t[4]; \
unsigned char *_z = (unsigned char *)(z_out); \
\
- FLTFMT__FROB_NAN_F128(_t, _rc); \
- (rc) = fltfmt_encieee(&fltfmt_f128, _t, (x), (r), FLTERR_ALLERRS); \
+ FROBNAN_ENC; \
+ (rc) = fltfmt_encieee(&fltfmt_f128, _t, _x, (r), FLTERR_ALLERRS); \
switch (TY##_FORMAT&FLTFMT_ENDMASK) { \
case FLTFMT_BE: \
STORE32_B(_z + 0, _t[0]); STORE32_B(_z + 4, _t[1]); \
uint32 _t[3]; \
unsigned char *_z = (unsigned char *)(z_out); \
\
- (rc) = fltfmt_encieee(&fltfmt_idblext80, _t, (x), (r), FLTERR_ALLERRS); \
- FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc); \
+ FROBNAN_ENC; \
+ (rc) = fltfmt_encieee(&fltfmt_idblext80, \
+ _t, _x, (r), FLTERR_ALLERRS); \
switch (TY##_FORMAT&FLTFMT_ENDMASK) { \
case FLTFMT_BE: \
STORE16_B(_z + 0, _t[0]); \
default: { \
/* We must do this the hard way. */ \
\
- const struct floatbits *_x = (x); \
ty _z; \
unsigned _i; \
ENC_ROUND_DECLS; \
} while (0)
#endif
+#ifdef FROB_NANS
+# define FROBNAN_DEC do { \
+ if (_z->f&FLTF_NANMASK) _z->f ^= FLTF_NANMASK; \
+ } while (0)
+#else
+# define FROBNAN_DEC do ; while (0)
+#endif
+
#define DECFLT(ty, TY, frexp, rc, z_out, x, r) do { \
+ struct floatbits *_z = (z_out); \
unsigned _rc = 0; \
\
switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) { \
case FLTFMT_LE: _t[0] = LOAD32_L(_x); break; \
default: assert(!"unimplemented byte order"); break; \
} \
- FLTFMT__FROB_NAN_F32(_t, _rc); \
- _rc |= fltfmt_decieee(&fltfmt_f32, (z_out), _t); \
+ _rc |= fltfmt_decieee(&fltfmt_f32, _z, _t); FROBNAN_DEC; \
} break; \
\
case FLTFMT_IEEE_F64: { \
break; \
default: assert(!"unimplemented byte order"); break; \
} \
- FLTFMT__FROB_NAN_F64(_t, _rc); \
- _rc |= fltfmt_decieee(&fltfmt_f64, (z_out), _t); \
+ _rc |= fltfmt_decieee(&fltfmt_f64, _z, _t); FROBNAN_DEC; \
} break; \
\
case FLTFMT_IEEE_F128: { \
break; \
default: assert(!"unimplemented byte order"); break; \
} \
- FLTFMT__FROB_NAN_F128(_t, _rc); \
- _rc |= fltfmt_decieee(&fltfmt_f128, (z_out), _t); \
+ _rc |= fltfmt_decieee(&fltfmt_f128, _z, _t); FROBNAN_DEC; \
} break; \
\
case FLTFMT_INTEL_F80: { \
break; \
default: assert(!"unimplemented byte order"); break; \
} \
- FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc); \
- _rc |= fltfmt_decieee(&fltfmt_idblext80, (z_out), _t); \
+ _rc |= fltfmt_decieee(&fltfmt_idblext80, _z, _t); FROBNAN_DEC; \
} break; \
\
default: { \
- struct floatbits *_z = (z_out); \
ty _x = (x), _y; \
unsigned _i, _n, _f = 0; \
uint32 _t; \
static const struct tvec_test round_test =
{ "round", round_regs, 0, test_round };
-/*----- IEEE format conversion --------------------------------------------*/
+/*----- IEEE format conversions -------------------------------------------*/
#define IEEE_FORMATS(_) \
_(mini, 1) \
#define DEF_IEEE_TEST(ty, sz) &enc##ty##_test, &dec##ty##_test,
#define IEEE_TESTS IEEE_FORMATS(DEF_IEEE_TEST)
-/*----- Native format conversion ------------------------------------------*/
+/*----- Native format conversions -----------------------------------------*/
#define NATIVE_FORMATS(_) \
_(flt, float, FLT) \
static const struct tvec_flaginfo assume_flaginfo =
{ "assume", assume_flags, &tvrange_uint };
-struct nativeenv { struct tvec_env _env; unsigned ntv; };
-struct nativectx { unsigned af, want; };
+struct assumeenv { struct tvec_env _env; unsigned ntv; };
+struct assumectx { unsigned af, want; };
-static void setup_native(struct tvec_state *tv, const struct tvec_env *env,
+static void setup_assume(struct tvec_state *tv, const struct tvec_env *env,
void *pctx, void *ctx)
{
- const struct nativeenv *nenv = (const struct nativeenv *)env;
- const struct ntvinfo *info = &ntvinfo[nenv->ntv];
- struct nativectx *nctx = ctx;
+ const struct assumeenv *aenv = (const struct assumeenv *)env;
+ const struct ntvinfo *info = &ntvinfo[aenv->ntv];
+ struct assumectx *actx = ctx;
double prec;
switch (info->fmt&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {
case FLTFMT_IEEE_F32:
- nctx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24;
+ actx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24;
break;
case FLTFMT_IEEE_F64:
- nctx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24 | AF_PREC53;
+ actx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24 | AF_PREC53;
break;
case FLTFMT_IEEE_F128:
- nctx->af = AF_NEGZ | AF_INF | AF_IEEE |
+ actx->af = AF_NEGZ | AF_INF | AF_IEEE |
AF_PREC24 | AF_PREC53 | AF_PREC64 | AF_PREC113;
break;
case FLTFMT_INTEL_F80:
- nctx->af = AF_NEGZ | AF_INF | AF_IEEE |
+ actx->af = AF_NEGZ | AF_INF | AF_IEEE |
AF_PREC24 | AF_PREC53 | AF_PREC64;
break;
default:
- nctx->af = 0;
- if (NEGP(-0.0)) nctx->af |= AF_NEGZ;
+ actx->af = 0;
+ if (NEGP(-0.0)) actx->af |= AF_NEGZ;
#ifdef INF
- nctx->af |= AF_INF;
+ actx->af |= AF_INF;
#endif
#ifdef NAN
- nctx->af |= AF_STDCNAN;
+ actx->af |= AF_STDCNAN;
#endif
prec = log(FLT_RADIX)/log(2.0)*info->mant_dig;
- if (prec >= 24) nctx->af |= AF_PREC24;
- if (prec >= 53) nctx->af |= AF_PREC53;
- if (prec >= 64) nctx->af |= AF_PREC64;
- if (prec >= 113) nctx->af |= AF_PREC113;
+ if (prec >= 24) actx->af |= AF_PREC24;
+ if (prec >= 53) actx->af |= AF_PREC53;
+ if (prec >= 64) actx->af |= AF_PREC64;
+ if (prec >= 113) actx->af |= AF_PREC113;
break;
}
- nctx->want = 0;
+ actx->want = 0;
}
-static int setvar_native(struct tvec_state *tv, const char *var,
+static int setvar_assume(struct tvec_state *tv, const char *var,
const union tvec_regval *rv, void *ctx)
{
- struct nativectx *nctx = ctx;
+ struct assumectx *actx = ctx;
- if (STRCMP(var, ==, "@assume")) nctx->want = rv->u;
+ if (STRCMP(var, ==, "@assume")) actx->want = rv->u;
else return (tvec_unkregerr(tv, var));
return (0);
}
static const struct tvec_vardef assume_vardef =
- { sizeof(struct tvec_reg), setvar_native,
+ { sizeof(struct tvec_reg), setvar_assume,
{ "@assume", &tvty_flags, 0, 0, { &assume_flaginfo } }};
-static const struct tvec_vardef *findvar_native
+static const struct tvec_vardef *findvar_assume
(struct tvec_state *tv, const char *name, void **ctx_out, void *ctx)
{
if (STRCMP(name, ==, "@assume"))
return (0);
}
-static void before_native(struct tvec_state *tv, void *ctx)
+static void before_assume(struct tvec_state *tv, void *ctx)
{
- struct nativectx *nctx = ctx;
+ struct assumectx *actx = ctx;
- if (nctx->want&~nctx->af)
+ if ((tv->f&TVSF_ACTIVE) && (actx->want&~actx->af))
tvec_skip(tv, "unsatisfied assumption");
else {
DEFAULT_REG(RROUND, rv->u = FLTRND_NEAREVEN);
}
}
-static void after_native(struct tvec_state *tv, void *ctx)
+static void after_assume(struct tvec_state *tv, void *ctx)
{
- struct nativectx *nctx = ctx;
+ struct assumectx *actx = ctx;
- nctx->want = 0;
+ actx->want = 0;
}
#define DEF_TEST(ty, cty, TY) \
\
- static struct nativeenv ty##_env = \
- { { sizeof(struct nativectx), \
- setup_native, findvar_native, before_native, 0, after_native, 0 }, \
+ static struct assumeenv ty##_env = \
+ { { sizeof(struct assumectx), \
+ setup_assume, findvar_assume, before_assume, 0, after_assume, 0 }, \
NTV_##TY }; \
\
static const struct tvec_regdef enc##ty##_regs[] = { \
#define DEF_NATIVE_TEST(ty, cty, TY) &enc##ty##_test, &dec##ty##_test,
#define NATIVE_TESTS NATIVE_FORMATS(DEF_NATIVE_TEST)
+/*----- Direct conversions ------------------------------------------------*/
+
+#define DIRECT_CONVERSIONS(_) \
+ _(flt, float, f32) \
+ _(dbl, double, f64)
+
+#define DEF_TEST1(ty, cty, fty, e) \
+ static void test_##ty##to##fty##e(const struct tvec_reg *in, \
+ struct tvec_reg *out, \
+ void *ctx) \
+ { \
+ tvec_allocbytes(&out[RZ_OUT].v, OUTSZ_##fty); \
+ out[RERR_OUT].v.u = fltfmt_##ty##to##fty##e(out[RZ_OUT].v.bytes.p, \
+ in[RX].v.f, \
+ in[RROUND].v.u); \
+ } \
+ \
+ static const struct tvec_test ty##to##fty##e##_test = \
+ { #ty "to" #fty #e, ty##to##fty##_regs, &ty##_env._env, \
+ test_##ty##to##fty##e };
+
+#define DEF_TEST(ty, cty, fty) \
+ static const struct tvec_regdef ty##to##fty##_regs[] = { \
+ { "round", &tvty_flags, RROUND, TVRF_OPT, { &fltrnd_flaginfo } }, \
+ { "x", &tvty_float, RX, 0, { &tvflt_##cty } }, \
+ { "z", &tvty_bytes, RZ_OUT, 0, { &fty##_range } }, \
+ { "err", &tvty_flags, RERR_OUT, TVRF_OPT, { &flterr_flaginfo } }, \
+ TVEC_ENDREGS \
+ }; \
+ \
+ DEF_TEST1(ty, cty, fty, l) \
+ DEF_TEST1(ty, cty, fty, b)
+
+DIRECT_CONVERSIONS(DEF_TEST)
+
+#undef DEF_TEST1
+#undef DEF_TEST
+
+#define DEF_TEST1(ty, cty, fty, e) \
+ static void test_##fty##e##to##ty(const struct tvec_reg *in, \
+ struct tvec_reg *out, \
+ void *ctx) \
+ { \
+ cty z; \
+ \
+ out[RERR_OUT].v.u = fltfmt_##fty##e##to##ty(&z, in[RX].v.bytes.p, \
+ in[RROUND].v.u); \
+ out[RZ_OUT].v.f = z; \
+ } \
+ \
+ static const struct tvec_test fty##e##to##ty##_test = \
+ { #fty #e "to" #ty, fty##to##ty##_regs, &ty##_env._env, \
+ test_##fty##e##to##ty };
+
+#define DEF_TEST(ty, cty, fty) \
+ static const struct tvec_regdef fty##to##ty##_regs[] = { \
+ { "round", &tvty_flags, RROUND, TVRF_OPT, { &fltrnd_flaginfo } }, \
+ { "x", &tvty_bytes, RX, 0, { &fty##_range } }, \
+ { "z", &tvty_float, RZ_OUT, 0, { &tvflt_##cty } }, \
+ { "err", &tvty_flags, RERR_OUT, TVRF_OPT, { &flterr_flaginfo } }, \
+ TVEC_ENDREGS \
+ }; \
+ \
+ DEF_TEST1(ty, cty, fty, l) \
+ DEF_TEST1(ty, cty, fty, b)
+
+DIRECT_CONVERSIONS(DEF_TEST)
+
+#undef DEF_TEST1
+#undef DEF_TEST
+
+#define DEF_DIRECT_CTOF_TESTS(ty, cty, fty) \
+ &ty##to##fty##l_test, &ty##to##fty##b_test,
+#define DEF_DIRECT_FTOC_TESTS(ty, cty, fty) \
+ &fty##l##to##ty##_test, &fty##b##to##ty##_test,
+#define DEF_DIRECT_TESTS(ty, cty, fty) \
+ DEF_DIRECT_CTOF_TESTS(ty, cty, fty) \
+ DEF_DIRECT_FTOC_TESTS(ty, cty, fty)
+#define DIRECT_TESTS DIRECT_CONVERSIONS(DEF_DIRECT_TESTS)
+
/*----- Main code ---------------------------------------------------------*/
static const struct tvec_test *const tests[] = {
&round_test,
NATIVE_TESTS
IEEE_TESTS
+ DIRECT_TESTS
0
};
for hi in xrange(bit(hibits)):
top = hi << hishift
for lo in xrange(bit(lobits)):
+ while True:
+ fill = R.randrange(midbit)
+ if fill != 0 and fill != midbit - 1: break
base = lo | top
yield base
- yield base | (R.randrange(midbit) << lobits)
+ yield base | (fill << lobits)
yield base | m
class ExploreParameters (object):
[encf64]
+;; NaN conversions
+
+f = QNAN
+m = #empty
+z = 7ff80000 00000000
+
+f = SNAN
+m = #empty
+z = 7ff00000 00000001
+err = INEXACT
+
+f = SNAN
+m = 00000000 00002000
+z = 7ff00000 00000001
+
+f = SNAN
+m = 00000000 00001fff
+z = 7ff00000 00000001
+err = INEXACT
+
+f = SNAN
+m = 00000000 00000000 00000001
+z = 7ff00000 00000001
+err = INEXACT
+
;; Check NaN truncation.
errmask = 0
f = 0
e = 3
m = c90fdb00
-z = 6.2831854820251465
+z = 6.2831853
@assume = stdc-nan
f = QNAN
@assume = ieee
x = #nan
f = QNAN
-m = 00000000 ; maybe
+;; m = ... something
err = OK
[encdbl]
f = QNAN
m = #empty
z = #nan
+err = OK
[decdbl]
@assume = ieee
x = #nan
f = QNAN
-m = 00000000 00000000 ; maybe
+;; m = ... something
err = OK
;;;--------------------------------------------------------------------------
+;;; Direct conversions.
+
+[f32btoflt]
+
+x = 00000000
+z = 0.0
+
+@assume = negz
+x = 80000000
+z = -0.0
+
+@assume = inf
+x = 7f800000
+z = #+inf
+
+@assume = inf
+x = ff800000
+z = #-inf
+
+@assume = stdc-nan | ieee
+x = 7fc00000
+z = #nan
+
+@assume = stdc-nan | ieee
+x = 7fa00000
+z = #nan
+
+x = 3f800000
+z = 1.0
+
+@assume = prec24
+x = 40c90fdb
+z = 6.2831853
+
+[f32ltoflt]
+
+x = 10c12549
+z = 678929
+
+[flttof32b]
+
+x = 678929
+z = 4925c110
+
+@assume = prec24
+x = 6.2831853
+z = 40c90fdb
+
+[flttof32l]
+
+x = 678929
+z = 10c12549
+
+[dbltof64b]
+
+x = 0.0
+z = 00000000 00000000
+
+x = -2964135146
+z = c1e615a3 9d400000
+
+@assume = prec53
+x = 6.283185307179586
+z = 401921fb 54442d18
+
+[dbltof64l]
+
+x = -2964135146
+z = 0000409d a315e6c1
+
+[f64btodbl]
+
+x = c1e615a3 9d400000
+z = -2964135146
+
+[f64ltodbl]
+
+x = 0000409d a315e6c1
+z = -2964135146
+
+;;;--------------------------------------------------------------------------