From dc6eea4eb5e7f6a372270f9bb9d3b9556f349b0a Mon Sep 17 00:00:00 2001 Message-Id: From: Mark Wooding Date: Wed, 24 Apr 2024 00:49:56 +0100 Subject: [PATCH] @@@ fltfmt wip Organization: Straylight/Edgeware From: Mark Wooding --- defs.man | 4 + mem/arena.3.in | 4 +- test/bench.3.in | 8 +- test/tvec-types.c | 13 +- utils/fltfmt-convert.c | 58 +++++--- utils/fltfmt.3.in | 320 +++++++++++++++++++++++++++++++++-------- utils/fltfmt.c | 101 +++++++++---- utils/t/fltfmt-test.c | 151 ++++++++++++++----- utils/t/fltfmt-testgen | 5 +- utils/t/fltfmt.tests | 113 ++++++++++++++- 10 files changed, 621 insertions(+), 156 deletions(-) diff --git a/defs.man b/defs.man index 379edbb..67ff7be 100644 --- a/defs.man +++ b/defs.man @@ -38,6 +38,8 @@ . ds /= \(!= . ds <= \(<= . ds >= \(>= +. ds mu \(mu +. ds sr \(sr . ds ' \(fm . ds , \h'\w'\ 'u/2u' . if \n(.g \{\ @@ -53,6 +55,8 @@ . ds se . ds us _ . ds ue +. ds mu * +. ds sr sqrt . ds ' \(aq . ds *d \,\fIdelta\/\fP . ds /= /= diff --git a/mem/arena.3.in b/mem/arena.3.in index 09207fa..593c25d 100644 --- a/mem/arena.3.in +++ b/mem/arena.3.in @@ -150,12 +150,12 @@ handlers which can't easily find the old block's size. The macro .B ALLOCV_SAFE_P returns nonzero if the product -.IR n "\ \(mu\ " sz +.IR n "\ \*(mu\ " sz is representable in type .B size_t and zero otherwise; i.e., it returns true if it would be safe to try to allocate -.IR n "\ \(mu\ " sz +.IR n "\ \*(mu\ " sz bytes. The macro .BR A_ALLOCV diff --git a/test/bench.3.in b/test/bench.3.in index d526de8..3ce25f3 100644 --- a/test/bench.3.in +++ b/test/bench.3.in @@ -794,7 +794,7 @@ with the objective of finding an iteration count such that .I n iterations of the computation take more than -.IB b ->target_s "" \fR/\(sr2 +.IB b ->target_s "" \fR/\*(sr2 seconds. If measurement fails, then @@ -807,7 +807,7 @@ is set to zero, and is filled in with the measurement; .IB t_out ->n is set to -.IR n "\ \(mu\ " base . +.IR n "\ \*(mu\ " base . 
.PP The .B BENCH_MEASURE_TAG @@ -902,7 +902,7 @@ the flag must be set in .IB t ->f \fR. If the timing is sufficient \(en if -.IR t\fB->t "\ \*(>=\ " target_s /\(sr2 +.IR t\fB->t "\ \*(>=\ " target_s /\*(sr2 \(en then .B bench_adapt returns a nonzero value to indicate that measurement is complete. @@ -922,7 +922,7 @@ On exit, the timing data is updated, and .IB t ->n is set to the product -.IR n "\ \(mu\ " base . +.IR n "\ \*(mu\ " base . . .SS Reporting results The diff --git a/test/tvec-types.c b/test/tvec-types.c index 8d07ea8..1d01a3d 100644 --- a/test/tvec-types.c +++ b/test/tvec-types.c @@ -528,18 +528,21 @@ static void format_size(const struct gprintf_ops *gops, void *go, static int eqish_floating_p(double x, double y, const struct tvec_floatinfo *fi) { - double t; + double t, u; + /* NaNs and infinities are equal only to each other. */ if (NANP(x)) return (NANP(y)); else if (NANP(y)) return (0); if (INFP(x)) return (x == y); else if (INFP(y)) return (0); + /* Compare finite values. */ switch (fi ? fi->f&TVFF_EQMASK : TVFF_EXACT) { case TVFF_EXACT: return (x == y && NEGP(x) == NEGP(y)); case TVFF_ABSDELTA: - t = x - y; if (t < 0) t = -t; return (t < fi->delta); + t = fabs(y - x); return (t < fi->delta); case TVFF_RELDELTA: - t = 1.0 - x/y; if (t < 0) t = -t; return (t < fi->delta); + t = fabs(y - x); u = fabs(y*fi->delta); if (u < DBL_MIN) u = DBL_MIN; + return (t <= u); default: abort(); } @@ -1928,8 +1931,8 @@ const struct tvec_regty tvty_float = { /* Predefined floating-point ranges. 
*/ const struct tvec_floatinfo - tvflt_float = { TVFF_EXACT | TVFF_INFOK | TVFF_NANOK, - -FLT_MAX, FLT_MAX, 0.0 }, + tvflt_float = { TVFF_RELDELTA | TVFF_INFOK | TVFF_NANOK, + -FLT_MAX, FLT_MAX, FLT_EPSILON/2 }, tvflt_double = { TVFF_EXACT | TVFF_INFOK | TVFF_NANOK, -DBL_MAX, DBL_MAX, 0.0 }, tvflt_finite = { TVFF_EXACT, -DBL_MAX, DBL_MAX, 0.0 }, diff --git a/utils/fltfmt-convert.c b/utils/fltfmt-convert.c index 6db742b..cc24dba 100644 --- a/utils/fltfmt-convert.c +++ b/utils/fltfmt-convert.c @@ -1,6 +1,6 @@ /* -*-c-*- * - * Floating-point format conversions + * Direct floating-point format conversions * * (c) 2024 Straylight/Edgeware */ @@ -40,6 +40,10 @@ _(float, flt, f32) \ _(double, dbl, f64) +#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008)) +# define FROB_NANS +#endif + #define CONV_DECLS_flt_f32 uint32 t #if (FLT_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) == FLTFMT_IEEE_F32 # if (FLT_FORMAT&FLTFMT_ENDMASK) == FLTFMT_BE @@ -51,10 +55,19 @@ # else # error "unimplemented byte order" # endif -# ifdef FLTFMT__MUST_FROB_NANS -# define CONV_FROB_flt_f32 do { FLTFMT__FROB_NAN_F32(&t, rc); } while (0) +# ifdef FROB_NANS +# define CONV_FROBNANflt_f32 do { \ + if ((t&0x7f800000) != 0x7f800000 || !(t&0x007fffff)) \ + ; \ + else if (t&0x003fffff) \ + t ^= 0x00400000; \ + else { \ + t = (t&0x80000000) | 0x00000001; \ + rc |= FLTERR_INEXACT; \ + } \ + } while (0) # else -# define CONV_FROB_flt_f32 do ; while (0) +# define CONV_FROBNANflt_f32 do ; while (0) # endif #else # define CONV_LOAD_flt_f32 do { \ @@ -69,7 +82,7 @@ rc |= fltfmt_encflt(z_out, &u, r); \ fltfmt_freebits(&u); \ } while (0) -# define CONV_FROB_flt_f32 do ; while (0) +# define CONV_FROBNANflt_f32 do ; while (0) #endif #define CONV_LOADB_flt_f32 do { t = LOAD32_B(p); } while (0) #define CONV_LOADL_flt_f32 do { t = LOAD32_L(p); } while (0) @@ -87,15 +100,26 @@ # else # error "unimplemented byte order" # endif -# ifdef FLTFMT__MUST_FROB_NANS -# define CONV_FROB_dbl_f64 do { \ - uint32 
u[2]; \ - u[0] = HI64(t); u[1] = LO64(t); \ - FLTFMT__FROB_NAN_F64(&u, rc); \ - SET64(t, u[0], u[1]); \ +# ifdef FROB_NANS +# define CONV_FROBNANdbl_f64 do { \ + kludge64 u, v; \ + SET64(u, 0x7ff00000, 0x00000000); AND64(v, t, u); \ + if (CMP64(v, ==, u)) { \ + SET64(u, 0x000fffff, 0xffffffff); AND64(v, t, u); \ + if (!ZERO64(v)) { \ + SET64(u, 0x0007ffff, 0xffffffff); AND64(v, t, u); \ + if (!ZERO64(v)) \ + { SET64(u, 0x00080000, 0x00000000); XOR64(t, t, u); } \ + else { \ + SET64(u, 0x80000000, 0x00000000); AND64(t, t, u); \ + SET64(u, 0x00000000, 0x00000001); OR64(t, t, u); \ + rc |= FLTERR_INEXACT; \ + } \ + } \ + } \ } while (0) # else -# define CONV_FROB_dbl_f64 do ; while (0) +# define CONV_FROBNANdbl_f64 do ; while (0) # endif #else # define CONV_LOAD_dbl_f64 do { \ @@ -111,7 +135,7 @@ rc |= fltfmt_encdbl(z_out, &u, r); \ fltfmt_freebits(&u); \ } while (0) -# define CONV_FROB_dbl_f64 do ; while (0) +# define CONV_FROBNANdbl_f64 do ; while (0) #endif #define CONV_LOADB_dbl_f64 do { LOAD64_B_(t, p); } while (0) #define CONV_LOADL_dbl_f64 do { LOAD64_L_(t, p); } while (0) @@ -145,7 +169,7 @@ unsigned rc = 0; CONV_DECLS_##cty##_##fty; \ \ CONV_LOAD_##cty##_##fty; \ - CONV_FROB_##cty##_##fty; \ + CONV_FROBNAN##cty##_##fty; \ CONV_STOREL_##cty##_##fty; \ return (rc); \ } \ @@ -155,7 +179,7 @@ unsigned rc = 0; CONV_DECLS_##cty##_##fty; \ \ CONV_LOAD_##cty##_##fty; \ - CONV_FROB_##cty##_##fty; \ + CONV_FROBNAN##cty##_##fty; \ CONV_STOREB_##cty##_##fty; \ return (rc); \ } @@ -188,7 +212,7 @@ CONVERSIONS(DEF_CONV) unsigned rc = 0; CONV_DECLS_##cty##_##fty; \ \ CONV_LOADL_##cty##_##fty; \ - CONV_FROB_##cty##_##fty; \ + CONV_FROBNAN##cty##_##fty; \ CONV_STORE_##cty##_##fty; \ return (rc); \ } \ @@ -198,7 +222,7 @@ CONVERSIONS(DEF_CONV) unsigned rc = 0; CONV_DECLS_##cty##_##fty; \ \ CONV_LOADB_##cty##_##fty; \ - CONV_FROB_##cty##_##fty; \ + CONV_FROBNAN##cty##_##fty; \ CONV_STORE_##cty##_##fty; \ return (rc); \ } diff --git a/utils/fltfmt.3.in b/utils/fltfmt.3.in index 
e5ae258..6343c79 100644 --- a/utils/fltfmt.3.in +++ b/utils/fltfmt.3.in @@ -295,7 +295,7 @@ This error flag is sometimes set conservatively. .TP .B FLTERR_UFLOW The conversion underflowed: -a nonzero input was too tiny (in asbolute value) to represent, +a nonzero input was too tiny (in absolute value) to represent, and a zero result was returned. .TP .B FLTERR_OFLOW @@ -393,7 +393,7 @@ and are mask with set bits corresponding to their respective predicates. Bitwise boolean logic can be applied to these masks in order to calculate the masks corresponding to -the same logical expresssion applied to the individual predicates. +the same logical expression applied to the individual predicates. .B FRPMASK_INEXACT holds if .B LOW or @@ -416,6 +416,73 @@ denoted This is likely a good option if there is no compelling argument for a different specific choice. . +.SS Direct conversions +The functions +.B fltfmt_flttof32l +and +.B fltfmt_flttof32b +convert a +.B float +argument to an IEEE\ 754 Binary32 value +in little- or big-endian byte order, respectively; +similarly +.B fltfmt_dbltof64l +and +.B fltfmt_dbltof64b +convert a +.B double +argument to an IEEE\ 754 Binary64 value +in little- or big-endian byte order, respectively. +The value to convert is given as +.I x +and the result is written at the address +.IR p . +.PP +The functions +.B fltfmt_f32ltoflt +and +.B fltfmt_f32btoflt +convert an IEEE\ 754 Binary32 value, +in little- or big-endian byte order, respectively, +to a +.BR float ; +similarly, +.B fltfmt_f64ltodbl +and +.B fltfmt_f64btodbl +convert an IEEE\ 754 Binary64 value, +in little- or big-endian byte order, respectively, +to a +.BR double . +The value to convert is read from address +.I p +and the result is written to +.RI * z_out \fR. +.PP +All of these functions additionally take a rounding mode +.I r +which is applied if the conversion cannot be performed exactly, +and return an error code as described above. 
+.PP +On many modern platforms, the +.B float +and +.B double +types are represented internally using the IEEE +Binary32 and Binary64 formats, +so these conversions are trivial, or nearly so. +A complication arises on PA-RISC and older MIPS processors: +see the descriptions of +.B fltfmt_encieee +and +.B fltfmt_decdbl +below for the details. +.PP +On other platforms, +the conversion is decidedly nontrivial, +and makes use of the machinery described below; +this may also be useful for more complex conversions. +. .SS The floatbits structure In order to avoid a combinatorial explosion in conversion operations, all the basic conversions involve, @@ -540,7 +607,7 @@ or +1 if .B FLTF_NEG is clear, then the number represented is -.IR s "\ \(mu\ " m "\ \(mu\ 2\*(ss" e \*(se. +.IR s "\ \*(mu\ " m "\ \*(mu\ 2\*(ss" e \*(se. .PP A .B struct floatbits @@ -564,6 +631,9 @@ neither allocates any storage or other resources, leaving .B frac null. +In this state, it is safe to modify the arena pointer +.B a +if the default initialization is unsatisfactory. .PP The .B fltfmt_allocfrac @@ -707,7 +777,7 @@ If .IR e "\ =\ \-" e \*(us0\*(ue then the value is zero or a subnormal, with the value -.RI (\-1)\*(ss s "\*(se\ \(mu\ " m "\ \(mu\ 2\*(ss" e +1\*(se. +.RI (\-1)\*(ss s "\*(se\ \*(mu\ " m "\ \*(mu\ 2\*(ss" e +1\*(se. In particular, if .IR m "\ =\ 0" @@ -731,7 +801,7 @@ but the result will be as described. If .RI "1\ \-\ " e "\*(us0\*(ue \*(<=\ " e "\ < 2" e "\*(us0\*(ue\ +\ 1" then the value is a (supposedly) normal number -.RI (\-1)\*(ss s "\*(se\ \(mu\ " m "\ \(mu\ 2\*(ss" e \*(se. +.RI (\-1)\*(ss s "\*(se\ \*(mu\ " m "\ \*(mu\ 2\*(ss" e \*(se. If .I h is zero then @@ -821,58 +891,7 @@ in the notation above this is The precision; in the notation above this is .IR p . -.PP -The following IEEE formats descriptions are already defined. -.TP -.B "fltfmt_f16" -The IEEE\ 754 Binary16 format, with -.IR w "\ =\ 5," -.IR p "\ =\ 11," -and -.IR h "\ =\ 0." 
-.TP -.B "fltfmt_f32" -The IEEE\ 754 Binary32 (`single precision') format, with -.IR w "\ =\ 8," -.IR p "\ =\ 24," -and -.IR h "\ =\ 0." -.TP -.B "fltfmt_f64" -The IEEE\ 754 Binary64 (`double precision') format, with -.IR w "\ =\ 11," -.IR p "\ =\ 53," -and -.IR h "\ =\ 0." -.TP -.B "fltfmt_f128" -The IEEE\ 754 Binary128 (`quad precision') format, with -.IR w "\ =\ 15," -.IR p "\ =\ 113," -and -.IR h "\ =\ 0." -.TP -.B "fltfmt_mini" -An eight-bit `minifloat' format, with -.IR w "\ =\ 4," -.IR p "\ =\ 4," -and -.IR h "\ =\ 0." -.TP -.B "fltfmt_bf16" -The Google `BFloat16' format, with -.IR w "\ =\ 8," -.IR p "\ =\ 8," -and -.IR h "\ =\ 0." -.TP -.B "fltfmt_idblext80" -The Intel 8087 80-bit `double extended' format, with -.IR w "\ =\ 15," -.IR p "\ =\ 64," -and -.IR h "\ =\ 1." -.PP + The .B fltfmt_encieee and @@ -921,6 +940,10 @@ discarding low-significant bits; if the input is a finite value, then the significand is rounded to fit according to the requested rounding mode. +If a signalling NaN ends up with all of its payload bits zero, +as a result of truncation or otherwise, +then the least-significant bit of the output payload is forced on +in order to distinguish the result from an infinity. The possible errors are .B FLTERR_UFLOW if the value is unrepresentably tiny, @@ -969,10 +992,185 @@ the result is returned anyway, with the unit bit interpreted as encoded in finite numbers, and discarded in infinities and NaNs. .PP -For each of the format - - - +A number of IEEE and IEEE-like formats are predefined: +for format +.IR fmt , +there is +an IEEE format description, named +.BI fltfmt_ fmt \fR, +together with encoding and decoding functions, named +.BI fltfmt_enc fmt +and +.BI fltfmt_dec fmt \fR; +for the most part, +these functions use more convenient types +to hold encoded values. +.TP +.B "f16" +The IEEE\ 754 Binary16 format, with +.IR w "\ =\ 5," +.IR p "\ =\ 11," +and +.IR h "\ =\ 0;" +stored in a +.BR uint16 . 
+.TP +.B "f32" +The IEEE\ 754 Binary32 (`single precision') format, with +.IR w "\ =\ 8," +.IR p "\ =\ 24," +and +.IR h "\ =\ 0;" +stored in a +.BR uint32 . +.TP +.B "f64" +The IEEE\ 754 Binary64 (`double precision') format, with +.IR w "\ =\ 11," +.IR p "\ =\ 53," +and +.IR h "\ =\ 0;" +stored in a +.B kludge64 +(see +.BR bits (3) +for details). +.TP +.B "f128" +The IEEE\ 754 Binary128 (`quad precision') format, with +.IR w "\ =\ 15," +.IR p "\ =\ 113," +and +.IR h "\ =\ 0;" +stored in a big-endian vector of +.BR uint32 , +just as for the generic functions described above. +.TP +.B "mini" +An eight-bit `minifloat' format, with +.IR w "\ =\ 4," +.IR p "\ =\ 4," +and +.IR h "\ =\ 0;" +stored in an +.BR octet . +.TP +.B "bf16" +The Google `BFloat16' format, with +.IR w "\ =\ 8," +.IR p "\ =\ 8," +and +.IR h "\ =\ 0;" +stored in a +.BR uint16 . +.TP +.B "idblext80" +The Intel 8087 80-bit `double extended' format, with +.IR w "\ =\ 15," +.IR p "\ =\ 64," +and +.IR h "\ =\ 1;" +stored as a +.B uint16 +holding the sign and exponent, +and a +.B kludge64 +holding the significand. +. +.SS Native formats +There are also functions for converting between +.B struct floatbits +and the implementation's native floating-point types +.B float +(abbreviated +.BR flt ), +.B double +(abbreviated +.BR dbl ), +and +.B "long double" +(abbreviated +.BR ldbl ). +.PP +For each native type abbreviation +.IR ty , +there are functions +.BI fltfmt_enc ty +and +.BI fltfmt_dec ty \fR, +which respectively convert the value held in +.B struct floatbits +to or from a value of the corresponding C type. +(The functions acting on +.B long double +values are only available if the platform supports C99 or later.) 
+.PP +The +.BI fltfmt_enc ty +functions read an input value from a +.B struct floatbits +pointer +.I x +and store the encoded result through a pointer +.I z_out +to the appropriate C type; +the function also receives a rounding mode +.IR r , +but see below. +The +.BI fltfmt_dec ty +functions are given an input value of the appropriate C type, +and store the decoded result in a +.B struct floatbits +structure pointed to by +.IR z_out ; +again, the function also receives a rounding mode +.IR r , +but see below. +.PP +These functions can use two different strategies for conversion. +If the compile-time configuration step detects +that the implementation is using +a specific, supported format for a native type, +then conversions involving the native type +are performed using the existing machinery for that format. +For example, if, +as is in fact nearly universal on modern-ish systems, +the +.B double +type uses the IEEE\ 754 Binary64 format, +then +.B fltfmt_encdbl +and +.B fltfmt_decdbl +use the functions +.B fltfmt_encf64 +and +.B fltfmt_decf64 +described above for the conversion. +This approach has the benefit that +everything is done under the control of the +.B fltfmt +machinery, +which can faithfully preserve signs of zero values, +and NaN payloads. +The error conditions are, for the most part, the same as for the +.B fltfmt_encieee +and +.B fltfmt_decieee +functions described above. +The encoding functions have an additional source of inexactness +on PA-RISC and older MIPS processors +which use the reversed quiet/signalling NaN convention: +a quiet NaN with an all-zero payload +is not representable on such implementations +(the encoding is an infinity instead); +in this situation, +the least significant payload bit is forced on, +just as if the payload required truncation, +and +.B FLTERR_INEXACT +is returned. . 
.\"-------------------------------------------------------------------------- .SH "SEE ALSO" diff --git a/utils/fltfmt.c b/utils/fltfmt.c index deaf553..cb39eca 100644 --- a/utils/fltfmt.c +++ b/utils/fltfmt.c @@ -39,9 +39,19 @@ #include "bits.h" #include "fltfmt.h" #include "growbuf.h" -#include "macros.h" #include "maths.h" +/*----- Preliminary hacking -----------------------------------------------*/ + +/* The native-format conversions are -- at least if the format is + * unrecognized -- dependent on the implementation's rounding. Our own + * rounding mode specifications don't fit into the framework very well, but I + * still want to respect the prevailing rounding mode. + * + * The `proper' way to do this is with %|#pragma STDC FENV_ACCESS|%. But + * that doesn't actually work on GCC, or on Clang from not too long ago. So + * use compiler-specific hacking to support this. + */ #if GCC_VERSION_P(4, 4) # pragma GCC optimize "-frounding-math" #elif CLANG_VERSION_P(11, 0) && !CLANG_VERSION_P(12, 0) @@ -531,7 +541,7 @@ unsigned fltfmt_round(struct floatbits *z_out, const struct floatbits *x, return (rc); } -/*----- IEEE formats ------------------------------------------------------*/ +/*----- IEEE and related formats ------------------------------------------*/ /* IEEE (and related) format descriptions. */ const struct fltfmt_ieeefmt @@ -622,16 +632,24 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt, /* Copy the payload. * * If the payload is all-zero and we're meant to set a signalling NaN - * then report an exactness failure and set the low bit. + * then report an exactness failure and set the least-significant bit. 
*/ mb = fmt->prec - 2; mw = (mb + 31)/32; sh = -mb%32; - for (i = 0; i < nw - mw; i++) z[i] = 0; - n = x->n; if (n > mw) n = nw; - t = shr(z + i, x->frac, n, sh); i += n; - if (i < nw) z[i++] = t; - sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++; - if (f&FLTF_QNAN) z0 |= B32(sh); - else if (!fracwd) { ERR(FLTERR_INEXACT); z[nw - 1] |= 1; } + n = x->n; + if (n < mw) j = 0; + else { n = mw; j = sh; } + if ((f&FLTF_SNAN) && ms_set_bit(x->frac + n, j, 32*n) == ALLCLEAR) { + ERR(FLTERR_INEXACT); + n = nw - 1; for (i = 0; i < n; i++) z[i] = 0; + z[i++] = 1; + } else { + for (i = 0; i < nw - mw; i++) z[i] = 0; + n = x->n; if (n > mw) n = mw; + t = shr(z + i, x->frac, n, sh); i += n; + if (i < nw) z[i++] = t; + sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++; + if (f&FLTF_QNAN) z0 |= B32(sh); + } /* Set the exponent and, for non-hidden-bit formats, the unit bit. */ z0 |= M32(fmt->expwd) << esh; @@ -1123,6 +1141,13 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m) # define DIGIT_BITS 4 #endif +/* Take note if we need to cope with the reversed quiet/signalling convention + * used by HP-PA and older MIPS processors. + */ +#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008)) +# define FROB_NANS +#endif + /* --- @ENCFLT@ --- * * * Arguments: @ty@ = the C type to encode @@ -1205,8 +1230,23 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m) #endif +#ifdef FROB_NANS +# define FROBNAN_ENCDECLS struct floatbits _y +# define FROBNAN_ENC do { \ + if (_x->f&FLTF_NANMASK) { \ + _y.f = _x->f ^ FLTF_NANMASK; _y.frac = _x->frac; _y.n = _x->n; \ + _x = &_y; \ + } \ + } while (0) +#else +# define FROBNAN_ENCDECLS +# define FROBNAN_ENC do ; while (0) +#endif + #define ENCFLT(ty, TY, ldexp, rc, z_out, x, r) do { \ + const struct floatbits *_x = (x); \ unsigned _rc = 0; \ + FROBNAN_ENCDECLS; \ \ /* See if the native format is one that we recognize. 
*/ \ switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) { \ @@ -1215,8 +1255,8 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m) uint32 _t[1]; \ unsigned char *_z = (unsigned char *)(z_out); \ \ - (rc) = fltfmt_encieee(&fltfmt_f32, _t, (x), (r), FLTERR_ALLERRS); \ - FLTFMT__FROB_NAN_F32(_t, _rc); \ + FROBNAN_ENC; \ + (rc) = fltfmt_encieee(&fltfmt_f32, _t, _x, (r), FLTERR_ALLERRS); \ switch (TY##_FORMAT&FLTFMT_ENDMASK) { \ case FLTFMT_BE: STORE32_B(_z, _t[0]); break; \ case FLTFMT_LE: STORE32_L(_z, _t[0]); break; \ @@ -1227,8 +1267,9 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m) case FLTFMT_IEEE_F64: { \ uint32 _t[2]; \ unsigned char *_z = (unsigned char *)(z_out); \ - (rc) = fltfmt_encieee(&fltfmt_f64, _t, (x), (r), FLTERR_ALLERRS); \ - FLTFMT__FROB_NAN_F64(_t, _rc); \ + \ + FROBNAN_ENC; \ + (rc) = fltfmt_encieee(&fltfmt_f64, _t, _x, (r), FLTERR_ALLERRS); \ switch (TY##_FORMAT&FLTFMT_ENDMASK) { \ case FLTFMT_BE: \ STORE32_B(_z + 0, _t[0]); STORE32_B(_z + 4, _t[1]); \ @@ -1247,8 +1288,8 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m) uint32 _t[4]; \ unsigned char *_z = (unsigned char *)(z_out); \ \ - FLTFMT__FROB_NAN_F128(_t, _rc); \ - (rc) = fltfmt_encieee(&fltfmt_f128, _t, (x), (r), FLTERR_ALLERRS); \ + FROBNAN_ENC; \ + (rc) = fltfmt_encieee(&fltfmt_f128, _t, _x, (r), FLTERR_ALLERRS); \ switch (TY##_FORMAT&FLTFMT_ENDMASK) { \ case FLTFMT_BE: \ STORE32_B(_z + 0, _t[0]); STORE32_B(_z + 4, _t[1]); \ @@ -1266,8 +1307,9 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m) uint32 _t[3]; \ unsigned char *_z = (unsigned char *)(z_out); \ \ - (rc) = fltfmt_encieee(&fltfmt_idblext80, _t, (x), (r), FLTERR_ALLERRS); \ - FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc); \ + FROBNAN_ENC; \ + (rc) = fltfmt_encieee(&fltfmt_idblext80, \ + _t, _x, (r), FLTERR_ALLERRS); \ switch (TY##_FORMAT&FLTFMT_ENDMASK) { \ case FLTFMT_BE: \ STORE16_B(_z + 0, _t[0]); \ @@ -1284,7 
+1326,6 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m) default: { \ /* We must do this the hard way. */ \ \ - const struct floatbits *_x = (x); \ ty _z; \ unsigned _i; \ ENC_ROUND_DECLS; \ @@ -1466,7 +1507,16 @@ unsigned fltfmt_encldbl(long double *z_out, } while (0) #endif +#ifdef FROB_NANS +# define FROBNAN_DEC do { \ + if (_z->f&FLTF_NANMASK) _z->f ^= FLTF_NANMASK; \ + } while (0) +#else +# define FROBNAN_DEC do ; while (0) +#endif + #define DECFLT(ty, TY, frexp, rc, z_out, x, r) do { \ + struct floatbits *_z = (z_out); \ unsigned _rc = 0; \ \ switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) { \ @@ -1480,8 +1530,7 @@ unsigned fltfmt_encldbl(long double *z_out, case FLTFMT_LE: _t[0] = LOAD32_L(_x); break; \ default: assert(!"unimplemented byte order"); break; \ } \ - FLTFMT__FROB_NAN_F32(_t, _rc); \ - _rc |= fltfmt_decieee(&fltfmt_f32, (z_out), _t); \ + _rc |= fltfmt_decieee(&fltfmt_f32, _z, _t); FROBNAN_DEC; \ } break; \ \ case FLTFMT_IEEE_F64: { \ @@ -1500,8 +1549,7 @@ unsigned fltfmt_encldbl(long double *z_out, break; \ default: assert(!"unimplemented byte order"); break; \ } \ - FLTFMT__FROB_NAN_F64(_t, _rc); \ - _rc |= fltfmt_decieee(&fltfmt_f64, (z_out), _t); \ + _rc |= fltfmt_decieee(&fltfmt_f64, _z, _t); FROBNAN_DEC; \ } break; \ \ case FLTFMT_IEEE_F128: { \ @@ -1519,8 +1567,7 @@ unsigned fltfmt_encldbl(long double *z_out, break; \ default: assert(!"unimplemented byte order"); break; \ } \ - FLTFMT__FROB_NAN_F128(_t, _rc); \ - _rc |= fltfmt_decieee(&fltfmt_f128, (z_out), _t); \ + _rc |= fltfmt_decieee(&fltfmt_f128, _z, _t); FROBNAN_DEC; \ } break; \ \ case FLTFMT_INTEL_F80: { \ @@ -1538,12 +1585,10 @@ unsigned fltfmt_encldbl(long double *z_out, break; \ default: assert(!"unimplemented byte order"); break; \ } \ - FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc); \ - _rc |= fltfmt_decieee(&fltfmt_idblext80, (z_out), _t); \ + _rc |= fltfmt_decieee(&fltfmt_idblext80, _z, _t); FROBNAN_DEC; \ } break; \ \ default: { \ - struct 
floatbits *_z = (z_out); \ ty _x = (x), _y; \ unsigned _i, _n, _f = 0; \ uint32 _t; \ diff --git a/utils/t/fltfmt-test.c b/utils/t/fltfmt-test.c index 5b60607..9ed7c68 100644 --- a/utils/t/fltfmt-test.c +++ b/utils/t/fltfmt-test.c @@ -208,7 +208,7 @@ static void test_round(const struct tvec_reg *in, struct tvec_reg *out, static const struct tvec_test round_test = { "round", round_regs, 0, test_round }; -/*----- IEEE format conversion --------------------------------------------*/ +/*----- IEEE format conversions -------------------------------------------*/ #define IEEE_FORMATS(_) \ _(mini, 1) \ @@ -348,7 +348,7 @@ IEEE_FORMATS(DEF_TEST) #define DEF_IEEE_TEST(ty, sz) &enc##ty##_test, &dec##ty##_test, #define IEEE_TESTS IEEE_FORMATS(DEF_IEEE_TEST) -/*----- Native format conversion ------------------------------------------*/ +/*----- Native format conversions -----------------------------------------*/ #define NATIVE_FORMATS(_) \ _(flt, float, FLT) \ @@ -393,64 +393,64 @@ static const struct tvec_flag assume_flags[] = { static const struct tvec_flaginfo assume_flaginfo = { "assume", assume_flags, &tvrange_uint }; -struct nativeenv { struct tvec_env _env; unsigned ntv; }; -struct nativectx { unsigned af, want; }; +struct assumeenv { struct tvec_env _env; unsigned ntv; }; +struct assumectx { unsigned af, want; }; -static void setup_native(struct tvec_state *tv, const struct tvec_env *env, +static void setup_assume(struct tvec_state *tv, const struct tvec_env *env, void *pctx, void *ctx) { - const struct nativeenv *nenv = (const struct nativeenv *)env; - const struct ntvinfo *info = &ntvinfo[nenv->ntv]; - struct nativectx *nctx = ctx; + const struct assumeenv *aenv = (const struct assumeenv *)env; + const struct ntvinfo *info = &ntvinfo[aenv->ntv]; + struct assumectx *actx = ctx; double prec; switch (info->fmt&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) { case FLTFMT_IEEE_F32: - nctx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24; + actx->af = AF_NEGZ | AF_INF | AF_IEEE | 
AF_PREC24; break; case FLTFMT_IEEE_F64: - nctx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24 | AF_PREC53; + actx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24 | AF_PREC53; break; case FLTFMT_IEEE_F128: - nctx->af = AF_NEGZ | AF_INF | AF_IEEE | + actx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24 | AF_PREC53 | AF_PREC64 | AF_PREC113; break; case FLTFMT_INTEL_F80: - nctx->af = AF_NEGZ | AF_INF | AF_IEEE | + actx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24 | AF_PREC53 | AF_PREC64; break; default: - nctx->af = 0; - if (NEGP(-0.0)) nctx->af |= AF_NEGZ; + actx->af = 0; + if (NEGP(-0.0)) actx->af |= AF_NEGZ; #ifdef INF - nctx->af |= AF_INF; + actx->af |= AF_INF; #endif #ifdef NAN - nctx->af |= AF_STDCNAN; + actx->af |= AF_STDCNAN; #endif prec = log(FLT_RADIX)/log(2.0)*info->mant_dig; - if (prec >= 24) nctx->af |= AF_PREC24; - if (prec >= 53) nctx->af |= AF_PREC53; - if (prec >= 64) nctx->af |= AF_PREC64; - if (prec >= 113) nctx->af |= AF_PREC113; + if (prec >= 24) actx->af |= AF_PREC24; + if (prec >= 53) actx->af |= AF_PREC53; + if (prec >= 64) actx->af |= AF_PREC64; + if (prec >= 113) actx->af |= AF_PREC113; break; } - nctx->want = 0; + actx->want = 0; } -static int setvar_native(struct tvec_state *tv, const char *var, +static int setvar_assume(struct tvec_state *tv, const char *var, const union tvec_regval *rv, void *ctx) { - struct nativectx *nctx = ctx; + struct assumectx *actx = ctx; - if (STRCMP(var, ==, "@assume")) nctx->want = rv->u; + if (STRCMP(var, ==, "@assume")) actx->want = rv->u; else return (tvec_unkregerr(tv, var)); return (0); } static const struct tvec_vardef assume_vardef = - { sizeof(struct tvec_reg), setvar_native, + { sizeof(struct tvec_reg), setvar_assume, { "@assume", &tvty_flags, 0, 0, { &assume_flaginfo } }}; -static const struct tvec_vardef *findvar_native +static const struct tvec_vardef *findvar_assume (struct tvec_state *tv, const char *name, void **ctx_out, void *ctx) { if (STRCMP(name, ==, "@assume")) @@ -459,11 +459,11 @@ static const 
struct tvec_vardef *findvar_native return (0); } -static void before_native(struct tvec_state *tv, void *ctx) +static void before_assume(struct tvec_state *tv, void *ctx) { - struct nativectx *nctx = ctx; + struct assumectx *actx = ctx; - if (nctx->want&~nctx->af) + if ((tv->f&TVSF_ACTIVE) && (actx->want&~actx->af)) tvec_skip(tv, "unsatisfied assumption"); else { DEFAULT_REG(RROUND, rv->u = FLTRND_NEAREVEN); @@ -471,18 +471,18 @@ static void before_native(struct tvec_state *tv, void *ctx) } } -static void after_native(struct tvec_state *tv, void *ctx) +static void after_assume(struct tvec_state *tv, void *ctx) { - struct nativectx *nctx = ctx; + struct assumectx *actx = ctx; - nctx->want = 0; + actx->want = 0; } #define DEF_TEST(ty, cty, TY) \ \ - static struct nativeenv ty##_env = \ - { { sizeof(struct nativectx), \ - setup_native, findvar_native, before_native, 0, after_native, 0 }, \ + static struct assumeenv ty##_env = \ + { { sizeof(struct assumectx), \ + setup_assume, findvar_assume, before_assume, 0, after_assume, 0 }, \ NTV_##TY }; \ \ static const struct tvec_regdef enc##ty##_regs[] = { \ @@ -538,12 +538,93 @@ NATIVE_FORMATS(DEF_TEST) #define DEF_NATIVE_TEST(ty, cty, TY) &enc##ty##_test, &dec##ty##_test, #define NATIVE_TESTS NATIVE_FORMATS(DEF_NATIVE_TEST) +/*----- Direct conversions ------------------------------------------------*/ + +#define DIRECT_CONVERSIONS(_) \ + _(flt, float, f32) \ + _(dbl, double, f64) + +#define DEF_TEST1(ty, cty, fty, e) \ + static void test_##ty##to##fty##e(const struct tvec_reg *in, \ + struct tvec_reg *out, \ + void *ctx) \ + { \ + tvec_allocbytes(&out[RZ_OUT].v, OUTSZ_##fty); \ + out[RERR_OUT].v.u = fltfmt_##ty##to##fty##e(out[RZ_OUT].v.bytes.p, \ + in[RX].v.f, \ + in[RROUND].v.u); \ + } \ + \ + static const struct tvec_test ty##to##fty##e##_test = \ + { #ty "to" #fty #e, ty##to##fty##_regs, &ty##_env._env, \ + test_##ty##to##fty##e }; + +#define DEF_TEST(ty, cty, fty) \ + static const struct tvec_regdef 
ty##to##fty##_regs[] = { \ + { "round", &tvty_flags, RROUND, TVRF_OPT, { &fltrnd_flaginfo } }, \ + { "x", &tvty_float, RX, 0, { &tvflt_##cty } }, \ + { "z", &tvty_bytes, RZ_OUT, 0, { &fty##_range } }, \ + { "err", &tvty_flags, RERR_OUT, TVRF_OPT, { &flterr_flaginfo } }, \ + TVEC_ENDREGS \ + }; \ + \ + DEF_TEST1(ty, cty, fty, l) \ + DEF_TEST1(ty, cty, fty, b) + +DIRECT_CONVERSIONS(DEF_TEST) + +#undef DEF_TEST1 +#undef DEF_TEST + +#define DEF_TEST1(ty, cty, fty, e) \ + static void test_##fty##e##to##ty(const struct tvec_reg *in, \ + struct tvec_reg *out, \ + void *ctx) \ + { \ + cty z; \ + \ + out[RERR_OUT].v.u = fltfmt_##fty##e##to##ty(&z, in[RX].v.bytes.p, \ + in[RROUND].v.u); \ + out[RZ_OUT].v.f = z; \ + } \ + \ + static const struct tvec_test fty##e##to##ty##_test = \ + { #fty #e "to" #ty, fty##to##ty##_regs, &ty##_env._env, \ + test_##fty##e##to##ty }; + +#define DEF_TEST(ty, cty, fty) \ + static const struct tvec_regdef fty##to##ty##_regs[] = { \ + { "round", &tvty_flags, RROUND, TVRF_OPT, { &fltrnd_flaginfo } }, \ + { "x", &tvty_bytes, RX, 0, { &fty##_range } }, \ + { "z", &tvty_float, RZ_OUT, 0, { &tvflt_##cty } }, \ + { "err", &tvty_flags, RERR_OUT, TVRF_OPT, { &flterr_flaginfo } }, \ + TVEC_ENDREGS \ + }; \ + \ + DEF_TEST1(ty, cty, fty, l) \ + DEF_TEST1(ty, cty, fty, b) + +DIRECT_CONVERSIONS(DEF_TEST) + +#undef DEF_TEST1 +#undef DEF_TEST + +#define DEF_DIRECT_CTOF_TESTS(ty, cty, fty) \ + &ty##to##fty##l_test, &ty##to##fty##b_test, +#define DEF_DIRECT_FTOC_TESTS(ty, cty, fty) \ + &fty##l##to##ty##_test, &fty##b##to##ty##_test, +#define DEF_DIRECT_TESTS(ty, cty, fty) \ + DEF_DIRECT_CTOF_TESTS(ty, cty, fty) \ + DEF_DIRECT_FTOC_TESTS(ty, cty, fty) +#define DIRECT_TESTS DIRECT_CONVERSIONS(DEF_DIRECT_TESTS) + /*----- Main code ---------------------------------------------------------*/ static const struct tvec_test *const tests[] = { &round_test, NATIVE_TESTS IEEE_TESTS + DIRECT_TESTS 0 }; diff --git a/utils/t/fltfmt-testgen b/utils/t/fltfmt-testgen index 
7f63664..b9670b0 100755 --- a/utils/t/fltfmt-testgen +++ b/utils/t/fltfmt-testgen @@ -73,9 +73,12 @@ def explore(wd, lobits, hibits): for hi in xrange(bit(hibits)): top = hi << hishift for lo in xrange(bit(lobits)): + while True: + fill = R.randrange(midbit) + if fill != 0 and fill != midbit - 1: break base = lo | top yield base - yield base | (R.randrange(midbit) << lobits) + yield base | (fill << lobits) yield base | m class ExploreParameters (object): diff --git a/utils/t/fltfmt.tests b/utils/t/fltfmt.tests index bf85107..840f514 100644 --- a/utils/t/fltfmt.tests +++ b/utils/t/fltfmt.tests @@ -225,6 +225,31 @@ err = INEXACT [encf64] +;; NaN conversions + +f = QNAN +m = #empty +z = 7ff80000 00000000 + +f = SNAN +m = #empty +z = 7ff00000 00000001 +err = INEXACT + +f = SNAN +m = 00000000 00002000 +z = 7ff00000 00000001 + +f = SNAN +m = 00000000 00001fff +z = 7ff00000 00000001 +err = INEXACT + +f = SNAN +m = 00000000 00000000 00000001 +z = 7ff00000 00000001 +err = INEXACT + ;; Check NaN truncation. errmask = 0 @@ -418,7 +443,7 @@ z = -0.625 f = 0 e = 3 m = c90fdb00 -z = 6.2831854820251465 +z = 6.2831853 @assume = stdc-nan f = QNAN @@ -467,7 +492,7 @@ err = INEXACT @assume = ieee x = #nan f = QNAN -m = 00000000 ; maybe +;; m = ... something err = OK [encdbl] @@ -508,6 +533,7 @@ err = INEXACT f = QNAN m = #empty z = #nan +err = OK [decdbl] @@ -545,7 +571,88 @@ err = INEXACT @assume = ieee x = #nan f = QNAN -m = 00000000 00000000 ; maybe +;; m = ... something err = OK ;;;-------------------------------------------------------------------------- +;;; Direct conversions. 
+ +[f32btoflt] + +x = 00000000 +z = 0.0 + +@assume = negz +x = 80000000 +z = -0.0 + +@assume = inf +x = 7f800000 +z = #+inf + +@assume = inf +x = ff800000 +z = #-inf + +@assume = stdc-nan | ieee +x = 7fc00000 +z = #nan + +@assume = stdc-nan | ieee +x = 7fa00000 +z = #nan + +x = 3f800000 +z = 1.0 + +@assume = prec24 +x = 40c90fdb +z = 6.2831853 + +[f32ltoflt] + +x = 10c12549 +z = 678929 + +[flttof32b] + +x = 678929 +z = 4925c110 + +@assume = prec24 +x = 6.2831853 +z = 40c90fdb + +[flttof32l] + +x = 678929 +z = 10c12549 + +[dbltof64b] + +x = 0.0 +z = 00000000 00000000 + +x = -2964135146 +z = c1e615a3 9d400000 + +@assume = prec53 +x = 6.283185307179586 +z = 401921fb 54442d18 + +[dbltof64l] + +x = -2964135146 +z = 0000409d a315e6c1 + +[f64btodbl] + +x = c1e615a3 9d400000 +z = -2964135146 + +[f64ltodbl] + +x = 0000409d a315e6c1 +z = -2964135146 + +;;;-------------------------------------------------------------------------- -- [mdw]