chiark / gitweb /
@@@ fltfmt wip mdw/tvec
authorMark Wooding <mdw@distorted.org.uk>
Tue, 23 Apr 2024 23:49:56 +0000 (00:49 +0100)
committerMark Wooding <mdw@distorted.org.uk>
Tue, 23 Apr 2024 23:49:56 +0000 (00:49 +0100)
defs.man
mem/arena.3.in
test/bench.3.in
test/tvec-types.c
utils/fltfmt-convert.c
utils/fltfmt.3.in
utils/fltfmt.c
utils/t/fltfmt-test.c
utils/t/fltfmt-testgen
utils/t/fltfmt.tests

index 379edbb7687c734d559cf91309f5b9fdcc719aee..67ff7be50cca0e541e590aec20ba3bc40a74d1d3 100644 (file)
--- a/defs.man
+++ b/defs.man
@@ -38,6 +38,8 @@
 .  ds /= \(!=
 .  ds <= \(<=
 .  ds >= \(>=
+.  ds mu \(mu
+.  ds sr \(sr
 .  ds ' \(fm
 .  ds , \h'\w'\ 'u/2u'
 .  if \n(.g \{\
@@ -53,6 +55,8 @@
 .  ds se
 .  ds us _
 .  ds ue
+.  ds mu *
+.  ds sr sqrt
 .  ds ' \(aq
 .  ds *d \,\fIdelta\/\fP
 .  ds /= /=
index 09207fa6d7a8f7b708763e045d53d749081f5f99..593c25d5f53e4d80ca30c752b9c9f8801aa2a7f4 100644 (file)
@@ -150,12 +150,12 @@ handlers which can't easily find the old block's size.
 The macro
 .B ALLOCV_SAFE_P
 returns nonzero if the product
-.IR n "\ \(mu\ " sz
+.IR n "\ \*(mu\ " sz
 is representable in type
 .B size_t
 and zero otherwise;
 i.e., it returns true if it would be safe to try to allocate
-.IR n "\ \(mu\ " sz
+.IR n "\ \*(mu\ " sz
 bytes.
 The macro
 .BR A_ALLOCV
index d526de8f82154ac4e72a564360d724d3645b2b7b..3ce25f36e95f90237573a615c02e8d610dad3992 100644 (file)
@@ -794,7 +794,7 @@ with the objective of finding an iteration count
 such that
 .I n
 iterations of the computation take more than
-.IB b ->target_s "" \fR/\(sr2
+.IB b ->target_s "" \fR/\*(sr2
 seconds.
 If measurement fails,
 then
@@ -807,7 +807,7 @@ is set to zero, and
 is filled in with the measurement;
 .IB t_out ->n
 is set to
-.IR n "\ \(mu\ " base .
+.IR n "\ \*(mu\ " base .
 .PP
 The
 .B BENCH_MEASURE_TAG
@@ -902,7 +902,7 @@ the
 flag must be set in
 .IB t ->f \fR.
 If the timing is sufficient \(en if
-.IR t\fB->t "\ \*(>=\ " target_s /\(sr2
+.IR t\fB->t "\ \*(>=\ " target_s /\*(sr2
 \(en then
 .B bench_adapt
 returns a nonzero value to indicate that measurement is complete.
@@ -922,7 +922,7 @@ On exit, the timing data is updated,
 and
 .IB t ->n
 is set to the product
-.IR n "\ \(mu\ " base .
+.IR n "\ \*(mu\ " base .
 .
 .SS Reporting results
 The
index 8d07ea82b4df21dc43721e8055cbacec0394c992..1d01a3d44027561e3d47e43d9b26fffefbd705c5 100644 (file)
@@ -528,18 +528,21 @@ static void format_size(const struct gprintf_ops *gops, void *go,
 static int eqish_floating_p(double x, double y,
                            const struct tvec_floatinfo *fi)
 {
-  double t;
+  double t, u;
 
+  /* NaNs and infinities are equal only to each other. */
   if (NANP(x)) return (NANP(y)); else if (NANP(y)) return (0);
   if (INFP(x)) return (x == y); else if (INFP(y)) return (0);
 
+  /* Compare finite values. */
   switch (fi ? fi->f&TVFF_EQMASK : TVFF_EXACT) {
     case TVFF_EXACT:
       return (x == y && NEGP(x) == NEGP(y));
     case TVFF_ABSDELTA:
-      t = x - y; if (t < 0) t = -t; return (t < fi->delta);
+      t = fabs(y - x); return (t < fi->delta);
     case TVFF_RELDELTA:
-      t = 1.0 - x/y; if (t < 0) t = -t; return (t < fi->delta);
+      t = fabs(y - x); u = fabs(y*fi->delta); if (u < DBL_MIN) u = DBL_MIN;
+      return (t <= u);
     default:
       abort();
   }
@@ -1928,8 +1931,8 @@ const struct tvec_regty tvty_float = {
 
 /* Predefined floating-point ranges. */
 const struct tvec_floatinfo
-  tvflt_float = { TVFF_EXACT | TVFF_INFOK | TVFF_NANOK,
-                 -FLT_MAX, FLT_MAX, 0.0 },
+  tvflt_float = { TVFF_RELDELTA | TVFF_INFOK | TVFF_NANOK,
+                 -FLT_MAX, FLT_MAX, FLT_EPSILON/2 },
   tvflt_double = { TVFF_EXACT | TVFF_INFOK | TVFF_NANOK,
                   -DBL_MAX, DBL_MAX, 0.0 },
   tvflt_finite = { TVFF_EXACT, -DBL_MAX, DBL_MAX, 0.0 },
index 6db742befdef8f2058568ea42f10a65d70153ddb..cc24dba5f777b81f7201a4820ef09ef03121bc60 100644 (file)
@@ -1,6 +1,6 @@
 /* -*-c-*-
  *
- * Floating-point format conversions
+ * Direct floating-point format conversions
  *
  * (c) 2024 Straylight/Edgeware
  */
   _(float, flt, f32)                                                   \
   _(double, dbl, f64)
 
+#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008))
+#  define FROB_NANS
+#endif
+
 #define CONV_DECLS_flt_f32 uint32 t
 #if (FLT_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) == FLTFMT_IEEE_F32
 #  if (FLT_FORMAT&FLTFMT_ENDMASK) == FLTFMT_BE
 #  else
 #    error "unimplemented byte order"
 #  endif
-#  ifdef FLTFMT__MUST_FROB_NANS
-#    define CONV_FROB_flt_f32 do { FLTFMT__FROB_NAN_F32(&t, rc); } while (0)
+#  ifdef FROB_NANS
+#    define CONV_FROBNANflt_f32 do {                                   \
+       if ((t&0x7f800000) != 0x7f800000 || !(t&0x007fffff))            \
+        ;                                                              \
+       else if (t&0x003fffff)                                          \
+        t ^= 0x00400000;                                               \
+       else {                                                          \
+        t = (t&0x80000000) | 0x00000001;                               \
+        rc |= FLTERR_INEXACT;                                          \
+       }                                                               \
+     } while (0)
 #  else
-#    define CONV_FROB_flt_f32 do ; while (0)
+#    define CONV_FROBNANflt_f32 do ; while (0)
 #  endif
 #else
 #  define CONV_LOAD_flt_f32 do {                                       \
@@ -69,7 +82,7 @@
      rc |= fltfmt_encflt(z_out, &u, r);                                        \
      fltfmt_freebits(&u);                                              \
    } while (0)
-#  define CONV_FROB_flt_f32 do ; while (0)
+#  define CONV_FROBNANflt_f32 do ; while (0)
 #endif
 #define CONV_LOADB_flt_f32 do { t = LOAD32_B(p); } while (0)
 #define CONV_LOADL_flt_f32 do { t = LOAD32_L(p); } while (0)
 #  else
 #    error "unimplemented byte order"
 #  endif
-#  ifdef FLTFMT__MUST_FROB_NANS
-#    define CONV_FROB_dbl_f64 do {                                     \
-       uint32 u[2];                                                    \
-       u[0] = HI64(t); u[1] = LO64(t);                                 \
-       FLTFMT__FROB_NAN_F64(&u, rc);                                   \
-       SET64(t, u[0], u[1]);                                           \
+#  ifdef FROB_NANS
+#    define CONV_FROBNANdbl_f64 do {                                   \
+       kludge64 u, v;                                                  \
+       SET64(u, 0x7ff00000, 0x00000000); AND64(v, t, u);               \
+       if (CMP64(v, ==, u)) {                                          \
+        SET64(u, 0x000fffff, 0xffffffff); AND64(v, t, u);              \
+        if (!ZERO64(v)) {                                              \
+          SET64(u, 0x0007ffff, 0xffffffff); AND64(v, t, u);            \
+          if (!ZERO64(v))                                              \
+            { SET64(u, 0x00080000, 0x00000000); XOR64(t, t, u); }      \
+          else {                                                       \
+            SET64(u, 0x80000000, 0x00000000); AND64(t, t, u);          \
+            SET64(u, 0x00000000, 0x00000001); OR64(t, t, u);           \
+            rc |= FLTERR_INEXACT;                                      \
+          }                                                            \
+        }                                                              \
+       }                                                               \
      } while (0)
 #  else
-#    define CONV_FROB_dbl_f64 do ; while (0)
+#    define CONV_FROBNANdbl_f64 do ; while (0)
 #  endif
 #else
 #  define CONV_LOAD_dbl_f64 do {                                       \
      rc |= fltfmt_encdbl(z_out, &u, r);                                        \
      fltfmt_freebits(&u);                                              \
    } while (0)
-#  define CONV_FROB_dbl_f64 do ; while (0)
+#  define CONV_FROBNANdbl_f64 do ; while (0)
 #endif
 #define CONV_LOADB_dbl_f64 do { LOAD64_B_(t, p); } while (0)
 #define CONV_LOADL_dbl_f64 do { LOAD64_L_(t, p); } while (0)
     unsigned rc = 0; CONV_DECLS_##cty##_##fty;                         \
                                                                        \
     CONV_LOAD_##cty##_##fty;                                           \
-    CONV_FROB_##cty##_##fty;                                           \
+    CONV_FROBNAN##cty##_##fty;                                         \
     CONV_STOREL_##cty##_##fty;                                         \
     return (rc);                                                       \
   }                                                                    \
     unsigned rc = 0; CONV_DECLS_##cty##_##fty;                         \
                                                                        \
     CONV_LOAD_##cty##_##fty;                                           \
-    CONV_FROB_##cty##_##fty;                                           \
+    CONV_FROBNAN##cty##_##fty;                                         \
     CONV_STOREB_##cty##_##fty;                                         \
     return (rc);                                                       \
   }
@@ -188,7 +212,7 @@ CONVERSIONS(DEF_CONV)
     unsigned rc = 0; CONV_DECLS_##cty##_##fty;                         \
                                                                        \
     CONV_LOADL_##cty##_##fty;                                          \
-    CONV_FROB_##cty##_##fty;                                           \
+    CONV_FROBNAN##cty##_##fty;                                         \
     CONV_STORE_##cty##_##fty;                                          \
     return (rc);                                                       \
   }                                                                    \
@@ -198,7 +222,7 @@ CONVERSIONS(DEF_CONV)
     unsigned rc = 0; CONV_DECLS_##cty##_##fty;                         \
                                                                        \
     CONV_LOADB_##cty##_##fty;                                          \
-    CONV_FROB_##cty##_##fty;                                           \
+    CONV_FROBNAN##cty##_##fty;                                         \
     CONV_STORE_##cty##_##fty;                                          \
     return (rc);                                                       \
   }
index e5ae258bc8b430f9e4172f80dea9ee5221275ca0..6343c7966e7a1d3270a995f0dd43d418d4f2dfbc 100644 (file)
@@ -295,7 +295,7 @@ This error flag is sometimes set conservatively.
 .TP
 .B FLTERR_UFLOW
 The conversion underflowed:
-a nonzero input was too tiny (in asbolute value) to represent,
+a nonzero input was too tiny (in absolute value) to represent,
 and a zero result was returned.
 .TP
 .B FLTERR_OFLOW
@@ -393,7 +393,7 @@ and
 are mask with set bits corresponding to their respective predicates.
 Bitwise boolean logic can be applied to these masks
 in order to calculate the masks corresponding to
-the same logical expresssion applied to the individual predicates.
+the same logical expression applied to the individual predicates.
 .B FRPMASK_INEXACT holds if
 .B LOW
 or
@@ -416,6 +416,73 @@ denoted
 This is likely a good option
 if there is no compelling argument for a different specific choice.
 .
+.SS Direct conversions
+The functions
+.B fltfmt_flttof32l
+and
+.B fltfmt_flttof32b
+convert a
+.B float
+argument to an IEEE\ 754 Binary32 value
+in little- or big-endian byte order, respectively;
+similarly
+.B fltfmt_dbltof64l
+and
+.B fltfmt_dbltof64b
+convert a
+.B double
+argument to an IEEE\ 754 Binary64 value
+in little- or big-endian byte order, respectively.
+The value to convert is given as
+.I x
+and the result is written at the address
+.IR p .
+.PP
+The functions
+.B fltfmt_f32ltoflt
+and
+.B fltfmt_f32btoflt
+convert an IEEE\ 754 Binary32 value,
+in little- or big-endian byte order, respectively,
+to a
+.BR float ;
+similarly,
+.B fltfmt_f64ltodbl
+and
+.B fltfmt_f64btodbl
+convert an IEEE\ 754 Binary64 value,
+in little- or big-endian byte order, respectively,
+to a
+.BR double .
+The value to convert is read from address
+.I p
+and the result is written to
+.RI * z_out \fR.
+.PP
+All of these functions additionally take a rounding mode
+.I r
+which is applied if the conversion cannot be performed exactly,
+and return an error code as described above.
+.PP
+On many modern platforms, the
+.B float
+and
+.B double
+types are represented internally using the IEEE
+Binary32 and Binary64 formats,
+so these conversions are trivial, or nearly so.
+A complication arises on PA-RISC and older MIPS processors:
+see the descriptions of
+.B fltfmt_encieee
+and
+.B fltfmt_decdbl
+below for the details.
+.PP
+On other platforms,
+the conversion is decidedly nontrivial,
+and makes use of the machinery described below;
+this may also be useful for more complex conversions.
+.
 .SS The floatbits structure
 In order to avoid a combinatorial explosion in conversion operations,
 all the basic conversions involve,
@@ -540,7 +607,7 @@ or +1 if
 .B FLTF_NEG
 is clear,
 then the number represented is
-.IR s "\ \(mu\ " m "\ \(mu\ 2\*(ss" e \*(se.
+.IR s "\ \*(mu\ " m "\ \*(mu\ 2\*(ss" e \*(se.
 .PP
 A
 .B struct floatbits
@@ -564,6 +631,9 @@ neither allocates any storage or other resources,
 leaving
 .B frac
 null.
+In this state, it is safe to modify the arena pointer
+.B a
+if the default initialization is unsatisfactory.
 .PP
 The
 .B fltfmt_allocfrac
@@ -707,7 +777,7 @@ If
 .IR e "\ =\ \-" e \*(us0\*(ue
 then the value is zero or a subnormal,
 with the value
-.RI (\-1)\*(ss s "\*(se\ \(mu\ " m "\ \(mu\ 2\*(ss" e +1\*(se.
+.RI (\-1)\*(ss s "\*(se\ \*(mu\ " m "\ \*(mu\ 2\*(ss" e +1\*(se.
 In particular,
 if
 .IR m "\ =\ 0"
@@ -731,7 +801,7 @@ but the result will be as described.
 If
 .RI "1\ \-\ " e "\*(us0\*(ue \*(<=\ " e "\ < 2" e "\*(us0\*(ue\ +\ 1"
 then the value is a (supposedly) normal number
-.RI (\-1)\*(ss s "\*(se\ \(mu\ " m "\ \(mu\ 2\*(ss" e \*(se.
+.RI (\-1)\*(ss s "\*(se\ \*(mu\ " m "\ \*(mu\ 2\*(ss" e \*(se.
 If
 .I h
 is zero then
@@ -821,58 +891,7 @@ in the notation above this is
 The precision;
 in the notation above this is
 .IR p .
-.PP
-The following IEEE formats descriptions are already defined.
-.TP
-.B "fltfmt_f16"
-The IEEE\ 754 Binary16 format, with
-.IR w "\ =\ 5,"
-.IR p "\ =\ 11,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_f32"
-The IEEE\ 754 Binary32 (`single precision') format, with
-.IR w "\ =\ 8,"
-.IR p "\ =\ 24,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_f64"
-The IEEE\ 754 Binary64 (`double precision') format, with
-.IR w "\ =\ 11,"
-.IR p "\ =\ 53,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_f128"
-The IEEE\ 754 Binary128 (`quad precision') format, with
-.IR w "\ =\ 15,"
-.IR p "\ =\ 113,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_mini"
-An eight-bit `minifloat' format, with
-.IR w "\ =\ 4,"
-.IR p "\ =\ 4,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_bf16"
-The Google `BFloat16' format, with
-.IR w "\ =\ 8,"
-.IR p "\ =\ 8,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_idblext80"
-The Intel 8087 80-bit `double extended' format, with
-.IR w "\ =\ 15,"
-.IR p "\ =\ 64,"
-and
-.IR h "\ =\ 1."
-.PP
+
 The
 .B fltfmt_encieee
 and
@@ -921,6 +940,10 @@ discarding low-significant bits;
 if the input is a finite value,
 then the significand is rounded to fit
 according to the requested rounding mode.
+If a signalling NaN ends up with all of its payload bits zero,
+as a result of truncation or otherwise,
+then the least-significant bit of the output payload is forced on
+in order to distinguish the result from an infinity.
 The possible errors are
 .B FLTERR_UFLOW
 if the value is unrepresentably tiny,
@@ -969,10 +992,185 @@ the result is returned anyway,
 with the unit bit interpreted as encoded in finite numbers,
 and discarded in infinities and NaNs.
 .PP
-For each of the format
-
-
-
+A number of IEEE and IEEE-like formats are predefined:
+for format
+.IR fmt ,
+there is
+an IEEE format description, named
+.BI fltfmt_ fmt \fR,
+together with encoding and decoding functions, named
+.BI fltfmt_enc fmt
+and
+.BI fltfmt_dec fmt \fR;
+for the most part,
+these functions use more convenient types
+to hold encoded values.
+.TP
+.B "f16"
+The IEEE\ 754 Binary16 format, with
+.IR w "\ =\ 5,"
+.IR p "\ =\ 11,"
+and
+.IR h "\ =\ 0;"
+stored in a
+.BR uint16 .
+.TP
+.B "fltfmt_f32"
+The IEEE\ 754 Binary32 (`single precision') format, with
+.IR w "\ =\ 8,"
+.IR p "\ =\ 24,"
+and
+.IR h "\ =\ 0;"
+stored in a
+.BR uint32 .
+.TP
+.B "fltfmt_f64"
+The IEEE\ 754 Binary64 (`double precision') format, with
+.IR w "\ =\ 11,"
+.IR p "\ =\ 53,"
+and
+.IR h "\ =\ 0;"
+stored in a
+.B kludge64
+(see
+.BR bits (3)
+for details).
+.TP
+.B "fltfmt_f128"
+The IEEE\ 754 Binary128 (`quad precision') format, with
+.IR w "\ =\ 15,"
+.IR p "\ =\ 113,"
+and
+.IR h "\ =\ 0;"
+stored in a big-endian vector of
+.BR uint32 ,
+just as for the generic functions described above.
+.TP
+.B "fltfmt_mini"
+An eight-bit `minifloat' format, with
+.IR w "\ =\ 4,"
+.IR p "\ =\ 4,"
+and
+.IR h "\ =\ 0;"
+stored in an
+.BR octet .
+.TP
+.B "fltfmt_bf16"
+The Google `BFloat16' format, with
+.IR w "\ =\ 8,"
+.IR p "\ =\ 8,"
+and
+.IR h "\ =\ 0;"
+stored in a
+.BR uint16 .
+.TP
+.B "fltfmt_idblext80"
+The Intel 8087 80-bit `double extended' format, with
+.IR w "\ =\ 15,"
+.IR p "\ =\ 64,"
+and
+.IR h "\ =\ 1;"
+stored as a
+.B uint16
+holding the sign and exponent,
+and a
+.B kludge64
+holding the significand.
+.
+.SS Native formats
+There are also functions for converting between
+.B struct floatbits
+and the implementation's native floating-point types
+.B float
+(abbreviated
+.BR flt ),
+.B double
+(abbreviated
+.BR dbl ),
+and
+.B "long double"
+(abbreviated
+.BR ldbl ).
+.PP
+For each native type abbreviation
+.IR ty ,
+there are functions
+.BI fltfmt_enc ty
+and
+.BI fltfmt_dec ty \fR,
+which respectively convert the value held in
+.B struct floatbits
+to or from a value of the corresponding C type.
+(The functions acting on
+.B long double
+values are only available if the platform supports C99 or later.)
+.PP
+The
+.BI fltfmt_enc ty
+functions read an input value from a
+.B struct floatbits
+pointer
+.I x
+and store the encoded result through a pointer
+.I z_out
+to the appropriate C type;
+the function also receives a rounding mode
+.IR r ,
+but see below.
+The
+.BI fltfmt_dec ty
+functions are given an input value of the appropriate C type,
+and store the decoded result in a
+.B struct floatbits
+structure pointed to by
+.IR z_out ;
+again, the function also receives a rounding mode
+.IR r ,
+but see below.
+.PP
+These functions can use two different strategies for conversion.
+If the compile-time configuration step detects
+that the implementation is using
+a specific, supported format for a native type,
+then conversions involving the native type
+are performed using the existing machinery for that format.
+For example, if,
+as is in fact nearly universal on modern-ish systems,
+the
+.B double
+type uses the IEEE\ 754 Binary64 format,
+then
+.B fltfmt_encdbl
+and
+.B fltfmt_decdbl
+use the functions
+.B fltfmt_encf64
+and
+.B fltfmt_decf64
+described above for the conversion.
+This approach has the benefit that
+everything is done under the control of the
+.B fltfmt
+machinery,
+which can faithfully preserve signs of zero values,
+and NaN payloads.
+The error conditions are, for the most part, the same as for the
+.B fltfmt_encieee
+and
+.B fltfmt_decieee
+functions described above.
+The encoding functions have an additional source of inexactness
+on PA-RISC and older MIPS processors
+which use the reversed quiet/signalling NaN convention:
+a quiet NaN with an all-zero payload
+is not representable on such implementations
+(the encoding is an infinity instead);
+in this situation,
+the least significant payload bit is forced on,
+just as if the payload required truncation,
+and
+.B FLTERR_INEXACT
+is returned.
 .
 .\"--------------------------------------------------------------------------
 .SH "SEE ALSO"
index deaf553667bbaee00acd68673766ed64852640a2..cb39eca4cbaa145f87ecc360fd7b3eb735f763b1 100644 (file)
 #include "bits.h"
 #include "fltfmt.h"
 #include "growbuf.h"
-#include "macros.h"
 #include "maths.h"
 
+/*----- Preliminary hacking -----------------------------------------------*/
+
+/* The native-format conversions are -- at least if the format is
+ * unrecognized -- dependent on the implementation's rounding.  Our own
+ * rounding mode specifications don't fit into the framework very well, but I
+ * still want to respect the prevailing rounding mode.
+ *
+ * The `proper' way to do this is with %|#pragma STDC FENV_ACCESS|%.  But
+ * that doesn't actually work on GCC, or on Clang from not too long ago.  So
+ * use compiler-specific hacking to support this.
+ */
 #if GCC_VERSION_P(4, 4)
 #  pragma GCC optimize "-frounding-math"
 #elif CLANG_VERSION_P(11, 0) && !CLANG_VERSION_P(12, 0)
@@ -531,7 +541,7 @@ unsigned fltfmt_round(struct floatbits *z_out, const struct floatbits *x,
   return (rc);
 }
 
-/*----- IEEE formats ------------------------------------------------------*/
+/*----- IEEE and related formats ------------------------------------------*/
 
 /* IEEE (and related) format descriptions. */
 const struct fltfmt_ieeefmt
@@ -622,16 +632,24 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt,
     /* Copy the payload.
      *
      * If the payload is all-zero and we're meant to set a signalling NaN
-     * then report an exactness failure and set the low bit.
+     * then report an exactness failure and set the least-significant bit.
      */
     mb = fmt->prec - 2; mw = (mb + 31)/32; sh = -mb%32;
-    for (i = 0; i < nw - mw; i++) z[i] = 0;
-    n = x->n; if (n > mw) n = nw;
-    t = shr(z + i, x->frac, n, sh); i += n;
-    if (i < nw) z[i++] = t;
-    sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++;
-    if (f&FLTF_QNAN) z0 |= B32(sh);
-    else if (!fracwd) { ERR(FLTERR_INEXACT); z[nw - 1] |= 1; }
+    n = x->n;
+      if (n < mw) j = 0;
+      else { n = mw; j = sh; }
+    if ((f&FLTF_SNAN) && ms_set_bit(x->frac + n, j, 32*n) == ALLCLEAR) {
+      ERR(FLTERR_INEXACT);
+      n = nw - 1; for (i = 0; i < n; i++) z[i] = 0;
+      z[i++] = 1;
+    } else {
+      for (i = 0; i < nw - mw; i++) z[i] = 0;
+      n = x->n; if (n > mw) n = mw;
+      t = shr(z + i, x->frac, n, sh); i += n;
+      if (i < nw) z[i++] = t;
+      sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++;
+      if (f&FLTF_QNAN) z0 |= B32(sh);
+    }
 
     /* Set the exponent and, for non-hidden-bit formats, the unit bit. */
     z0 |= M32(fmt->expwd) << esh;
@@ -1123,6 +1141,13 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
 #  define DIGIT_BITS 4
 #endif
 
+/* Take note if we need to cope with the reversed quiet/signalling convention
+ * used by HP-PA and older MIPS processors.
+ */
+#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008))
+#  define FROB_NANS
+#endif
+
 /* --- @ENCFLT@ --- *
  *
  * Arguments:  @ty@ = the C type to encode
@@ -1205,8 +1230,23 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
 
 #endif
 
+#ifdef FROB_NANS
+#  define FROBNAN_ENCDECLS     struct floatbits _y
+#  define FROBNAN_ENC do {                                             \
+     if (_x->f&FLTF_NANMASK) {                                         \
+       _y.f = _x->f ^ FLTF_NANMASK; _y.frac = _x->frac; _y.n = _x->n;  \
+       _x = &_y;                                                       \
+     }                                                                 \
+   } while (0)
+#else
+#  define FROBNAN_ENCDECLS
+#  define FROBNAN_ENC do ; while (0)
+#endif
+
 #define ENCFLT(ty, TY, ldexp, rc, z_out, x, r) do {                    \
+  const struct floatbits *_x = (x);                                    \
   unsigned _rc = 0;                                                    \
+  FROBNAN_ENCDECLS;                                                    \
                                                                        \
   /* See if the native format is one that we recognize. */             \
   switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {            \
@@ -1215,8 +1255,8 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
       uint32 _t[1];                                                    \
       unsigned char *_z = (unsigned char *)(z_out);                    \
                                                                        \
-      (rc) = fltfmt_encieee(&fltfmt_f32, _t, (x), (r), FLTERR_ALLERRS);        \
-      FLTFMT__FROB_NAN_F32(_t, _rc);                                   \
+      FROBNAN_ENC;                                                     \
+      (rc) = fltfmt_encieee(&fltfmt_f32, _t, _x, (r), FLTERR_ALLERRS); \
       switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
        case FLTFMT_BE: STORE32_B(_z, _t[0]); break;                    \
        case FLTFMT_LE: STORE32_L(_z, _t[0]); break;                    \
@@ -1227,8 +1267,9 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
     case FLTFMT_IEEE_F64: {                                            \
       uint32 _t[2];                                                    \
       unsigned char *_z = (unsigned char *)(z_out);                    \
-      (rc) = fltfmt_encieee(&fltfmt_f64, _t, (x), (r), FLTERR_ALLERRS);        \
-      FLTFMT__FROB_NAN_F64(_t, _rc);                                   \
+                                                                       \
+      FROBNAN_ENC;                                                     \
+      (rc) = fltfmt_encieee(&fltfmt_f64, _t, _x, (r), FLTERR_ALLERRS); \
       switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
        case FLTFMT_BE:                                                 \
          STORE32_B(_z + 0, _t[0]); STORE32_B(_z + 4, _t[1]);           \
@@ -1247,8 +1288,8 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
       uint32 _t[4];                                                    \
       unsigned char *_z = (unsigned char *)(z_out);                    \
                                                                        \
-      FLTFMT__FROB_NAN_F128(_t, _rc);                                  \
-      (rc) = fltfmt_encieee(&fltfmt_f128, _t, (x), (r), FLTERR_ALLERRS); \
+      FROBNAN_ENC;                                                     \
+      (rc) = fltfmt_encieee(&fltfmt_f128, _t, _x, (r), FLTERR_ALLERRS);        \
       switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
        case FLTFMT_BE:                                                 \
          STORE32_B(_z +  0, _t[0]); STORE32_B(_z +  4, _t[1]);         \
@@ -1266,8 +1307,9 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
       uint32 _t[3];                                                    \
       unsigned char *_z = (unsigned char *)(z_out);                    \
                                                                        \
-      (rc) = fltfmt_encieee(&fltfmt_idblext80, _t, (x), (r), FLTERR_ALLERRS); \
-      FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc);                             \
+      FROBNAN_ENC;                                                     \
+      (rc) = fltfmt_encieee(&fltfmt_idblext80,                         \
+                           _t, _x, (r), FLTERR_ALLERRS);               \
       switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
        case FLTFMT_BE:                                                 \
          STORE16_B(_z + 0, _t[0]);                                     \
@@ -1284,7 +1326,6 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
     default: {                                                         \
       /* We must do this the hard way. */                              \
                                                                        \
-      const struct floatbits *_x = (x);                                        \
       ty _z;                                                           \
       unsigned _i;                                                     \
       ENC_ROUND_DECLS;                                                 \
@@ -1466,7 +1507,16 @@ unsigned fltfmt_encldbl(long double *z_out,
    } while (0)
 #endif
 
+#ifdef FROB_NANS
+#  define FROBNAN_DEC do {                                             \
+     if (_z->f&FLTF_NANMASK) _z->f ^= FLTF_NANMASK;                    \
+   } while (0)
+#else
+#  define FROBNAN_DEC do ; while (0)
+#endif
+
 #define DECFLT(ty, TY, frexp, rc, z_out, x, r) do {                    \
+  struct floatbits *_z = (z_out);                                      \
   unsigned _rc = 0;                                                    \
                                                                        \
   switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {            \
@@ -1480,8 +1530,7 @@ unsigned fltfmt_encldbl(long double *z_out,
        case FLTFMT_LE: _t[0] = LOAD32_L(_x); break;                    \
        default: assert(!"unimplemented byte order"); break;            \
       }                                                                        \
-      FLTFMT__FROB_NAN_F32(_t, _rc);                                   \
-      _rc |= fltfmt_decieee(&fltfmt_f32, (z_out), _t);                 \
+      _rc |= fltfmt_decieee(&fltfmt_f32, _z, _t); FROBNAN_DEC;         \
     } break;                                                           \
                                                                        \
     case FLTFMT_IEEE_F64: {                                            \
@@ -1500,8 +1549,7 @@ unsigned fltfmt_encldbl(long double *z_out,
          break;                                                        \
        default: assert(!"unimplemented byte order"); break;            \
       }                                                                        \
-      FLTFMT__FROB_NAN_F64(_t, _rc);                                   \
-      _rc |= fltfmt_decieee(&fltfmt_f64, (z_out), _t);                 \
+      _rc |= fltfmt_decieee(&fltfmt_f64, _z, _t); FROBNAN_DEC;         \
     } break;                                                           \
                                                                        \
     case FLTFMT_IEEE_F128: {                                           \
@@ -1519,8 +1567,7 @@ unsigned fltfmt_encldbl(long double *z_out,
          break;                                                        \
        default: assert(!"unimplemented byte order"); break;            \
       }                                                                        \
-      FLTFMT__FROB_NAN_F128(_t, _rc);                                  \
-      _rc |= fltfmt_decieee(&fltfmt_f128, (z_out), _t);                        \
+      _rc |= fltfmt_decieee(&fltfmt_f128, _z, _t); FROBNAN_DEC;                \
     } break;                                                           \
                                                                        \
     case FLTFMT_INTEL_F80: {                                           \
@@ -1538,12 +1585,10 @@ unsigned fltfmt_encldbl(long double *z_out,
          break;                                                        \
        default: assert(!"unimplemented byte order"); break;            \
       }                                                                        \
-      FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc);                             \
-      _rc |= fltfmt_decieee(&fltfmt_idblext80, (z_out), _t);           \
+      _rc |= fltfmt_decieee(&fltfmt_idblext80, _z, _t); FROBNAN_DEC;   \
     } break;                                                           \
                                                                        \
     default: {                                                         \
-      struct floatbits *_z = (z_out);                                  \
       ty _x = (x), _y;                                                 \
       unsigned _i, _n, _f = 0;                                         \
       uint32 _t;                                                       \
index 5b606077e3f09ad1b2cff6fc9c735b37b358b200..9ed7c68440cc34895eebeca6e9fc906d2672e7d1 100644 (file)
@@ -208,7 +208,7 @@ static void test_round(const struct tvec_reg *in, struct tvec_reg *out,
 static const struct tvec_test round_test =
   { "round", round_regs, 0, test_round };
 
-/*----- IEEE format conversion --------------------------------------------*/
+/*----- IEEE format conversion-------------------------------------------*/
 
 #define IEEE_FORMATS(_)                                                        \
   _(mini, 1)                                                           \
@@ -348,7 +348,7 @@ IEEE_FORMATS(DEF_TEST)
 #define DEF_IEEE_TEST(ty, sz) &enc##ty##_test, &dec##ty##_test,
 #define IEEE_TESTS IEEE_FORMATS(DEF_IEEE_TEST)
 
-/*----- Native format conversion ------------------------------------------*/
+/*----- Native format conversion-----------------------------------------*/
 
 #define NATIVE_FORMATS(_)                                              \
   _(flt, float, FLT)                                                   \
@@ -393,64 +393,64 @@ static const struct tvec_flag assume_flags[] = {
 static const struct tvec_flaginfo assume_flaginfo =
   { "assume", assume_flags, &tvrange_uint };
 
-struct nativeenv { struct tvec_env _env; unsigned ntv; };
-struct nativectx { unsigned af, want; };
+struct assumeenv { struct tvec_env _env; unsigned ntv; };
+struct assumectx { unsigned af, want; };
 
-static void setup_native(struct tvec_state *tv, const struct tvec_env *env,
+static void setup_assume(struct tvec_state *tv, const struct tvec_env *env,
                         void *pctx, void *ctx)
 {
-  const struct nativeenv *nenv = (const struct nativeenv *)env;
-  const struct ntvinfo *info = &ntvinfo[nenv->ntv];
-  struct nativectx *nctx = ctx;
+  const struct assumeenv *aenv = (const struct assumeenv *)env;
+  const struct ntvinfo *info = &ntvinfo[aenv->ntv];
+  struct assumectx *actx = ctx;
   double prec;
   /* Record in af the AF_... flags which hold for the format in info. */
   switch (info->fmt&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {
     case FLTFMT_IEEE_F32:
-      nctx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24;
+      actx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24;
       break;
     case FLTFMT_IEEE_F64:
-      nctx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24 | AF_PREC53;
+      actx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24 | AF_PREC53;
       break;
     case FLTFMT_IEEE_F128:
-      nctx->af = AF_NEGZ | AF_INF | AF_IEEE |
+      actx->af = AF_NEGZ | AF_INF | AF_IEEE |
                 AF_PREC24 | AF_PREC53 | AF_PREC64 | AF_PREC113;
       break;
     case FLTFMT_INTEL_F80:
-      nctx->af = AF_NEGZ | AF_INF | AF_IEEE |
+      actx->af = AF_NEGZ | AF_INF | AF_IEEE |
                 AF_PREC24 | AF_PREC53 | AF_PREC64;
       break;
     default:                   /* other formats: probe the C environment */
-      nctx->af = 0;
-      if (NEGP(-0.0)) nctx->af |= AF_NEGZ;
+      actx->af = 0;
+      if (NEGP(-0.0)) actx->af |= AF_NEGZ;
 #ifdef INF
-      nctx->af |= AF_INF;
+      actx->af |= AF_INF;
 #endif
 #ifdef NAN
-      nctx->af |= AF_STDCNAN;
+      actx->af |= AF_STDCNAN;
 #endif
       prec = log(FLT_RADIX)/log(2.0)*info->mant_dig;  /* width in bits */
-      if (prec >= 24) nctx->af |= AF_PREC24;
-      if (prec >= 53) nctx->af |= AF_PREC53;
-      if (prec >= 64) nctx->af |= AF_PREC64;
-      if (prec >= 113) nctx->af |= AF_PREC113;
+      if (prec >= 24) actx->af |= AF_PREC24;
+      if (prec >= 53) actx->af |= AF_PREC53;
+      if (prec >= 64) actx->af |= AF_PREC64;
+      if (prec >= 113) actx->af |= AF_PREC113;
       break;
   }
-  nctx->want = 0;
+  actx->want = 0;                      /* no assumptions requested yet */
 }
 
-static int setvar_native(struct tvec_state *tv, const char *var,
+static int setvar_assume(struct tvec_state *tv, const char *var,
                         const union tvec_regval *rv, void *ctx)
 {
-  struct nativectx *nctx = ctx;
+  struct assumectx *actx = ctx;
   /* Store @assume as the wanted flags; report any other name as unknown. */
-  if (STRCMP(var, ==, "@assume")) nctx->want = rv->u;
+  if (STRCMP(var, ==, "@assume")) actx->want = rv->u;
   else return (tvec_unkregerr(tv, var));
   return (0);
 }
 static const struct tvec_vardef assume_vardef =  /* describes @assume */
-  { sizeof(struct tvec_reg), setvar_native,
+  { sizeof(struct tvec_reg), setvar_assume,
     { "@assume", &tvty_flags, 0, 0, { &assume_flaginfo } }};
-static const struct tvec_vardef *findvar_native
+static const struct tvec_vardef *findvar_assume
   (struct tvec_state *tv, const char *name, void **ctx_out, void *ctx)
 {
   if (STRCMP(name, ==, "@assume"))
@@ -459,11 +459,11 @@ static const struct tvec_vardef *findvar_native
     return (0);
 }
 
-static void before_native(struct tvec_state *tv, void *ctx)
+static void before_assume(struct tvec_state *tv, void *ctx)
 {
-  struct nativectx *nctx = ctx;
+  struct assumectx *actx = ctx;
 
-  if (nctx->want&~nctx->af)
+  if ((tv->f&TVSF_ACTIVE) && (actx->want&~actx->af))
     tvec_skip(tv, "unsatisfied assumption");
   else {
     DEFAULT_REG(RROUND, rv->u = FLTRND_NEAREVEN);
@@ -471,18 +471,18 @@ static void before_native(struct tvec_state *tv, void *ctx)
   }
 }
 
-static void after_native(struct tvec_state *tv, void *ctx)
+static void after_assume(struct tvec_state *tv, void *ctx)
 {
-  struct nativectx *nctx = ctx;
+  struct assumectx *actx = ctx;
   /* Forget the assumptions requested through @assume. */
-  nctx->want = 0;
+  actx->want = 0;
 }
 
 #define DEF_TEST(ty, cty, TY)                                          \
                                                                        \
-  static struct nativeenv ty##_env =                                   \
-    { { sizeof(struct nativectx),                                      \
-       setup_native, findvar_native, before_native, 0, after_native, 0 }, \
+  static struct assumeenv ty##_env =                                   \
+    { { sizeof(struct assumectx),                                      \
+       setup_assume, findvar_assume, before_assume, 0, after_assume, 0 }, \
       NTV_##TY };                                                      \
                                                                        \
   static const struct tvec_regdef enc##ty##_regs[] = {                 \
@@ -538,12 +538,93 @@ NATIVE_FORMATS(DEF_TEST)
 #define DEF_NATIVE_TEST(ty, cty, TY) &enc##ty##_test, &dec##ty##_test,
 #define NATIVE_TESTS NATIVE_FORMATS(DEF_NATIVE_TEST)
 
+/*----- Direct conversions ------------------------------------------------*/
+
+#define DIRECT_CONVERSIONS(_)  /* rows: _(name stem, C type, format) */ \
+  _(flt, float, f32)                                                   \
+  _(dbl, double, f64)
+
+#define DEF_TEST1(ty, cty, fty, e)    /* e is the byte order: l or b */ \
+  static void test_##ty##to##fty##e(const struct tvec_reg *in,         \
+                                   struct tvec_reg *out,               \
+                                   void *ctx)                          \
+  {  /* Encode in[RX] as bytes in out[RZ_OUT]; flags to RERR_OUT. */   \
+    tvec_allocbytes(&out[RZ_OUT].v, OUTSZ_##fty);                      \
+    out[RERR_OUT].v.u = fltfmt_##ty##to##fty##e(out[RZ_OUT].v.bytes.p, \
+                                               in[RX].v.f,             \
+                                               in[RROUND].v.u);        \
+  }                                                                    \
+  /* The register table and environment do not depend on e. */         \
+  static const struct tvec_test ty##to##fty##e##_test =                        \
+    { #ty "to" #fty #e, ty##to##fty##_regs, &ty##_env._env,            \
+      test_##ty##to##fty##e };
+
+#define DEF_TEST(ty, cty, fty)        /* native value -> encoded bytes */ \
+  static const struct tvec_regdef ty##to##fty##_regs[] = {             \
+    { "round", &tvty_flags,    RROUND, TVRF_OPT, { &fltrnd_flaginfo } }, \
+    { "x",     &tvty_float,    RX,     0,      { &tvflt_##cty } },     \
+    { "z",     &tvty_bytes,    RZ_OUT, 0,      { &fty##_range } },     \
+    { "err",   &tvty_flags,    RERR_OUT, TVRF_OPT, { &flterr_flaginfo } }, \
+    TVEC_ENDREGS                                                       \
+  };                                                                   \
+  /* One test for each byte-order suffix, sharing the table above. */  \
+  DEF_TEST1(ty, cty, fty, l)                                           \
+  DEF_TEST1(ty, cty, fty, b)
+
+DIRECT_CONVERSIONS(DEF_TEST)
+
+#undef DEF_TEST1
+#undef DEF_TEST
+
+#define DEF_TEST1(ty, cty, fty, e)    /* e is the byte order: l or b */ \
+  static void test_##fty##e##to##ty(const struct tvec_reg *in,         \
+                                   struct tvec_reg *out,               \
+                                   void *ctx)                          \
+  {                                                                    \
+    cty z;                                                             \
+    /* Convert into a local cty, then copy it to the output register. */ \
+    out[RERR_OUT].v.u = fltfmt_##fty##e##to##ty(&z, in[RX].v.bytes.p,  \
+                                               in[RROUND].v.u);        \
+    out[RZ_OUT].v.f = z;                                               \
+  }                                                                    \
+  /* The register table and environment do not depend on e. */         \
+  static const struct tvec_test fty##e##to##ty##_test =                        \
+    { #fty #e "to" #ty, fty##to##ty##_regs, &ty##_env._env,            \
+      test_##fty##e##to##ty };
+
+#define DEF_TEST(ty, cty, fty)        /* encoded bytes -> native value */ \
+  static const struct tvec_regdef fty##to##ty##_regs[] = {             \
+    { "round", &tvty_flags,    RROUND, TVRF_OPT, { &fltrnd_flaginfo } }, \
+    { "x",     &tvty_bytes,    RX,     0,      { &fty##_range } },     \
+    { "z",     &tvty_float,    RZ_OUT, 0,      { &tvflt_##cty } },     \
+    { "err",   &tvty_flags,    RERR_OUT, TVRF_OPT, { &flterr_flaginfo } }, \
+    TVEC_ENDREGS                                                       \
+  };                                                                   \
+  /* One test for each byte-order suffix, sharing the table above. */  \
+  DEF_TEST1(ty, cty, fty, l)                                           \
+  DEF_TEST1(ty, cty, fty, b)
+
+DIRECT_CONVERSIONS(DEF_TEST)
+
+#undef DEF_TEST1
+#undef DEF_TEST
+
+#define DEF_DIRECT_CTOF_TESTS(ty, cty, fty)   /* native -> bytes */    \
+  &ty##to##fty##l_test, &ty##to##fty##b_test,
+#define DEF_DIRECT_FTOC_TESTS(ty, cty, fty)   /* bytes -> native */    \
+  &fty##l##to##ty##_test, &fty##b##to##ty##_test,
+#define DEF_DIRECT_TESTS(ty, cty, fty)  /* four comma-terminated entries */ \
+  DEF_DIRECT_CTOF_TESTS(ty, cty, fty)                                  \
+  DEF_DIRECT_FTOC_TESTS(ty, cty, fty)
+#define DIRECT_TESTS DIRECT_CONVERSIONS(DEF_DIRECT_TESTS)
+
 /*----- Main code ---------------------------------------------------------*/
 
 static const struct tvec_test *const tests[] = {
   &round_test,
   NATIVE_TESTS                 /* the list macros supply their own commas */
   IEEE_TESTS
+  DIRECT_TESTS
   0
 };
 
index 7f63664ad76271c609b9ab6f148ef18c123f990a..b9670b0ec766a5755e3a095eea4d083a9b7e2418 100755 (executable)
@@ -73,9 +73,12 @@ def explore(wd, lobits, hibits):
     for hi in xrange(bit(hibits)):
       top = hi << hishift
       for lo in xrange(bit(lobits)):
+        while True:
+          fill = R.randrange(midbit)
+          if fill != 0 and fill != midbit - 1: break
         base = lo | top
         yield base
-        yield base | (R.randrange(midbit) << lobits)
+        yield base | (fill << lobits)
         yield base | m
 
 class ExploreParameters (object):
index bf85107c57fe9b79321dacd31fda8a5721149930..840f5140f99c8b68fa4421b38fbea1ca51f897b4 100644 (file)
@@ -225,6 +225,31 @@ err = INEXACT
 
 [encf64]
 
+;; NaN conversions.
+
+f = QNAN
+m = #empty
+z = 7ff80000 00000000
+
+f = SNAN
+m = #empty
+z = 7ff00000 00000001
+err = INEXACT
+
+f = SNAN
+m = 00000000 00002000
+z = 7ff00000 00000001
+
+f = SNAN
+m = 00000000 00001fff
+z = 7ff00000 00000001
+err = INEXACT
+
+f = SNAN
+m = 00000000 00000000 00000001
+z = 7ff00000 00000001
+err = INEXACT
+
 ;; Check NaN truncation.
 
 errmask = 0
@@ -418,7 +443,7 @@ z = -0.625
 f = 0
 e = 3
 m = c90fdb00
-z = 6.2831854820251465
+z = 6.2831853
 
 @assume = stdc-nan
 f = QNAN
@@ -467,7 +492,7 @@ err = INEXACT
 @assume = ieee
 x = #nan
 f = QNAN
-m = 00000000 ; maybe
+;; m = ... left unspecified (presumably the NaN payload is unpredictable)
 err = OK
 
 [encdbl]
@@ -508,6 +533,7 @@ err = INEXACT
 f = QNAN
 m = #empty
 z = #nan
+err = OK
 
 [decdbl]
 
@@ -545,7 +571,88 @@ err = INEXACT
 @assume = ieee
 x = #nan
 f = QNAN
-m = 00000000 00000000 ; maybe
+;; m = ... left unspecified (presumably the NaN payload is unpredictable)
 err = OK
 
 ;;;--------------------------------------------------------------------------
+;;; Direct conversions.
+
+[f32btoflt]
+
+x = 00000000
+z = 0.0
+
+@assume = negz
+x = 80000000
+z = -0.0
+
+@assume = inf
+x = 7f800000
+z = #+inf
+
+@assume = inf
+x = ff800000
+z = #-inf
+
+@assume = stdc-nan | ieee
+x = 7fc00000
+z = #nan
+
+@assume = stdc-nan | ieee
+x = 7fa00000
+z = #nan
+
+x = 3f800000
+z = 1.0
+
+@assume = prec24
+x = 40c90fdb
+z = 6.2831853
+
+[f32ltoflt]
+
+x = 10c12549
+z = 678929
+
+[flttof32b]
+
+x = 678929
+z = 4925c110
+
+@assume = prec24
+x = 6.2831853
+z = 40c90fdb
+
+[flttof32l]
+
+x = 678929
+z = 10c12549
+
+[dbltof64b]
+
+x = 0.0
+z = 00000000 00000000
+
+x = -2964135146
+z = c1e615a3 9d400000
+
+@assume = prec53
+x = 6.283185307179586
+z = 401921fb 54442d18
+
+[dbltof64l]
+
+x = -2964135146
+z = 0000409d a315e6c1
+
+[f64btodbl]
+
+x = c1e615a3 9d400000
+z = -2964135146
+
+[f64ltodbl]
+
+x = 0000409d a315e6c1
+z = -2964135146
+
+;;;--------------------------------------------------------------------------