@@@ fltfmt wip

author Mark Wooding <mdw@distorted.org.uk>

Tue, 23 Apr 2024 23:49:56 +0000 (00:49 +0100)

committer Mark Wooding <mdw@distorted.org.uk>

Tue, 23 Apr 2024 23:49:56 +0000 (00:49 +0100)
author Mark Wooding <mdw@distorted.org.uk>
Tue, 23 Apr 2024 23:49:56 +0000 (00:49 +0100)
committer Mark Wooding <mdw@distorted.org.uk>
Tue, 23 Apr 2024 23:49:56 +0000 (00:49 +0100)
diff --git a/defs.man b/defs.man

index 379edbb7687c734d559cf91309f5b9fdcc719aee..67ff7be50cca0e541e590aec20ba3bc40a74d1d3 100644 (file)
--- a/defs.man
+++ b/defs.man
@@ -38,6 +38,8 @@
  .  ds /= \(!=
  .  ds <= \(<=
  .  ds >= \(>=
+.  ds mu \(mu
+.  ds sr \(sr
  .  ds ' \(fm
  .  ds , \h'\w'\ 'u/2u'
  .  if \n(.g \{\
@@ -53,6 +55,8 @@
  .  ds se
  .  ds us _
  .  ds ue
+.  ds mu *
+.  ds sr sqrt
  .  ds ' \(aq
  .  ds *d \,\fIdelta\/\fP
  .  ds /= /=
diff --git a/mem/arena.3.in b/mem/arena.3.in

index 09207fa6d7a8f7b708763e045d53d749081f5f99..593c25d5f53e4d80ca30c752b9c9f8801aa2a7f4 100644 (file)
--- a/mem/arena.3.in
+++ b/mem/arena.3.in
@@ -150,12 +150,12 @@ handlers which can't easily find the old block's size.
  The macro
  .B ALLOCV_SAFE_P
  returns nonzero if the product
-.IR n "\ \(mu\ " sz
+.IR n "\ \*(mu\ " sz
  is representable in type
  .B size_t
  and zero otherwise;
  i.e., it returns true if it would be safe to try to allocate
-.IR n "\ \(mu\ " sz
+.IR n "\ \*(mu\ " sz
  bytes.
  The macro
  .BR A_ALLOCV
diff --git a/test/bench.3.in b/test/bench.3.in

index d526de8f82154ac4e72a564360d724d3645b2b7b..3ce25f36e95f90237573a615c02e8d610dad3992 100644 (file)
--- a/test/bench.3.in
+++ b/test/bench.3.in
@@ -794,7 +794,7 @@ with the objective of finding an iteration count
  such that
  .I n
  iterations of the computation take more than
-.IB b ->target_s "" \fR/\(sr2
+.IB b ->target_s "" \fR/\*(sr2
  seconds.
  If measurement fails,
  then
@@ -807,7 +807,7 @@ is set to zero, and
  is filled in with the measurement;
  .IB t_out ->n
  is set to
-.IR n "\ \(mu\ " base .
+.IR n "\ \*(mu\ " base .
  .PP
  The
  .B BENCH_MEASURE_TAG
@@ -902,7 +902,7 @@ the
  flag must be set in
  .IB t ->f \fR.
  If the timing is sufficient \(en if
-.IR t\fB->t "\ \*(>=\ " target_s /\(sr2
+.IR t\fB->t "\ \*(>=\ " target_s /\*(sr2
  \(en then
  .B bench_adapt
  returns a nonzero value to indicate that measurement is complete.
@@ -922,7 +922,7 @@ On exit, the timing data is updated,
  and
  .IB t ->n
  is set to the product
-.IR n "\ \(mu\ " base .
+.IR n "\ \*(mu\ " base .
  .
  .SS Reporting results
  The
diff --git a/test/tvec-types.c b/test/tvec-types.c

index 8d07ea82b4df21dc43721e8055cbacec0394c992..1d01a3d44027561e3d47e43d9b26fffefbd705c5 100644 (file)
--- a/test/tvec-types.c
+++ b/test/tvec-types.c
@@ -528,18 +528,21 @@ static void format_size(const struct gprintf_ops *gops, void *go,
  static int eqish_floating_p(double x, double y,
                             const struct tvec_floatinfo *fi)
  {
-  double t;
+  double t, u;
  
+  /* NaNs and infinities are equal only to each other. */
    if (NANP(x)) return (NANP(y)); else if (NANP(y)) return (0);
    if (INFP(x)) return (x == y); else if (INFP(y)) return (0);
  
+  /* Compare finite values. */
    switch (fi ? fi->f&TVFF_EQMASK : TVFF_EXACT) {
      case TVFF_EXACT:
        return (x == y && NEGP(x) == NEGP(y));
      case TVFF_ABSDELTA:
-      t = x - y; if (t < 0) t = -t; return (t < fi->delta);
+      t = fabs(y - x); return (t < fi->delta);
      case TVFF_RELDELTA:
-      t = 1.0 - x/y; if (t < 0) t = -t; return (t < fi->delta);
+      t = fabs(y - x); u = fabs(y*fi->delta); if (u < DBL_MIN) u = DBL_MIN;
+      return (t <= u);
      default:
        abort();
    }
@@ -1928,8 +1931,8 @@ const struct tvec_regty tvty_float = {
  
  /* Predefined floating-point ranges. */
  const struct tvec_floatinfo
-  tvflt_float = { TVFF_EXACT | TVFF_INFOK | TVFF_NANOK,
-                 -FLT_MAX, FLT_MAX, 0.0 },
+  tvflt_float = { TVFF_RELDELTA | TVFF_INFOK | TVFF_NANOK,
+                 -FLT_MAX, FLT_MAX, FLT_EPSILON/2 },
    tvflt_double = { TVFF_EXACT | TVFF_INFOK | TVFF_NANOK,
                    -DBL_MAX, DBL_MAX, 0.0 },
    tvflt_finite = { TVFF_EXACT, -DBL_MAX, DBL_MAX, 0.0 },
diff --git a/utils/fltfmt-convert.c b/utils/fltfmt-convert.c

index 6db742befdef8f2058568ea42f10a65d70153ddb..cc24dba5f777b81f7201a4820ef09ef03121bc60 100644 (file)
--- a/utils/fltfmt-convert.c
+++ b/utils/fltfmt-convert.c
@@ -1,6 +1,6 @@
  /* -*-c-*-
   *
- * Floating-point format conversions
+ * Direct floating-point format conversions
   *
   * (c) 2024 Straylight/Edgeware
   */
@@ -40,6 +40,10 @@
    _(float, flt, f32)                                                   \
    _(double, dbl, f64)
  
+#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008))
+#  define FROB_NANS
+#endif
+
  #define CONV_DECLS_flt_f32 uint32 t
  #if (FLT_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) == FLTFMT_IEEE_F32
  #  if (FLT_FORMAT&FLTFMT_ENDMASK) == FLTFMT_BE
@@ -51,10 +55,19 @@
  #  else
  #    error "unimplemented byte order"
  #  endif
-#  ifdef FLTFMT__MUST_FROB_NANS
-#    define CONV_FROB_flt_f32 do { FLTFMT__FROB_NAN_F32(&t, rc); } while (0)
+#  ifdef FROB_NANS
+#    define CONV_FROBNANflt_f32 do {                                   \
+       if ((t&0x7f800000) != 0x7f800000 || !(t&0x007fffff))            \
+        ;                                                              \
+       else if (t&0x003fffff)                                          \
+        t ^= 0x00400000;                                               \
+       else {                                                          \
+        t = (t&0x80000000) | 0x00000001;                               \
+        rc |= FLTERR_INEXACT;                                          \
+       }                                                               \
+     } while (0)
  #  else
-#    define CONV_FROB_flt_f32 do ; while (0)
+#    define CONV_FROBNANflt_f32 do ; while (0)
  #  endif
  #else
  #  define CONV_LOAD_flt_f32 do {                                       \
@@ -69,7 +82,7 @@
       rc |= fltfmt_encflt(z_out, &u, r);                                        \
       fltfmt_freebits(&u);                                              \
     } while (0)
-#  define CONV_FROB_flt_f32 do ; while (0)
+#  define CONV_FROBNANflt_f32 do ; while (0)
  #endif
  #define CONV_LOADB_flt_f32 do { t = LOAD32_B(p); } while (0)
  #define CONV_LOADL_flt_f32 do { t = LOAD32_L(p); } while (0)
@@ -87,15 +100,26 @@
  #  else
  #    error "unimplemented byte order"
  #  endif
-#  ifdef FLTFMT__MUST_FROB_NANS
-#    define CONV_FROB_dbl_f64 do {                                     \
-       uint32 u[2];                                                    \
-       u[0] = HI64(t); u[1] = LO64(t);                                 \
-       FLTFMT__FROB_NAN_F64(&u, rc);                                   \
-       SET64(t, u[0], u[1]);                                           \
+#  ifdef FROB_NANS
+#    define CONV_FROBNANdbl_f64 do {                                   \
+       kludge64 u, v;                                                  \
+       SET64(u, 0x7ff00000, 0x00000000); AND64(v, t, u);               \
+       if (CMP64(v, ==, u)) {                                          \
+        SET64(u, 0x000fffff, 0xffffffff); AND64(v, t, u);              \
+        if (!ZERO64(v)) {                                              \
+          SET64(u, 0x0007ffff, 0xffffffff); AND64(v, t, u);            \
+          if (!ZERO64(v))                                              \
+            { SET64(u, 0x00080000, 0x00000000); XOR64(t, t, u); }      \
+          else {                                                       \
+            SET64(u, 0x80000000, 0x00000000); AND64(t, t, u);          \
+            SET64(u, 0x00000000, 0x00000001); OR64(t, t, u);           \
+            rc |= FLTERR_INEXACT;                                      \
+          }                                                            \
+        }                                                              \
+       }                                                               \
       } while (0)
  #  else
-#    define CONV_FROB_dbl_f64 do ; while (0)
+#    define CONV_FROBNANdbl_f64 do ; while (0)
  #  endif
  #else
  #  define CONV_LOAD_dbl_f64 do {                                       \
@@ -111,7 +135,7 @@
       rc |= fltfmt_encdbl(z_out, &u, r);                                        \
       fltfmt_freebits(&u);                                              \
     } while (0)
-#  define CONV_FROB_dbl_f64 do ; while (0)
+#  define CONV_FROBNANdbl_f64 do ; while (0)
  #endif
  #define CONV_LOADB_dbl_f64 do { LOAD64_B_(t, p); } while (0)
  #define CONV_LOADL_dbl_f64 do { LOAD64_L_(t, p); } while (0)
@@ -145,7 +169,7 @@
      unsigned rc = 0; CONV_DECLS_##cty##_##fty;                         \
                                                                         \
      CONV_LOAD_##cty##_##fty;                                           \
-    CONV_FROB_##cty##_##fty;                                           \
+    CONV_FROBNAN##cty##_##fty;                                         \
      CONV_STOREL_##cty##_##fty;                                         \
      return (rc);                                                       \
    }                                                                    \
@@ -155,7 +179,7 @@
      unsigned rc = 0; CONV_DECLS_##cty##_##fty;                         \
                                                                         \
      CONV_LOAD_##cty##_##fty;                                           \
-    CONV_FROB_##cty##_##fty;                                           \
+    CONV_FROBNAN##cty##_##fty;                                         \
      CONV_STOREB_##cty##_##fty;                                         \
      return (rc);                                                       \
    }
@@ -188,7 +212,7 @@ CONVERSIONS(DEF_CONV)
      unsigned rc = 0; CONV_DECLS_##cty##_##fty;                         \
                                                                         \
      CONV_LOADL_##cty##_##fty;                                          \
-    CONV_FROB_##cty##_##fty;                                           \
+    CONV_FROBNAN##cty##_##fty;                                         \
      CONV_STORE_##cty##_##fty;                                          \
      return (rc);                                                       \
    }                                                                    \
@@ -198,7 +222,7 @@ CONVERSIONS(DEF_CONV)
      unsigned rc = 0; CONV_DECLS_##cty##_##fty;                         \
                                                                         \
      CONV_LOADB_##cty##_##fty;                                          \
-    CONV_FROB_##cty##_##fty;                                           \
+    CONV_FROBNAN##cty##_##fty;                                         \
      CONV_STORE_##cty##_##fty;                                          \
      return (rc);                                                       \
    }
diff --git a/utils/fltfmt.3.in b/utils/fltfmt.3.in

index e5ae258bc8b430f9e4172f80dea9ee5221275ca0..6343c7966e7a1d3270a995f0dd43d418d4f2dfbc 100644 (file)
--- a/utils/fltfmt.3.in
+++ b/utils/fltfmt.3.in
@@ -295,7 +295,7 @@ This error flag is sometimes set conservatively.
  .TP
  .B FLTERR_UFLOW
  The conversion underflowed:
-a nonzero input was too tiny (in asbolute value) to represent,
+a nonzero input was too tiny (in absolute value) to represent,
  and a zero result was returned.
  .TP
  .B FLTERR_OFLOW
@@ -393,7 +393,7 @@ and
  are mask with set bits corresponding to their respective predicates.
  Bitwise boolean logic can be applied to these masks
  in order to calculate the masks corresponding to
-the same logical expresssion applied to the individual predicates.
+the same logical expression applied to the individual predicates.
  .B FRPMASK_INEXACT holds if
  .B LOW
  or
@@ -416,6 +416,73 @@ denoted
  This is likely a good option
  if there is no compelling argument for a different specific choice.
  .
+.SS Direct conversions
+The functions
+.B fltfmt_flttof32l
+and
+.B fltfmt_flttof32b
+convert a
+.B float
+argument to an IEEE\ 754 Binary32 value
+in little- or big-endian byte order, respectively;
+similarly
+.B fltfmt_dbltof64l
+and
+.B fltfmt_dbltof64b
+convert a
+.B double
+argument to an IEEE\ 754 Binary64 value
+in little- or big-endian byte order, respectively.
+The value to convert is given as
+.I x
+and the result is written at the address
+.IR p .
+.PP
+The functions
+.B fltfmt_f32ltoflt
+and
+.B fltfmt_f32btoflt
+convert an IEEE\ 754 Binary32 value,
+in little- or big-endian byte order, respectively,
+to a
+.BR float ;
+similarly,
+.B fltfmt_f64ltodbl
+and
+.B fltfmt_f64btodbl
+convert an IEEE\ 754 Binary64 value,
+in little- or big-endian byte order, respectively,
+to a
+.BR double .
+The value to convert is read from address
+.I p
+and the result is written to
+.RI * z_out \fR.
+.PP
+All of these functions additionally take a rounding mode
+.I r
+which is applied if the conversion cannot be performed exactly,
+and return an error code as described above.
+.PP
+On many modern platforms, the
+.B float
+and
+.B double
+types are represented internally using the IEEE
+Binary32 and Binary64 formats,
+so these conversions are trivial, or nearly so.
+A complication arises on PA-RISC and older MIPS processors:
+see the descriptions of
+.B fltfmt_encieee
+and
+.B fltfmt_decdbl
+below for the details.
+.PP
+On other platforms,
+the conversion is decidedly nontrivial,
+and makes use of the machinery described below;
+this may also be useful for more complex conversions.
+.
  .SS The floatbits structure
  In order to avoid a combinatorial explosion in conversion operations,
  all the basic conversions involve,
@@ -540,7 +607,7 @@ or +1 if
  .B FLTF_NEG
  is clear,
  then the number represented is
-.IR s "\ \(mu\ " m "\ \(mu\ 2\*(ss" e \*(se.
+.IR s "\ \*(mu\ " m "\ \*(mu\ 2\*(ss" e \*(se.
  .PP
  A
  .B struct floatbits
@@ -564,6 +631,9 @@ neither allocates any storage or other resources,
  leaving
  .B frac
  null.
+In this state, it is safe to modify the arena pointer
+.B a
+if the default initialization is unsatisfactory.
  .PP
  The
  .B fltfmt_allocfrac
@@ -707,7 +777,7 @@ If
  .IR e "\ =\ \-" e \*(us0\*(ue
  then the value is zero or a subnormal,
  with the value
-.RI (\-1)\*(ss s "\*(se\ \(mu\ " m "\ \(mu\ 2\*(ss" e +1\*(se.
+.RI (\-1)\*(ss s "\*(se\ \*(mu\ " m "\ \*(mu\ 2\*(ss" e +1\*(se.
  In particular,
  if
  .IR m "\ =\ 0"
@@ -731,7 +801,7 @@ but the result will be as described.
  If
  .RI "1\ \-\ " e "\*(us0\*(ue \*(<=\ " e "\ < 2" e "\*(us0\*(ue\ +\ 1"
  then the value is a (supposedly) normal number
-.RI (\-1)\*(ss s "\*(se\ \(mu\ " m "\ \(mu\ 2\*(ss" e \*(se.
+.RI (\-1)\*(ss s "\*(se\ \*(mu\ " m "\ \*(mu\ 2\*(ss" e \*(se.
  If
  .I h
  is zero then
@@ -821,58 +891,7 @@ in the notation above this is
  The precision;
  in the notation above this is
  .IR p .
-.PP
-The following IEEE formats descriptions are already defined.
-.TP
-.B "fltfmt_f16"
-The IEEE\ 754 Binary16 format, with
-.IR w "\ =\ 5,"
-.IR p "\ =\ 11,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_f32"
-The IEEE\ 754 Binary32 (`single precision') format, with
-.IR w "\ =\ 8,"
-.IR p "\ =\ 24,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_f64"
-The IEEE\ 754 Binary64 (`double precision') format, with
-.IR w "\ =\ 11,"
-.IR p "\ =\ 53,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_f128"
-The IEEE\ 754 Binary128 (`quad precision') format, with
-.IR w "\ =\ 15,"
-.IR p "\ =\ 113,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_mini"
-An eight-bit `minifloat' format, with
-.IR w "\ =\ 4,"
-.IR p "\ =\ 4,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_bf16"
-The Google `BFloat16' format, with
-.IR w "\ =\ 8,"
-.IR p "\ =\ 8,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_idblext80"
-The Intel 8087 80-bit `double extended' format, with
-.IR w "\ =\ 15,"
-.IR p "\ =\ 64,"
-and
-.IR h "\ =\ 1."
-.PP
+
  The
  .B fltfmt_encieee
  and
@@ -921,6 +940,10 @@ discarding low-significant bits;
  if the input is a finite value,
  then the significand is rounded to fit
  according to the requested rounding mode.
+If a signalling NaN ends up with all of its payload bits zero,
+as a result of truncation or otherwise,
+then the least-significant bit of the output payload is forced on
+in order to distinguish the result from an infinity.
  The possible errors are
  .B FLTERR_UFLOW
  if the value is unrepresentably tiny,
@@ -969,10 +992,185 @@ the result is returned anyway,
  with the unit bit interpreted as encoded in finite numbers,
  and discarded in infinities and NaNs.
  .PP
-For each of the format
-
-
-
+A number of IEEE and IEEE-like formats are predefined:
+for format
+.IR fmt ,
+there is
+an IEEE format description, named
+.BI fltfmt_ fmt \fR,
+together with encoding and decoding functions, named
+.BI fltfmt_enc fmt
+and
+.BI fltfmt_dec fmt \fR;
+for the most part,
+these functions use more convenient types
+to hold encoded values.
+.TP
+.B "fltfmt_f16"
+The IEEE\ 754 Binary16 format, with
+.IR w "\ =\ 5,"
+.IR p "\ =\ 11,"
+and
+.IR h "\ =\ 0;"
+stored in a
+.BR uint16 .
+.TP
+.B "fltfmt_f32"
+The IEEE\ 754 Binary32 (`single precision') format, with
+.IR w "\ =\ 8,"
+.IR p "\ =\ 24,"
+and
+.IR h "\ =\ 0;"
+stored in a
+.BR uint32 .
+.TP
+.B "fltfmt_f64"
+The IEEE\ 754 Binary64 (`double precision') format, with
+.IR w "\ =\ 11,"
+.IR p "\ =\ 53,"
+and
+.IR h "\ =\ 0;"
+stored in a
+.B kludge64
+(see
+.BR bits (3)
+for details).
+.TP
+.B "fltfmt_f128"
+The IEEE\ 754 Binary128 (`quad precision') format, with
+.IR w "\ =\ 15,"
+.IR p "\ =\ 113,"
+and
+.IR h "\ =\ 0;"
+stored in a big-endian vector of
+.BR uint32 ,
+just as for the generic functions described above.
+.TP
+.B "fltfmt_mini"
+An eight-bit `minifloat' format, with
+.IR w "\ =\ 4,"
+.IR p "\ =\ 4,"
+and
+.IR h "\ =\ 0;"
+stored in an
+.BR octet .
+.TP
+.B "fltfmt_bf16"
+The Google `BFloat16' format, with
+.IR w "\ =\ 8,"
+.IR p "\ =\ 8,"
+and
+.IR h "\ =\ 0;"
+stored in a
+.BR uint16 .
+.TP
+.B "fltfmt_idblext80"
+The Intel 8087 80-bit `double extended' format, with
+.IR w "\ =\ 15,"
+.IR p "\ =\ 64,"
+and
+.IR h "\ =\ 1;"
+stored as a
+.B uint16
+holding the sign and exponent,
+and a
+.B kludge64
+holding the significand.
+.
+.SS Native formats
+There are also functions for converting between
+.B struct floatbits
+and the implementation's native floating-point types
+.B float
+(abbreviated
+.BR flt ),
+.B double
+(abbreviated
+.BR dbl ),
+and
+.B "long double"
+(abbreviated
+.BR ldbl ).
+.PP
+For each native type abbreviation
+.IR ty ,
+there are functions
+.BI fltfmt_enc ty
+and
+.BI fltfmt_dec ty \fR,
+which respectively convert the value held in
+.B struct floatbits
+to or from a value of the corresponding C type.
+(The functions acting on
+.B long double
+values are only available if the platform supports C99 or later.)
+.PP
+The
+.BI fltfmt_enc ty
+functions read an input value from a
+.B struct floatbits
+pointer
+.I x
+and store the encoded result through a pointer
+.I z_out
+to the appropriate C type;
+the function also receives a rounding mode
+.IR r ,
+but see below.
+The
+.BI fltfmt_dec ty
+functions are given an input value of the appropriate C type,
+and store the decoded result in a
+.B struct floatbits
+structure pointed to by
+.IR z_out ;
+again, the function also receives a rounding mode
+.IR r ,
+but see below.
+.PP
+These functions can use two different strategies for conversion.
+If the compile-time configuration step detects
+that the implementation is using
+a specific, supported format for a native type,
+then conversions involving the native type
+are performed using the existing machinery for that format.
+For example, if,
+as is in fact nearly universal on modern-ish systems,
+the
+.B double
+type uses the IEEE\ 754 Binary64 format,
+then
+.B fltfmt_encdbl
+and
+.B fltfmt_decdbl
+use the functions
+.B fltfmt_encf64
+and
+.B fltfmt_decf64
+described above for the conversion.
+This approach has the benefit that
+everything is done under the control of the
+.B fltfmt
+machinery,
+which can faithfully preserve signs of zero values,
+and NaN payloads.
+The error conditions are, for the most part, the same as for the
+.B fltfmt_encieee
+and
+.B fltfmt_decieee
+functions described above.
+The encoding functions have an additional source of inexactness
+on PA-RISC and older MIPS processors
+which use the reversed quiet/signalling NaN convention:
+a quiet NaN with an all-zero payload
+is not representable on such implementations
+(the encoding is an infinity instead);
+in this situation,
+the least significant payload bit is forced on,
+just as if the payload required truncation,
+and
+.B FLTERR_INEXACT
+is returned.
  .
  .\"--------------------------------------------------------------------------
  .SH "SEE ALSO"
diff --git a/utils/fltfmt.c b/utils/fltfmt.c

index deaf553667bbaee00acd68673766ed64852640a2..cb39eca4cbaa145f87ecc360fd7b3eb735f763b1 100644 (file)
--- a/utils/fltfmt.c
+++ b/utils/fltfmt.c
@@ -39,9 +39,19 @@
  #include "bits.h"
  #include "fltfmt.h"
  #include "growbuf.h"
-#include "macros.h"
  #include "maths.h"
  
+/*----- Preliminary hacking -----------------------------------------------*/
+
+/* The native-format conversions are -- at least if the format is
+ * unrecognized -- dependent on the implementation's rounding.  Our own
+ * rounding mode specifications don't fit into the framework very well, but I
+ * still want to respect the prevailing rounding mode.
+ *
+ * The `proper' way to do this is with %|#pragma STDC FENV_ACCESS|%.  But
+ * that doesn't actually work on GCC, or on Clang from not too long ago.  So
+ * use compiler-specific hacking to support this.
+ */
  #if GCC_VERSION_P(4, 4)
  #  pragma GCC optimize "-frounding-math"
  #elif CLANG_VERSION_P(11, 0) && !CLANG_VERSION_P(12, 0)
@@ -531,7 +541,7 @@ unsigned fltfmt_round(struct floatbits *z_out, const struct floatbits *x,
    return (rc);
  }
  
-/*----- IEEE formats ------------------------------------------------------*/
+/*----- IEEE and related formats ------------------------------------------*/
  
  /* IEEE (and related) format descriptions. */
  const struct fltfmt_ieeefmt
@@ -622,16 +632,24 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt,
      /* Copy the payload.
       *
       * If the payload is all-zero and we're meant to set a signalling NaN
-     * then report an exactness failure and set the low bit.
+     * then report an exactness failure and set the least-significant bit.
       */
      mb = fmt->prec - 2; mw = (mb + 31)/32; sh = -mb%32;
-    for (i = 0; i < nw - mw; i++) z[i] = 0;
-    n = x->n; if (n > mw) n = nw;
-    t = shr(z + i, x->frac, n, sh); i += n;
-    if (i < nw) z[i++] = t;
-    sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++;
-    if (f&FLTF_QNAN) z0 |= B32(sh);
-    else if (!fracwd) { ERR(FLTERR_INEXACT); z[nw - 1] |= 1; }
+    n = x->n;
+      if (n < mw) j = 0;
+      else { n = mw; j = sh; }
+    if ((f&FLTF_SNAN) && ms_set_bit(x->frac + n, j, 32*n) == ALLCLEAR) {
+      ERR(FLTERR_INEXACT);
+      n = nw - 1; for (i = 0; i < n; i++) z[i] = 0;
+      z[i++] = 1;
+    } else {
+      for (i = 0; i < nw - mw; i++) z[i] = 0;
+      n = x->n; if (n > mw) n = mw;
+      t = shr(z + i, x->frac, n, sh); i += n;
+      if (i < nw) z[i++] = t;
+      sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++;
+      if (f&FLTF_QNAN) z0 |= B32(sh);
+    }
  
      /* Set the exponent and, for non-hidden-bit formats, the unit bit. */
      z0 |= M32(fmt->expwd) << esh;
@@ -1123,6 +1141,13 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
  #  define DIGIT_BITS 4
  #endif
  
+/* Take note if we need to cope with the reversed quiet/signalling convention
+ * used by HP-PA and older MIPS processors.
+ */
+#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008))
+#  define FROB_NANS
+#endif
+
  /* --- @ENCFLT@ --- *
   *
   * Arguments:  @ty@ = the C type to encode
@@ -1205,8 +1230,23 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
  
  #endif
  
+#ifdef FROB_NANS
+#  define FROBNAN_ENCDECLS     struct floatbits _y
+#  define FROBNAN_ENC do {                                             \
+     if (_x->f&FLTF_NANMASK) {                                         \
+       _y.f = _x->f ^ FLTF_NANMASK; _y.frac = _x->frac; _y.n = _x->n;  \
+       _x = &_y;                                                       \
+     }                                                                 \
+   } while (0)
+#else
+#  define FROBNAN_ENCDECLS
+#  define FROBNAN_ENC do ; while (0)
+#endif
+
  #define ENCFLT(ty, TY, ldexp, rc, z_out, x, r) do {                    \
+  const struct floatbits *_x = (x);                                    \
    unsigned _rc = 0;                                                    \
+  FROBNAN_ENCDECLS;                                                    \
                                                                         \
    /* See if the native format is one that we recognize. */             \
    switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {            \
@@ -1215,8 +1255,8 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
        uint32 _t[1];                                                    \
        unsigned char *_z = (unsigned char *)(z_out);                    \
                                                                         \
-      (rc) = fltfmt_encieee(&fltfmt_f32, _t, (x), (r), FLTERR_ALLERRS);        \
-      FLTFMT__FROB_NAN_F32(_t, _rc);                                   \
+      FROBNAN_ENC;                                                     \
+      (rc) = fltfmt_encieee(&fltfmt_f32, _t, _x, (r), FLTERR_ALLERRS); \
        switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
         case FLTFMT_BE: STORE32_B(_z, _t[0]); break;                    \
         case FLTFMT_LE: STORE32_L(_z, _t[0]); break;                    \
@@ -1227,8 +1267,9 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
      case FLTFMT_IEEE_F64: {                                            \
        uint32 _t[2];                                                    \
        unsigned char *_z = (unsigned char *)(z_out);                    \
-      (rc) = fltfmt_encieee(&fltfmt_f64, _t, (x), (r), FLTERR_ALLERRS);        \
-      FLTFMT__FROB_NAN_F64(_t, _rc);                                   \
+                                                                       \
+      FROBNAN_ENC;                                                     \
+      (rc) = fltfmt_encieee(&fltfmt_f64, _t, _x, (r), FLTERR_ALLERRS); \
        switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
         case FLTFMT_BE:                                                 \
           STORE32_B(_z + 0, _t[0]); STORE32_B(_z + 4, _t[1]);           \
@@ -1247,8 +1288,8 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
        uint32 _t[4];                                                    \
        unsigned char *_z = (unsigned char *)(z_out);                    \
                                                                         \
-      FLTFMT__FROB_NAN_F128(_t, _rc);                                  \
-      (rc) = fltfmt_encieee(&fltfmt_f128, _t, (x), (r), FLTERR_ALLERRS); \
+      FROBNAN_ENC;                                                     \
+      (rc) = fltfmt_encieee(&fltfmt_f128, _t, _x, (r), FLTERR_ALLERRS);        \
        switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
         case FLTFMT_BE:                                                 \
           STORE32_B(_z +  0, _t[0]); STORE32_B(_z +  4, _t[1]);         \
@@ -1266,8 +1307,9 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
        uint32 _t[3];                                                    \
        unsigned char *_z = (unsigned char *)(z_out);                    \
                                                                         \
-      (rc) = fltfmt_encieee(&fltfmt_idblext80, _t, (x), (r), FLTERR_ALLERRS); \
-      FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc);                             \
+      FROBNAN_ENC;                                                     \
+      (rc) = fltfmt_encieee(&fltfmt_idblext80,                         \
+                           _t, _x, (r), FLTERR_ALLERRS);               \
        switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
         case FLTFMT_BE:                                                 \
           STORE16_B(_z + 0, _t[0]);                                     \
@@ -1284,7 +1326,6 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
      default: {                                                         \
        /* We must do this the hard way. */                              \
                                                                         \
-      const struct floatbits *_x = (x);                                        \
        ty _z;                                                           \
        unsigned _i;                                                     \
        ENC_ROUND_DECLS;                                                 \
@@ -1466,7 +1507,16 @@ unsigned fltfmt_encldbl(long double *z_out,
     } while (0)
  #endif
  
+#ifdef FROB_NANS
+#  define FROBNAN_DEC do {                                             \
+     if (_z->f&FLTF_NANMASK) _z->f ^= FLTF_NANMASK;                    \
+   } while (0)
+#else
+#  define FROBNAN_DEC do ; while (0)
+#endif
+
  #define DECFLT(ty, TY, frexp, rc, z_out, x, r) do {                    \
+  struct floatbits *_z = (z_out);                                      \
    unsigned _rc = 0;                                                    \
                                                                         \
    switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {            \
@@ -1480,8 +1530,7 @@ unsigned fltfmt_encldbl(long double *z_out,
         case FLTFMT_LE: _t[0] = LOAD32_L(_x); break;                    \
         default: assert(!"unimplemented byte order"); break;            \
        }                                                                        \
-      FLTFMT__FROB_NAN_F32(_t, _rc);                                   \
-      _rc |= fltfmt_decieee(&fltfmt_f32, (z_out), _t);                 \
+      _rc |= fltfmt_decieee(&fltfmt_f32, _z, _t); FROBNAN_DEC;         \
      } break;                                                           \
                                                                         \
      case FLTFMT_IEEE_F64: {                                            \
@@ -1500,8 +1549,7 @@ unsigned fltfmt_encldbl(long double *z_out,
           break;                                                        \
         default: assert(!"unimplemented byte order"); break;            \
        }                                                                        \
-      FLTFMT__FROB_NAN_F64(_t, _rc);                                   \
-      _rc |= fltfmt_decieee(&fltfmt_f64, (z_out), _t);                 \
+      _rc |= fltfmt_decieee(&fltfmt_f64, _z, _t); FROBNAN_DEC;         \
      } break;                                                           \
                                                                         \
      case FLTFMT_IEEE_F128: {                                           \
@@ -1519,8 +1567,7 @@ unsigned fltfmt_encldbl(long double *z_out,
           break;                                                        \
         default: assert(!"unimplemented byte order"); break;            \
        }                                                                        \
-      FLTFMT__FROB_NAN_F128(_t, _rc);                                  \
-      _rc |= fltfmt_decieee(&fltfmt_f128, (z_out), _t);                        \
+      _rc |= fltfmt_decieee(&fltfmt_f128, _z, _t); FROBNAN_DEC;                \
      } break;                                                           \
                                                                         \
      case FLTFMT_INTEL_F80: {                                           \
@@ -1538,12 +1585,10 @@ unsigned fltfmt_encldbl(long double *z_out,
           break;                                                        \
         default: assert(!"unimplemented byte order"); break;            \
        }                                                                        \
-      FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc);                             \
-      _rc |= fltfmt_decieee(&fltfmt_idblext80, (z_out), _t);           \
+      _rc |= fltfmt_decieee(&fltfmt_idblext80, _z, _t); FROBNAN_DEC;   \
      } break;                                                           \
                                                                         \
      default: {                                                         \
-      struct floatbits *_z = (z_out);                                  \
        ty _x = (x), _y;                                                 \
        unsigned _i, _n, _f = 0;                                         \
        uint32 _t;                                                       \
diff --git a/utils/t/fltfmt-test.c b/utils/t/fltfmt-test.c

index 5b606077e3f09ad1b2cff6fc9c735b37b358b200..9ed7c68440cc34895eebeca6e9fc906d2672e7d1 100644 (file)
--- a/utils/t/fltfmt-test.c
+++ b/utils/t/fltfmt-test.c
@@ -208,7 +208,7 @@ static void test_round(const struct tvec_reg *in, struct tvec_reg *out,
  static const struct tvec_test round_test =
    { "round", round_regs, 0, test_round };
  
-/*----- IEEE format conversion --------------------------------------------*/
+/*----- IEEE format conversions -------------------------------------------*/
  
  #define IEEE_FORMATS(_)                                                        \
    _(mini, 1)                                                           \
@@ -348,7 +348,7 @@ IEEE_FORMATS(DEF_TEST)
  #define DEF_IEEE_TEST(ty, sz) &enc##ty##_test, &dec##ty##_test,
  #define IEEE_TESTS IEEE_FORMATS(DEF_IEEE_TEST)
  
-/*----- Native format conversion ------------------------------------------*/
+/*----- Native format conversions -----------------------------------------*/
  
  #define NATIVE_FORMATS(_)                                              \
    _(flt, float, FLT)                                                   \
@@ -393,64 +393,64 @@ static const struct tvec_flag assume_flags[] = {
  static const struct tvec_flaginfo assume_flaginfo =
    { "assume", assume_flags, &tvrange_uint };
  
-struct nativeenv { struct tvec_env _env; unsigned ntv; };
-struct nativectx { unsigned af, want; };
+struct assumeenv { struct tvec_env _env; unsigned ntv; };
+struct assumectx { unsigned af, want; };
  
-static void setup_native(struct tvec_state *tv, const struct tvec_env *env,
+static void setup_assume(struct tvec_state *tv, const struct tvec_env *env,
                          void *pctx, void *ctx)
  {
-  const struct nativeenv *nenv = (const struct nativeenv *)env;
-  const struct ntvinfo *info = &ntvinfo[nenv->ntv];
-  struct nativectx *nctx = ctx;
+  const struct assumeenv *aenv = (const struct assumeenv *)env;
+  const struct ntvinfo *info = &ntvinfo[aenv->ntv];
+  struct assumectx *actx = ctx;
    double prec;
  
    switch (info->fmt&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {
      case FLTFMT_IEEE_F32:
-      nctx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24;
+      actx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24;
        break;
      case FLTFMT_IEEE_F64:
-      nctx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24 | AF_PREC53;
+      actx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24 | AF_PREC53;
        break;
      case FLTFMT_IEEE_F128:
-      nctx->af = AF_NEGZ | AF_INF | AF_IEEE |
+      actx->af = AF_NEGZ | AF_INF | AF_IEEE |
                  AF_PREC24 | AF_PREC53 | AF_PREC64 | AF_PREC113;
        break;
      case FLTFMT_INTEL_F80:
-      nctx->af = AF_NEGZ | AF_INF | AF_IEEE |
+      actx->af = AF_NEGZ | AF_INF | AF_IEEE |
                  AF_PREC24 | AF_PREC53 | AF_PREC64;
        break;
      default:
-      nctx->af = 0;
-      if (NEGP(-0.0)) nctx->af |= AF_NEGZ;
+      actx->af = 0;
+      if (NEGP(-0.0)) actx->af |= AF_NEGZ;
  #ifdef INF
-      nctx->af |= AF_INF;
+      actx->af |= AF_INF;
  #endif
  #ifdef NAN
-      nctx->af |= AF_STDCNAN;
+      actx->af |= AF_STDCNAN;
  #endif
        prec = log(FLT_RADIX)/log(2.0)*info->mant_dig;
-      if (prec >= 24) nctx->af |= AF_PREC24;
-      if (prec >= 53) nctx->af |= AF_PREC53;
-      if (prec >= 64) nctx->af |= AF_PREC64;
-      if (prec >= 113) nctx->af |= AF_PREC113;
+      if (prec >= 24) actx->af |= AF_PREC24;
+      if (prec >= 53) actx->af |= AF_PREC53;
+      if (prec >= 64) actx->af |= AF_PREC64;
+      if (prec >= 113) actx->af |= AF_PREC113;
        break;
    }
-  nctx->want = 0;
+  actx->want = 0;
  }
  
-static int setvar_native(struct tvec_state *tv, const char *var,
+static int setvar_assume(struct tvec_state *tv, const char *var,
                          const union tvec_regval *rv, void *ctx)
  {
-  struct nativectx *nctx = ctx;
+  struct assumectx *actx = ctx;
  
-  if (STRCMP(var, ==, "@assume")) nctx->want = rv->u;
+  if (STRCMP(var, ==, "@assume")) actx->want = rv->u;
    else return (tvec_unkregerr(tv, var));
    return (0);
  }
  static const struct tvec_vardef assume_vardef =
-  { sizeof(struct tvec_reg), setvar_native,
+  { sizeof(struct tvec_reg), setvar_assume,
      { "@assume", &tvty_flags, 0, 0, { &assume_flaginfo } }};
-static const struct tvec_vardef *findvar_native
+static const struct tvec_vardef *findvar_assume
    (struct tvec_state *tv, const char *name, void **ctx_out, void *ctx)
  {
    if (STRCMP(name, ==, "@assume"))
@@ -459,11 +459,11 @@ static const struct tvec_vardef *findvar_native
      return (0);
  }
  
-static void before_native(struct tvec_state *tv, void *ctx)
+static void before_assume(struct tvec_state *tv, void *ctx)
  {
-  struct nativectx *nctx = ctx;
+  struct assumectx *actx = ctx;
  
-  if (nctx->want&~nctx->af)
+  if ((tv->f&TVSF_ACTIVE) && (actx->want&~actx->af))
      tvec_skip(tv, "unsatisfied assumption");
    else {
      DEFAULT_REG(RROUND, rv->u = FLTRND_NEAREVEN);
@@ -471,18 +471,18 @@ static void before_native(struct tvec_state *tv, void *ctx)
    }
  }
  
-static void after_native(struct tvec_state *tv, void *ctx)
+static void after_assume(struct tvec_state *tv, void *ctx)
  {
-  struct nativectx *nctx = ctx;
+  struct assumectx *actx = ctx;
  
-  nctx->want = 0;
+  actx->want = 0;
  }
  
  #define DEF_TEST(ty, cty, TY)                                          \
                                                                         \
-  static struct nativeenv ty##_env =                                   \
-    { { sizeof(struct nativectx),                                      \
-       setup_native, findvar_native, before_native, 0, after_native, 0 }, \
+  static struct assumeenv ty##_env =                                   \
+    { { sizeof(struct assumectx),                                      \
+       setup_assume, findvar_assume, before_assume, 0, after_assume, 0 }, \
        NTV_##TY };                                                      \
                                                                         \
    static const struct tvec_regdef enc##ty##_regs[] = {                 \
@@ -538,12 +538,93 @@ NATIVE_FORMATS(DEF_TEST)
  #define DEF_NATIVE_TEST(ty, cty, TY) &enc##ty##_test, &dec##ty##_test,
  #define NATIVE_TESTS NATIVE_FORMATS(DEF_NATIVE_TEST)
  
+/*----- Direct conversions ------------------------------------------------*/
+
+#define DIRECT_CONVERSIONS(_)                                          \
+  _(flt, float, f32)                                                   \
+  _(dbl, double, f64)
+
+#define DEF_TEST1(ty, cty, fty, e)                                     \
+  static void test_##ty##to##fty##e(const struct tvec_reg *in,         \
+                                   struct tvec_reg *out,               \
+                                   void *ctx)                          \
+  {                                                                    \
+    tvec_allocbytes(&out[RZ_OUT].v, OUTSZ_##fty);                      \
+    out[RERR_OUT].v.u = fltfmt_##ty##to##fty##e(out[RZ_OUT].v.bytes.p, \
+                                               in[RX].v.f,             \
+                                               in[RROUND].v.u);        \
+  }                                                                    \
+                                                                       \
+  static const struct tvec_test ty##to##fty##e##_test =                        \
+    { #ty "to" #fty #e, ty##to##fty##_regs, &ty##_env._env,            \
+      test_##ty##to##fty##e };
+
+#define DEF_TEST(ty, cty, fty)                                         \
+  static const struct tvec_regdef ty##to##fty##_regs[] = {             \
+    { "round", &tvty_flags,    RROUND, TVRF_OPT, { &fltrnd_flaginfo } }, \
+    { "x",     &tvty_float,    RX,     0,      { &tvflt_##cty } },     \
+    { "z",     &tvty_bytes,    RZ_OUT, 0,      { &fty##_range } },     \
+    { "err",   &tvty_flags,    RERR_OUT, TVRF_OPT, { &flterr_flaginfo } }, \
+    TVEC_ENDREGS                                                       \
+  };                                                                   \
+                                                                       \
+  DEF_TEST1(ty, cty, fty, l)                                           \
+  DEF_TEST1(ty, cty, fty, b)
+
+DIRECT_CONVERSIONS(DEF_TEST)
+
+#undef DEF_TEST1
+#undef DEF_TEST
+
+#define DEF_TEST1(ty, cty, fty, e)                                     \
+  static void test_##fty##e##to##ty(const struct tvec_reg *in,         \
+                                   struct tvec_reg *out,               \
+                                   void *ctx)                          \
+  {                                                                    \
+    cty z;                                                             \
+                                                                       \
+    out[RERR_OUT].v.u = fltfmt_##fty##e##to##ty(&z, in[RX].v.bytes.p,  \
+                                               in[RROUND].v.u);        \
+    out[RZ_OUT].v.f = z;                                               \
+  }                                                                    \
+                                                                       \
+  static const struct tvec_test fty##e##to##ty##_test =                        \
+    { #fty #e "to" #ty, fty##to##ty##_regs, &ty##_env._env,            \
+      test_##fty##e##to##ty };
+
+#define DEF_TEST(ty, cty, fty)                                         \
+  static const struct tvec_regdef fty##to##ty##_regs[] = {             \
+    { "round", &tvty_flags,    RROUND, TVRF_OPT, { &fltrnd_flaginfo } }, \
+    { "x",     &tvty_bytes,    RX,     0,      { &fty##_range } },     \
+    { "z",     &tvty_float,    RZ_OUT, 0,      { &tvflt_##cty } },     \
+    { "err",   &tvty_flags,    RERR_OUT, TVRF_OPT, { &flterr_flaginfo } }, \
+    TVEC_ENDREGS                                                       \
+  };                                                                   \
+                                                                       \
+  DEF_TEST1(ty, cty, fty, l)                                           \
+  DEF_TEST1(ty, cty, fty, b)
+
+DIRECT_CONVERSIONS(DEF_TEST)
+
+#undef DEF_TEST1
+#undef DEF_TEST
+
+#define DEF_DIRECT_CTOF_TESTS(ty, cty, fty)                            \
+  &ty##to##fty##l_test, &ty##to##fty##b_test,
+#define DEF_DIRECT_FTOC_TESTS(ty, cty, fty)                            \
+  &fty##l##to##ty##_test, &fty##b##to##ty##_test,
+#define DEF_DIRECT_TESTS(ty, cty, fty)                                 \
+  DEF_DIRECT_CTOF_TESTS(ty, cty, fty)                                  \
+  DEF_DIRECT_FTOC_TESTS(ty, cty, fty)
+#define DIRECT_TESTS DIRECT_CONVERSIONS(DEF_DIRECT_TESTS)
+
  /*----- Main code ---------------------------------------------------------*/
  
  static const struct tvec_test *const tests[] = {
    &round_test,
    NATIVE_TESTS
    IEEE_TESTS
+  DIRECT_TESTS
    0
  };
  
diff --git a/utils/t/fltfmt-testgen b/utils/t/fltfmt-testgen

index 7f63664ad76271c609b9ab6f148ef18c123f990a..b9670b0ec766a5755e3a095eea4d083a9b7e2418 100755 (executable)
--- a/utils/t/fltfmt-testgen
+++ b/utils/t/fltfmt-testgen
@@ -73,9 +73,12 @@ def explore(wd, lobits, hibits):
      for hi in xrange(bit(hibits)):
        top = hi << hishift
        for lo in xrange(bit(lobits)):
+        while True:
+          fill = R.randrange(midbit)
+          if fill != 0 and fill != midbit - 1: break
          base = lo | top
          yield base
-        yield base | (R.randrange(midbit) << lobits)
+        yield base | (fill << lobits)
          yield base | m
  
  class ExploreParameters (object):
diff --git a/utils/t/fltfmt.tests b/utils/t/fltfmt.tests

index bf85107c57fe9b79321dacd31fda8a5721149930..840f5140f99c8b68fa4421b38fbea1ca51f897b4 100644 (file)
--- a/utils/t/fltfmt.tests
+++ b/utils/t/fltfmt.tests
@@ -225,6 +225,31 @@ err = INEXACT
  
  [encf64]
  
+;; NaN conversions
+
+f = QNAN
+m = #empty
+z = 7ff80000 00000000
+
+f = SNAN
+m = #empty
+z = 7ff00000 00000001
+err = INEXACT
+
+f = SNAN
+m = 00000000 00002000
+z = 7ff00000 00000001
+
+f = SNAN
+m = 00000000 00001fff
+z = 7ff00000 00000001
+err = INEXACT
+
+f = SNAN
+m = 00000000 00000000 00000001
+z = 7ff00000 00000001
+err = INEXACT
+
  ;; Check NaN truncation.
  
  errmask = 0
@@ -418,7 +443,7 @@ z = -0.625
  f = 0
  e = 3
  m = c90fdb00
-z = 6.2831854820251465
+z = 6.2831853
  
  @assume = stdc-nan
  f = QNAN
@@ -467,7 +492,7 @@ err = INEXACT
  @assume = ieee
  x = #nan
  f = QNAN
-m = 00000000 ; maybe
+;; m is left unset: no particular NaN payload is expected here
  err = OK
  
  [encdbl]
@@ -508,6 +533,7 @@ err = INEXACT
  f = QNAN
  m = #empty
  z = #nan
+err = OK
  
  [decdbl]
  
@@ -545,7 +571,88 @@ err = INEXACT
  @assume = ieee
  x = #nan
  f = QNAN
-m = 00000000 00000000 ; maybe
+;; m is left unset: no particular NaN payload is expected here
  err = OK
  
  ;;;--------------------------------------------------------------------------
+;;; Direct conversions.
+
+[f32btoflt]
+
+x = 00000000
+z = 0.0
+
+@assume = negz
+x = 80000000
+z = -0.0
+
+@assume = inf
+x = 7f800000
+z = #+inf
+
+@assume = inf
+x = ff800000
+z = #-inf
+
+@assume = stdc-nan | ieee
+x = 7fc00000
+z = #nan
+
+@assume = stdc-nan | ieee
+x = 7fa00000
+z = #nan
+
+x = 3f800000
+z = 1.0
+
+@assume = prec24
+x = 40c90fdb
+z = 6.2831853
+
+[f32ltoflt]
+
+x = 10c12549
+z = 678929
+
+[flttof32b]
+
+x = 678929
+z = 4925c110
+
+@assume = prec24
+x = 6.2831853
+z = 40c90fdb
+
+[flttof32l]
+
+x = 678929
+z = 10c12549
+
+[dbltof64b]
+
+x = 0.0
+z = 00000000 00000000
+
+x = -2964135146
+z = c1e615a3 9d400000
+
+@assume = prec53
+x = 6.283185307179586
+z = 401921fb 54442d18
+
+[dbltof64l]
+
+x = -2964135146
+z = 0000409d a315e6c1
+
+[f64btodbl]
+
+x = c1e615a3 9d400000
+z = -2964135146
+
+[f64ltodbl]
+
+x = 0000409d a315e6c1
+z = -2964135146
+
+;;;--------------------------------------------------------------------------
author	Mark Wooding <mdw@distorted.org.uk>
	Tue, 23 Apr 2024 23:49:56 +0000 (00:49 +0100)
committer	Mark Wooding <mdw@distorted.org.uk>
	Tue, 23 Apr 2024 23:49:56 +0000 (00:49 +0100)
defs.man		patch \| blob \| blame \| history
mem/arena.3.in		patch \| blob \| blame \| history
test/bench.3.in		patch \| blob \| blame \| history
test/tvec-types.c		patch \| blob \| blame \| history
utils/fltfmt-convert.c		patch \| blob \| blame \| history
utils/fltfmt.3.in		patch \| blob \| blame \| history
utils/fltfmt.c		patch \| blob \| blame \| history
utils/t/fltfmt-test.c		patch \| blob \| blame \| history
utils/t/fltfmt-testgen		patch \| blob \| blame \| history
utils/t/fltfmt.tests		patch \| blob \| blame \| history