@@@ fltfmt wip

author Mark Wooding <mdw@distorted.org.uk>

Tue, 23 Apr 2024 23:49:56 +0000 (00:49 +0100)

committer Mark Wooding <mdw@distorted.org.uk>

Tue, 23 Apr 2024 23:49:56 +0000 (00:49 +0100)
author Mark Wooding <mdw@distorted.org.uk>
Tue, 23 Apr 2024 23:49:56 +0000 (00:49 +0100)
committer Mark Wooding <mdw@distorted.org.uk>
Tue, 23 Apr 2024 23:49:56 +0000 (00:49 +0100)
diff --git a/defs.man b/defs.man

index 379edbb7687c734d559cf91309f5b9fdcc719aee..67ff7be50cca0e541e590aec20ba3bc40a74d1d3 100644 (file)
--- a/defs.man
+++ b/defs.man
@@ -38,6 +38,8 @@
  .  ds /= \(!=
  .  ds <= \(<=
  .  ds >= \(>=
  .  ds /= \(!=
  .  ds <= \(<=
  .  ds >= \(>=
+.  ds mu \(mu
+.  ds sr \(sr
  .  ds ' \(fm
  .  ds , \h'\w'\ 'u/2u'
  .  if \n(.g \{\
  .  ds ' \(fm
  .  ds , \h'\w'\ 'u/2u'
  .  if \n(.g \{\
@@ -53,6 +55,8 @@
  .  ds se
  .  ds us _
  .  ds ue
  .  ds se
  .  ds us _
  .  ds ue
+.  ds mu *
+.  ds sr sqrt
  .  ds ' \(aq
  .  ds *d \,\fIdelta\/\fP
  .  ds /= /=
  .  ds ' \(aq
  .  ds *d \,\fIdelta\/\fP
  .  ds /= /=
diff --git a/mem/arena.3.in b/mem/arena.3.in

index 09207fa6d7a8f7b708763e045d53d749081f5f99..593c25d5f53e4d80ca30c752b9c9f8801aa2a7f4 100644 (file)
--- a/mem/arena.3.in
+++ b/mem/arena.3.in
@@ -150,12 +150,12 @@ handlers which can't easily find the old block's size.
  The macro
  .B ALLOCV_SAFE_P
  returns nonzero if the product
  The macro
  .B ALLOCV_SAFE_P
  returns nonzero if the product
-.IR n "\ \(mu\ " sz
+.IR n "\ \*(mu\ " sz
  is representable in type
  .B size_t
  and zero otherwise;
  i.e., it returns true if it would be safe to try to allocate
  is representable in type
  .B size_t
  and zero otherwise;
  i.e., it returns true if it would be safe to try to allocate
-.IR n "\ \(mu\ " sz
+.IR n "\ \*(mu\ " sz
  bytes.
  The macro
  .BR A_ALLOCV
  bytes.
  The macro
  .BR A_ALLOCV
diff --git a/test/bench.3.in b/test/bench.3.in

index d526de8f82154ac4e72a564360d724d3645b2b7b..3ce25f36e95f90237573a615c02e8d610dad3992 100644 (file)
--- a/test/bench.3.in
+++ b/test/bench.3.in
@@ -794,7 +794,7 @@ with the objective of finding an iteration count
  such that
  .I n
  iterations of the computation take more than
  such that
  .I n
  iterations of the computation take more than
-.IB b ->target_s "" \fR/\(sr2
+.IB b ->target_s "" \fR/\*(sr2
  seconds.
  If measurement fails,
  then
  seconds.
  If measurement fails,
  then
@@ -807,7 +807,7 @@ is set to zero, and
  is filled in with the measurement;
  .IB t_out ->n
  is set to
  is filled in with the measurement;
  .IB t_out ->n
  is set to
-.IR n "\ \(mu\ " base .
+.IR n "\ \*(mu\ " base .
  .PP
  The
  .B BENCH_MEASURE_TAG
  .PP
  The
  .B BENCH_MEASURE_TAG
@@ -902,7 +902,7 @@ the
  flag must be set in
  .IB t ->f \fR.
  If the timing is sufficient \(en if
  flag must be set in
  .IB t ->f \fR.
  If the timing is sufficient \(en if
-.IR t\fB->t "\ \*(>=\ " target_s /\(sr2
+.IR t\fB->t "\ \*(>=\ " target_s /\*(sr2
  \(en then
  .B bench_adapt
  returns a nonzero value to indicate that measurement is complete.
  \(en then
  .B bench_adapt
  returns a nonzero value to indicate that measurement is complete.
@@ -922,7 +922,7 @@ On exit, the timing data is updated,
  and
  .IB t ->n
  is set to the product
  and
  .IB t ->n
  is set to the product
-.IR n "\ \(mu\ " base .
+.IR n "\ \*(mu\ " base .
  .
  .SS Reporting results
  The
  .
  .SS Reporting results
  The
diff --git a/test/tvec-types.c b/test/tvec-types.c

index 8d07ea82b4df21dc43721e8055cbacec0394c992..1d01a3d44027561e3d47e43d9b26fffefbd705c5 100644 (file)
--- a/test/tvec-types.c
+++ b/test/tvec-types.c
@@ -528,18 +528,21 @@ static void format_size(const struct gprintf_ops *gops, void *go,
  static int eqish_floating_p(double x, double y,
                             const struct tvec_floatinfo *fi)
  {
  static int eqish_floating_p(double x, double y,
                             const struct tvec_floatinfo *fi)
  {
-  double t;
+  double t, u;
  
  
+  /* NaNs and infinities are equal only to each other. */
    if (NANP(x)) return (NANP(y)); else if (NANP(y)) return (0);
    if (INFP(x)) return (x == y); else if (INFP(y)) return (0);
  
    if (NANP(x)) return (NANP(y)); else if (NANP(y)) return (0);
    if (INFP(x)) return (x == y); else if (INFP(y)) return (0);
  
+  /* Compare finite values. */
    switch (fi ? fi->f&TVFF_EQMASK : TVFF_EXACT) {
      case TVFF_EXACT:
        return (x == y && NEGP(x) == NEGP(y));
      case TVFF_ABSDELTA:
    switch (fi ? fi->f&TVFF_EQMASK : TVFF_EXACT) {
      case TVFF_EXACT:
        return (x == y && NEGP(x) == NEGP(y));
      case TVFF_ABSDELTA:
-      t = x - y; if (t < 0) t = -t; return (t < fi->delta);
+      t = fabs(y - x); return (t < fi->delta);
      case TVFF_RELDELTA:
      case TVFF_RELDELTA:
-      t = 1.0 - x/y; if (t < 0) t = -t; return (t < fi->delta);
+      t = fabs(y - x); u = fabs(y*fi->delta); if (u < DBL_MIN) u = DBL_MIN;
+      return (t <= u);
      default:
        abort();
    }
      default:
        abort();
    }
@@ -1928,8 +1931,8 @@ const struct tvec_regty tvty_float = {
  
  /* Predefined floating-point ranges. */
  const struct tvec_floatinfo
  
  /* Predefined floating-point ranges. */
  const struct tvec_floatinfo
-  tvflt_float = { TVFF_EXACT | TVFF_INFOK | TVFF_NANOK,
-                 -FLT_MAX, FLT_MAX, 0.0 },
+  tvflt_float = { TVFF_RELDELTA | TVFF_INFOK | TVFF_NANOK,
+                 -FLT_MAX, FLT_MAX, FLT_EPSILON/2 },
    tvflt_double = { TVFF_EXACT | TVFF_INFOK | TVFF_NANOK,
                    -DBL_MAX, DBL_MAX, 0.0 },
    tvflt_finite = { TVFF_EXACT, -DBL_MAX, DBL_MAX, 0.0 },
    tvflt_double = { TVFF_EXACT | TVFF_INFOK | TVFF_NANOK,
                    -DBL_MAX, DBL_MAX, 0.0 },
    tvflt_finite = { TVFF_EXACT, -DBL_MAX, DBL_MAX, 0.0 },
diff --git a/utils/fltfmt-convert.c b/utils/fltfmt-convert.c

index 6db742befdef8f2058568ea42f10a65d70153ddb..cc24dba5f777b81f7201a4820ef09ef03121bc60 100644 (file)
--- a/utils/fltfmt-convert.c
+++ b/utils/fltfmt-convert.c
@@ -1,6 +1,6 @@
  /* -*-c-*-
   *
  /* -*-c-*-
   *
- * Floating-point format conversions
+ * Direct floating-point format conversions
   *
   * (c) 2024 Straylight/Edgeware
   */
   *
   * (c) 2024 Straylight/Edgeware
   */
@@ -40,6 +40,10 @@
    _(float, flt, f32)                                                   \
    _(double, dbl, f64)
  
    _(float, flt, f32)                                                   \
    _(double, dbl, f64)
  
+#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008))
+#  define FROB_NANS
+#endif
+
  #define CONV_DECLS_flt_f32 uint32 t
  #if (FLT_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) == FLTFMT_IEEE_F32
  #  if (FLT_FORMAT&FLTFMT_ENDMASK) == FLTFMT_BE
  #define CONV_DECLS_flt_f32 uint32 t
  #if (FLT_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) == FLTFMT_IEEE_F32
  #  if (FLT_FORMAT&FLTFMT_ENDMASK) == FLTFMT_BE
@@ -51,10 +55,19 @@
  #  else
  #    error "unimplemented byte order"
  #  endif
  #  else
  #    error "unimplemented byte order"
  #  endif
-#  ifdef FLTFMT__MUST_FROB_NANS
-#    define CONV_FROB_flt_f32 do { FLTFMT__FROB_NAN_F32(&t, rc); } while (0)
+#  ifdef FROB_NANS
+#    define CONV_FROBNANflt_f32 do {                                   \
+       if ((t&0x7f800000) != 0x7f800000 || !(t&0x007fffff))            \
+        ;                                                              \
+       else if (t&0x003fffff)                                          \
+        t ^= 0x00400000;                                               \
+       else {                                                          \
+        t = (t&0x80000000) | 0x00000001;                               \
+        rc |= FLTERR_INEXACT;                                          \
+       }                                                               \
+     } while (0)
  #  else
  #  else
-#    define CONV_FROB_flt_f32 do ; while (0)
+#    define CONV_FROBNANflt_f32 do ; while (0)
  #  endif
  #else
  #  define CONV_LOAD_flt_f32 do {                                       \
  #  endif
  #else
  #  define CONV_LOAD_flt_f32 do {                                       \
@@ -69,7 +82,7 @@
       rc |= fltfmt_encflt(z_out, &u, r);                                        \
       fltfmt_freebits(&u);                                              \
     } while (0)
       rc |= fltfmt_encflt(z_out, &u, r);                                        \
       fltfmt_freebits(&u);                                              \
     } while (0)
-#  define CONV_FROB_flt_f32 do ; while (0)
+#  define CONV_FROBNANflt_f32 do ; while (0)
  #endif
  #define CONV_LOADB_flt_f32 do { t = LOAD32_B(p); } while (0)
  #define CONV_LOADL_flt_f32 do { t = LOAD32_L(p); } while (0)
  #endif
  #define CONV_LOADB_flt_f32 do { t = LOAD32_B(p); } while (0)
  #define CONV_LOADL_flt_f32 do { t = LOAD32_L(p); } while (0)
@@ -87,15 +100,26 @@
  #  else
  #    error "unimplemented byte order"
  #  endif
  #  else
  #    error "unimplemented byte order"
  #  endif
-#  ifdef FLTFMT__MUST_FROB_NANS
-#    define CONV_FROB_dbl_f64 do {                                     \
-       uint32 u[2];                                                    \
-       u[0] = HI64(t); u[1] = LO64(t);                                 \
-       FLTFMT__FROB_NAN_F64(&u, rc);                                   \
-       SET64(t, u[0], u[1]);                                           \
+#  ifdef FROB_NANS
+#    define CONV_FROBNANdbl_f64 do {                                   \
+       kludge64 u, v;                                                  \
+       SET64(u, 0x7ff00000, 0x00000000); AND64(v, t, u);               \
+       if (CMP64(v, ==, u)) {                                          \
+        SET64(u, 0x000fffff, 0xffffffff); AND64(v, t, u);              \
+        if (!ZERO64(v)) {                                              \
+          SET64(u, 0x0007ffff, 0xffffffff); AND64(v, t, u);            \
+          if (!ZERO64(v))                                              \
+            { SET64(u, 0x00080000, 0x00000000); XOR64(t, t, u); }      \
+          else {                                                       \
+            SET64(u, 0x80000000, 0x00000000); AND64(t, t, u);          \
+            SET64(u, 0x00000000, 0x00000001); OR64(t, t, u);           \
+            rc |= FLTERR_INEXACT;                                      \
+          }                                                            \
+        }                                                              \
+       }                                                               \
       } while (0)
  #  else
       } while (0)
  #  else
-#    define CONV_FROB_dbl_f64 do ; while (0)
+#    define CONV_FROBNANdbl_f64 do ; while (0)
  #  endif
  #else
  #  define CONV_LOAD_dbl_f64 do {                                       \
  #  endif
  #else
  #  define CONV_LOAD_dbl_f64 do {                                       \
@@ -111,7 +135,7 @@
       rc |= fltfmt_encdbl(z_out, &u, r);                                        \
       fltfmt_freebits(&u);                                              \
     } while (0)
       rc |= fltfmt_encdbl(z_out, &u, r);                                        \
       fltfmt_freebits(&u);                                              \
     } while (0)
-#  define CONV_FROB_dbl_f64 do ; while (0)
+#  define CONV_FROBNANdbl_f64 do ; while (0)
  #endif
  #define CONV_LOADB_dbl_f64 do { LOAD64_B_(t, p); } while (0)
  #define CONV_LOADL_dbl_f64 do { LOAD64_L_(t, p); } while (0)
  #endif
  #define CONV_LOADB_dbl_f64 do { LOAD64_B_(t, p); } while (0)
  #define CONV_LOADL_dbl_f64 do { LOAD64_L_(t, p); } while (0)
@@ -145,7 +169,7 @@
      unsigned rc = 0; CONV_DECLS_##cty##_##fty;                         \
                                                                         \
      CONV_LOAD_##cty##_##fty;                                           \
      unsigned rc = 0; CONV_DECLS_##cty##_##fty;                         \
                                                                         \
      CONV_LOAD_##cty##_##fty;                                           \
-    CONV_FROB_##cty##_##fty;                                           \
+    CONV_FROBNAN##cty##_##fty;                                         \
      CONV_STOREL_##cty##_##fty;                                         \
      return (rc);                                                       \
    }                                                                    \
      CONV_STOREL_##cty##_##fty;                                         \
      return (rc);                                                       \
    }                                                                    \
@@ -155,7 +179,7 @@
      unsigned rc = 0; CONV_DECLS_##cty##_##fty;                         \
                                                                         \
      CONV_LOAD_##cty##_##fty;                                           \
      unsigned rc = 0; CONV_DECLS_##cty##_##fty;                         \
                                                                         \
      CONV_LOAD_##cty##_##fty;                                           \
-    CONV_FROB_##cty##_##fty;                                           \
+    CONV_FROBNAN##cty##_##fty;                                         \
      CONV_STOREB_##cty##_##fty;                                         \
      return (rc);                                                       \
    }
      CONV_STOREB_##cty##_##fty;                                         \
      return (rc);                                                       \
    }
@@ -188,7 +212,7 @@ CONVERSIONS(DEF_CONV)
      unsigned rc = 0; CONV_DECLS_##cty##_##fty;                         \
                                                                         \
      CONV_LOADL_##cty##_##fty;                                          \
      unsigned rc = 0; CONV_DECLS_##cty##_##fty;                         \
                                                                         \
      CONV_LOADL_##cty##_##fty;                                          \
-    CONV_FROB_##cty##_##fty;                                           \
+    CONV_FROBNAN##cty##_##fty;                                         \
      CONV_STORE_##cty##_##fty;                                          \
      return (rc);                                                       \
    }                                                                    \
      CONV_STORE_##cty##_##fty;                                          \
      return (rc);                                                       \
    }                                                                    \
@@ -198,7 +222,7 @@ CONVERSIONS(DEF_CONV)
      unsigned rc = 0; CONV_DECLS_##cty##_##fty;                         \
                                                                         \
      CONV_LOADB_##cty##_##fty;                                          \
      unsigned rc = 0; CONV_DECLS_##cty##_##fty;                         \
                                                                         \
      CONV_LOADB_##cty##_##fty;                                          \
-    CONV_FROB_##cty##_##fty;                                           \
+    CONV_FROBNAN##cty##_##fty;                                         \
      CONV_STORE_##cty##_##fty;                                          \
      return (rc);                                                       \
    }
      CONV_STORE_##cty##_##fty;                                          \
      return (rc);                                                       \
    }
diff --git a/utils/fltfmt.3.in b/utils/fltfmt.3.in

index e5ae258bc8b430f9e4172f80dea9ee5221275ca0..6343c7966e7a1d3270a995f0dd43d418d4f2dfbc 100644 (file)
--- a/utils/fltfmt.3.in
+++ b/utils/fltfmt.3.in
@@ -295,7 +295,7 @@ This error flag is sometimes set conservatively.
  .TP
  .B FLTERR_UFLOW
  The conversion underflowed:
  .TP
  .B FLTERR_UFLOW
  The conversion underflowed:
-a nonzero input was too tiny (in asbolute value) to represent,
+a nonzero input was too tiny (in absolute value) to represent,
  and a zero result was returned.
  .TP
  .B FLTERR_OFLOW
  and a zero result was returned.
  .TP
  .B FLTERR_OFLOW
@@ -393,7 +393,7 @@ and
  are mask with set bits corresponding to their respective predicates.
  Bitwise boolean logic can be applied to these masks
  in order to calculate the masks corresponding to
  are mask with set bits corresponding to their respective predicates.
  Bitwise boolean logic can be applied to these masks
  in order to calculate the masks corresponding to
-the same logical expresssion applied to the individual predicates.
+the same logical expression applied to the individual predicates.
  .B FRPMASK_INEXACT holds if
  .B LOW
  or
  .B FRPMASK_INEXACT holds if
  .B LOW
  or
@@ -416,6 +416,73 @@ denoted
  This is likely a good option
  if there is no compelling argument for a different specific choice.
  .
  This is likely a good option
  if there is no compelling argument for a different specific choice.
  .
+.SS Direct conversions
+The functions
+.B fltfmt_flttof32l
+and
+.B fltfmt_flttof32b
+convert a
+.B float
+argument to an IEEE\ 754 Binary32 value
+in little- or big-endian byte order, respectively;
+similarly
+.B fltfmt_dbltof64l
+and
+.B fltfmt_dbltof64b
+convert a
+.B double
+argument to an IEEE\ 754 Binary64 value
+in little- or big-endian byte order, respectively.
+The value to convert is given as
+.I x
+and the result is written at the address
+.IR p .
+.PP
+The functions
+.B fltfmt_f32ltoflt
+and
+.B fltfmt_f32btoflt
+convert an IEEE\ 754 Binary32 value,
+in little- or big-endian byte order, respectively,
+to a
+.BR float ;
+similarly,
+.B fltfmt_f64ltodbl
+and
+.B fltfmt_f64btodbl
+convert an IEEE\ 754 Binary64 value,
+in little- or big-endian byte order, respectively,
+to a
+.BR double .
+The value to convert is read from address
+.I p
+and the result is written to
+.RI * z_out \fR.
+.PP
+All of these functions additionally take a rounding mode
+.I r
+which is applied if the conversion cannot be performed exactly,
+and return an error code as described above.
+.PP
+On many modern platforms, the
+.B float
+and
+.B double
+types are represented internally using the IEEE
+Binary32 and Binary64 formats,
+so these conversions are trivial, or nearly so.
+A complication arises on PA-RISC and older MIPS processors:
+see the descriptions of
+.B fltfmt_encieee
+and
+.B fltfmt_decdbl
+below for the details.
+.PP
+On other platforms,
+the conversion is decidedly nontrivial,
+and makes use of the machinery described below;
+this may also be useful for more complex conversions.
+.
  .SS The floatbits structure
  In order to avoid a combinatorial explosion in conversion operations,
  all the basic conversions involve,
  .SS The floatbits structure
  In order to avoid a combinatorial explosion in conversion operations,
  all the basic conversions involve,
@@ -540,7 +607,7 @@ or +1 if
  .B FLTF_NEG
  is clear,
  then the number represented is
  .B FLTF_NEG
  is clear,
  then the number represented is
-.IR s "\ \(mu\ " m "\ \(mu\ 2\*(ss" e \*(se.
+.IR s "\ \*(mu\ " m "\ \*(mu\ 2\*(ss" e \*(se.
  .PP
  A
  .B struct floatbits
  .PP
  A
  .B struct floatbits
@@ -564,6 +631,9 @@ neither allocates any storage or other resources,
  leaving
  .B frac
  null.
  leaving
  .B frac
  null.
+In this state, it is safe to modify the arena pointer
+.B a
+if the default initialization is unsatisfactory.
  .PP
  The
  .B fltfmt_allocfrac
  .PP
  The
  .B fltfmt_allocfrac
@@ -707,7 +777,7 @@ If
  .IR e "\ =\ \-" e \*(us0\*(ue
  then the value is zero or a subnormal,
  with the value
  .IR e "\ =\ \-" e \*(us0\*(ue
  then the value is zero or a subnormal,
  with the value
-.RI (\-1)\*(ss s "\*(se\ \(mu\ " m "\ \(mu\ 2\*(ss" e +1\*(se.
+.RI (\-1)\*(ss s "\*(se\ \*(mu\ " m "\ \*(mu\ 2\*(ss" e +1\*(se.
  In particular,
  if
  .IR m "\ =\ 0"
  In particular,
  if
  .IR m "\ =\ 0"
@@ -731,7 +801,7 @@ but the result will be as described.
  If
  .RI "1\ \-\ " e "\*(us0\*(ue \*(<=\ " e "\ < 2" e "\*(us0\*(ue\ +\ 1"
  then the value is a (supposedly) normal number
  If
  .RI "1\ \-\ " e "\*(us0\*(ue \*(<=\ " e "\ < 2" e "\*(us0\*(ue\ +\ 1"
  then the value is a (supposedly) normal number
-.RI (\-1)\*(ss s "\*(se\ \(mu\ " m "\ \(mu\ 2\*(ss" e \*(se.
+.RI (\-1)\*(ss s "\*(se\ \*(mu\ " m "\ \*(mu\ 2\*(ss" e \*(se.
  If
  .I h
  is zero then
  If
  .I h
  is zero then
@@ -821,58 +891,7 @@ in the notation above this is
  The precision;
  in the notation above this is
  .IR p .
  The precision;
  in the notation above this is
  .IR p .
-.PP
-The following IEEE formats descriptions are already defined.
-.TP
-.B "fltfmt_f16"
-The IEEE\ 754 Binary16 format, with
-.IR w "\ =\ 5,"
-.IR p "\ =\ 11,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_f32"
-The IEEE\ 754 Binary32 (`single precision') format, with
-.IR w "\ =\ 8,"
-.IR p "\ =\ 24,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_f64"
-The IEEE\ 754 Binary64 (`double precision') format, with
-.IR w "\ =\ 11,"
-.IR p "\ =\ 53,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_f128"
-The IEEE\ 754 Binary128 (`quad precision') format, with
-.IR w "\ =\ 15,"
-.IR p "\ =\ 113,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_mini"
-An eight-bit `minifloat' format, with
-.IR w "\ =\ 4,"
-.IR p "\ =\ 4,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_bf16"
-The Google `BFloat16' format, with
-.IR w "\ =\ 8,"
-.IR p "\ =\ 8,"
-and
-.IR h "\ =\ 0."
-.TP
-.B "fltfmt_idblext80"
-The Intel 8087 80-bit `double extended' format, with
-.IR w "\ =\ 15,"
-.IR p "\ =\ 64,"
-and
-.IR h "\ =\ 1."
-.PP
+
  The
  .B fltfmt_encieee
  and
  The
  .B fltfmt_encieee
  and
@@ -921,6 +940,10 @@ discarding low-significant bits;
  if the input is a finite value,
  then the significand is rounded to fit
  according to the requested rounding mode.
  if the input is a finite value,
  then the significand is rounded to fit
  according to the requested rounding mode.
+If a signalling NaN ends up with all of its payload bits zero,
+as a result of truncation or otherwise,
+then the least-significant bit of the output payload is forced on
+in order to distinguish the result from an infinity.
  The possible errors are
  .B FLTERR_UFLOW
  if the value is unrepresentably tiny,
  The possible errors are
  .B FLTERR_UFLOW
  if the value is unrepresentably tiny,
@@ -969,10 +992,185 @@ the result is returned anyway,
  with the unit bit interpreted as encoded in finite numbers,
  and discarded in infinities and NaNs.
  .PP
  with the unit bit interpreted as encoded in finite numbers,
  and discarded in infinities and NaNs.
  .PP
-For each of the format
-
-
-
+A number of IEEE and IEEE-like formats are predefined:
+for format
+.IR fmt ,
+there is
+an IEEE format description, named
+.BI fltfmt_ fmt \fR,
+together with encoding and decoding functions, named
+.BI fltfmt_enc fmt
+and
+.BI fltfmt_dec fmt \fR;
+for the most part,
+these functions use more convenient types
+to hold encoded values.
+.TP
+.B "f16"
+The IEEE\ 754 Binary16 format, with
+.IR w "\ =\ 5,"
+.IR p "\ =\ 11,"
+and
+.IR h "\ =\ 0;"
+stored in a
+.BR uint16 .
+.TP
+.B "fltfmt_f32"
+The IEEE\ 754 Binary32 (`single precision') format, with
+.IR w "\ =\ 8,"
+.IR p "\ =\ 24,"
+and
+.IR h "\ =\ 0;"
+stored in a
+.BR uint32 .
+.TP
+.B "fltfmt_f64"
+The IEEE\ 754 Binary64 (`double precision') format, with
+.IR w "\ =\ 11,"
+.IR p "\ =\ 53,"
+and
+.IR h "\ =\ 0;"
+stored in a
+.B kludge64
+(see
+.BR bits (3)
+for details).
+.TP
+.B "fltfmt_f128"
+The IEEE\ 754 Binary128 (`quad precision') format, with
+.IR w "\ =\ 15,"
+.IR p "\ =\ 113,"
+and
+.IR h "\ =\ 0;"
+stored in a big-endian vector of
+.BR uint32 ,
+just as for the generic functions described above.
+.TP
+.B "fltfmt_mini"
+An eight-bit `minifloat' format, with
+.IR w "\ =\ 4,"
+.IR p "\ =\ 4,"
+and
+.IR h "\ =\ 0;"
+stored in an
+.BR octet .
+.TP
+.B "fltfmt_bf16"
+The Google `BFloat16' format, with
+.IR w "\ =\ 8,"
+.IR p "\ =\ 8,"
+and
+.IR h "\ =\ 0;"
+stored in a
+.BR uint16 .
+.TP
+.B "fltfmt_idblext80"
+The Intel 8087 80-bit `double extended' format, with
+.IR w "\ =\ 15,"
+.IR p "\ =\ 64,"
+and
+.IR h "\ =\ 1;"
+stored as a
+.B uint16
+holding the sign and exponent,
+and a
+.B kludge64
+holding the significand.
+.
+.SS Native formats
+There are also functions for converting between
+.B struct floatbits
+and the implementation's native floating-point types
+.B float
+(abbreviated
+.BR flt ),
+.B double
+(abbreviated
+.BR dbl ),
+and
+.B "long double"
+(abbreviated
+.BR ldbl ).
+.PP
+For each native type abbreviation
+.IR ty ,
+there are functions
+.BI fltfmt_enc ty
+and
+.BI fltfmt_dec ty \fR,
+which respectively convert the value held in
+.B struct floatbits
+to or from a value of the corresponding C type.
+(The functions acting on
+.B long double
+values are only available if the platform supports C99 or later.)
+.PP
+The
+.BI fltfmt_enc ty
+functions read an input value from a
+.B struct floatbits
+pointer
+.I x
+and store the encoded result through a pointer
+.I z_out
+to the appropriate C type;
+the function also receives a rounding mode
+.IR r ,
+but see below.
+The
+.BI fltfmt_dec ty
+functions are given an input value of the appropriate C type,
+and store the decoded result in a
+.B struct floatbits
+structure pointed to by
+.IR z_arg ;
+again, the function also receives a rounding mode
+.IR r ,
+but see below.
+.PP
+These functions can use two different strategies for conversion.
+If the compile-time configuration step detects
+that the implementation is using
+a specific, supported format for a native type,
+then conversions involving the native type
+are performed using the existing machinery for that format.
+For example, if,
+as is in fact nearly universal on modern-ish systems,
+the
+.B double
+type uses the IEEE\ 754 Binary64 format,
+then
+.B fltfmt_encdbl
+and
+.B fltfmt_decdbl
+use the functions
+.B fltfmt_encf64
+and
+.B fltfmt_decf64
+described above for the conversion.
+This approach has the benefit that
+everything is done under the control of the
+.B fltfmt
+machinery,
+which can faithfully preserve signs of zero values,
+and NaN payloads.
+The error conditions are, for the most part, the same as for the
+.B fltfmt_encieee
+and
+.B fltfmt_decieee
+functions described above.
+The encoding functions have an additional source of inexactness
+on PA-RISC and older MIPS processors
+which use the reversed quiet/signalling NaN convention:
+a quiet NaN with an all-zero payload
+is not representable on such implementations
+(the encoding is an infinity instead);
+in this situation,
+the least significant payload bit is forced on,
+just as if the payload required truncation,
+and
+.B FLTERR_INEXACT
+is returned.
  .
  .\"--------------------------------------------------------------------------
  .SH "SEE ALSO"
  .
  .\"--------------------------------------------------------------------------
  .SH "SEE ALSO"
diff --git a/utils/fltfmt.c b/utils/fltfmt.c

index deaf553667bbaee00acd68673766ed64852640a2..cb39eca4cbaa145f87ecc360fd7b3eb735f763b1 100644 (file)
--- a/utils/fltfmt.c
+++ b/utils/fltfmt.c
@@ -39,9 +39,19 @@
  #include "bits.h"
  #include "fltfmt.h"
  #include "growbuf.h"
  #include "bits.h"
  #include "fltfmt.h"
  #include "growbuf.h"
-#include "macros.h"
  #include "maths.h"
  
  #include "maths.h"
  
+/*----- Preliminary hacking -----------------------------------------------*/
+
+/* The native-format conversions are -- at least if the format is
+ * unrecognized -- dependent on the implementation's rounding.  Our own
+ * rounding mode specifications don't fit into the framework very well, but I
+ * still want to respect the prevailing rounding mode.
+ *
+ * The `proper' way to do this is with %|#pragma STDC FENV_ACCESS|%.  But
+ * that doesn't actually work on GCC, or on Clang from not too long ago.  So
+ * use compiler-specific hacking to support this.
+ */
  #if GCC_VERSION_P(4, 4)
  #  pragma GCC optimize "-frounding-math"
  #elif CLANG_VERSION_P(11, 0) && !CLANG_VERSION_P(12, 0)
  #if GCC_VERSION_P(4, 4)
  #  pragma GCC optimize "-frounding-math"
  #elif CLANG_VERSION_P(11, 0) && !CLANG_VERSION_P(12, 0)
@@ -531,7 +541,7 @@ unsigned fltfmt_round(struct floatbits *z_out, const struct floatbits *x,
    return (rc);
  }
  
    return (rc);
  }
  
-/*----- IEEE formats ------------------------------------------------------*/
+/*----- IEEE and related formats ------------------------------------------*/
  
  /* IEEE (and related) format descriptions. */
  const struct fltfmt_ieeefmt
  
  /* IEEE (and related) format descriptions. */
  const struct fltfmt_ieeefmt
@@ -622,16 +632,24 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt,
      /* Copy the payload.
       *
       * If the payload is all-zero and we're meant to set a signalling NaN
      /* Copy the payload.
       *
       * If the payload is all-zero and we're meant to set a signalling NaN
-     * then report an exactness failure and set the low bit.
+     * then report an exactness failure and set the least-significant bit.
       */
      mb = fmt->prec - 2; mw = (mb + 31)/32; sh = -mb%32;
       */
      mb = fmt->prec - 2; mw = (mb + 31)/32; sh = -mb%32;
-    for (i = 0; i < nw - mw; i++) z[i] = 0;
-    n = x->n; if (n > mw) n = nw;
-    t = shr(z + i, x->frac, n, sh); i += n;
-    if (i < nw) z[i++] = t;
-    sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++;
-    if (f&FLTF_QNAN) z0 |= B32(sh);
-    else if (!fracwd) { ERR(FLTERR_INEXACT); z[nw - 1] |= 1; }
+    n = x->n;
+      if (n < mw) j = 0;
+      else { n = mw; j = sh; }
+    if ((f&FLTF_SNAN) && ms_set_bit(x->frac + n, j, 32*n) == ALLCLEAR) {
+      ERR(FLTERR_INEXACT);
+      n = nw - 1; for (i = 0; i < n; i++) z[i] = 0;
+      z[i++] = 1;
+    } else {
+      for (i = 0; i < nw - mw; i++) z[i] = 0;
+      n = x->n; if (n > mw) n = mw;
+      t = shr(z + i, x->frac, n, sh); i += n;
+      if (i < nw) z[i++] = t;
+      sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++;
+      if (f&FLTF_QNAN) z0 |= B32(sh);
+    }
  
      /* Set the exponent and, for non-hidden-bit formats, the unit bit. */
      z0 |= M32(fmt->expwd) << esh;
  
      /* Set the exponent and, for non-hidden-bit formats, the unit bit. */
      z0 |= M32(fmt->expwd) << esh;
@@ -1123,6 +1141,13 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
  #  define DIGIT_BITS 4
  #endif
  
  #  define DIGIT_BITS 4
  #endif
  
+/* Take note if we need to cope with the reversed quiet/signalling convention
+ * used by HP-PA and older MIPS processors.
+ */
+#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008))
+#  define FROB_NANS
+#endif
+
  /* --- @ENCFLT@ --- *
   *
   * Arguments:  @ty@ = the C type to encode
  /* --- @ENCFLT@ --- *
   *
   * Arguments:  @ty@ = the C type to encode
@@ -1205,8 +1230,23 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
  
  #endif
  
  
  #endif
  
+#ifdef FROB_NANS
+#  define FROBNAN_ENCDECLS     struct floatbits _y
+#  define FROBNAN_ENC do {                                             \
+     if (_x->f&FLTF_NANMASK) {                                         \
+       _y.f = _x->f ^ FLTF_NANMASK; _y.frac = _x->frac; _y.n = _x->n;  \
+       _x = &_y;                                                       \
+     }                                                                 \
+   } while (0)
+#else
+#  define FROBNAN_ENCDECLS
+#  define FROBNAN_ENC do ; while (0)
+#endif
+
  #define ENCFLT(ty, TY, ldexp, rc, z_out, x, r) do {                    \
  #define ENCFLT(ty, TY, ldexp, rc, z_out, x, r) do {                    \
+  const struct floatbits *_x = (x);                                    \
    unsigned _rc = 0;                                                    \
    unsigned _rc = 0;                                                    \
+  FROBNAN_ENCDECLS;                                                    \
                                                                         \
    /* See if the native format is one that we recognize. */             \
    switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {            \
                                                                         \
    /* See if the native format is one that we recognize. */             \
    switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {            \
@@ -1215,8 +1255,8 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
        uint32 _t[1];                                                    \
        unsigned char *_z = (unsigned char *)(z_out);                    \
                                                                         \
        uint32 _t[1];                                                    \
        unsigned char *_z = (unsigned char *)(z_out);                    \
                                                                         \
-      (rc) = fltfmt_encieee(&fltfmt_f32, _t, (x), (r), FLTERR_ALLERRS);        \
-      FLTFMT__FROB_NAN_F32(_t, _rc);                                   \
+      FROBNAN_ENC;                                                     \
+      (rc) = fltfmt_encieee(&fltfmt_f32, _t, _x, (r), FLTERR_ALLERRS); \
        switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
         case FLTFMT_BE: STORE32_B(_z, _t[0]); break;                    \
         case FLTFMT_LE: STORE32_L(_z, _t[0]); break;                    \
        switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
         case FLTFMT_BE: STORE32_B(_z, _t[0]); break;                    \
         case FLTFMT_LE: STORE32_L(_z, _t[0]); break;                    \
@@ -1227,8 +1267,9 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
      case FLTFMT_IEEE_F64: {                                            \
        uint32 _t[2];                                                    \
        unsigned char *_z = (unsigned char *)(z_out);                    \
      case FLTFMT_IEEE_F64: {                                            \
        uint32 _t[2];                                                    \
        unsigned char *_z = (unsigned char *)(z_out);                    \
-      (rc) = fltfmt_encieee(&fltfmt_f64, _t, (x), (r), FLTERR_ALLERRS);        \
-      FLTFMT__FROB_NAN_F64(_t, _rc);                                   \
+                                                                       \
+      FROBNAN_ENC;                                                     \
+      (rc) = fltfmt_encieee(&fltfmt_f64, _t, _x, (r), FLTERR_ALLERRS); \
        switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
         case FLTFMT_BE:                                                 \
           STORE32_B(_z + 0, _t[0]); STORE32_B(_z + 4, _t[1]);           \
        switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
         case FLTFMT_BE:                                                 \
           STORE32_B(_z + 0, _t[0]); STORE32_B(_z + 4, _t[1]);           \
@@ -1247,8 +1288,8 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
        uint32 _t[4];                                                    \
        unsigned char *_z = (unsigned char *)(z_out);                    \
                                                                         \
        uint32 _t[4];                                                    \
        unsigned char *_z = (unsigned char *)(z_out);                    \
                                                                         \
-      FLTFMT__FROB_NAN_F128(_t, _rc);                                  \
-      (rc) = fltfmt_encieee(&fltfmt_f128, _t, (x), (r), FLTERR_ALLERRS); \
+      FROBNAN_ENC;                                                     \
+      (rc) = fltfmt_encieee(&fltfmt_f128, _t, _x, (r), FLTERR_ALLERRS);        \
        switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
         case FLTFMT_BE:                                                 \
           STORE32_B(_z +  0, _t[0]); STORE32_B(_z +  4, _t[1]);         \
        switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
         case FLTFMT_BE:                                                 \
           STORE32_B(_z +  0, _t[0]); STORE32_B(_z +  4, _t[1]);         \
@@ -1266,8 +1307,9 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
        uint32 _t[3];                                                    \
        unsigned char *_z = (unsigned char *)(z_out);                    \
                                                                         \
        uint32 _t[3];                                                    \
        unsigned char *_z = (unsigned char *)(z_out);                    \
                                                                         \
-      (rc) = fltfmt_encieee(&fltfmt_idblext80, _t, (x), (r), FLTERR_ALLERRS); \
-      FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc);                             \
+      FROBNAN_ENC;                                                     \
+      (rc) = fltfmt_encieee(&fltfmt_idblext80,                         \
+                           _t, _x, (r), FLTERR_ALLERRS);               \
        switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
         case FLTFMT_BE:                                                 \
           STORE16_B(_z + 0, _t[0]);                                     \
        switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
         case FLTFMT_BE:                                                 \
           STORE16_B(_z + 0, _t[0]);                                     \
@@ -1284,7 +1326,6 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
      default: {                                                         \
        /* We must do this the hard way. */                              \
                                                                         \
      default: {                                                         \
        /* We must do this the hard way. */                              \
                                                                         \
-      const struct floatbits *_x = (x);                                        \
        ty _z;                                                           \
        unsigned _i;                                                     \
        ENC_ROUND_DECLS;                                                 \
        ty _z;                                                           \
        unsigned _i;                                                     \
        ENC_ROUND_DECLS;                                                 \
@@ -1466,7 +1507,16 @@ unsigned fltfmt_encldbl(long double *z_out,
     } while (0)
  #endif
  
     } while (0)
  #endif
  
+#ifdef FROB_NANS
+#  define FROBNAN_DEC do {                                             \
+     if (_z->f&FLTF_NANMASK) _z->f ^= FLTF_NANMASK;                    \
+   } while (0)
+#else
+#  define FROBNAN_DEC do ; while (0)
+#endif
+
  #define DECFLT(ty, TY, frexp, rc, z_out, x, r) do {                    \
  #define DECFLT(ty, TY, frexp, rc, z_out, x, r) do {                    \
+  struct floatbits *_z = (z_out);                                      \
    unsigned _rc = 0;                                                    \
                                                                         \
    switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {            \
    unsigned _rc = 0;                                                    \
                                                                         \
    switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {            \
@@ -1480,8 +1530,7 @@ unsigned fltfmt_encldbl(long double *z_out,
         case FLTFMT_LE: _t[0] = LOAD32_L(_x); break;                    \
         default: assert(!"unimplemented byte order"); break;            \
        }                                                                        \
         case FLTFMT_LE: _t[0] = LOAD32_L(_x); break;                    \
         default: assert(!"unimplemented byte order"); break;            \
        }                                                                        \
-      FLTFMT__FROB_NAN_F32(_t, _rc);                                   \
-      _rc |= fltfmt_decieee(&fltfmt_f32, (z_out), _t);                 \
+      _rc |= fltfmt_decieee(&fltfmt_f32, _z, _t); FROBNAN_DEC;         \
      } break;                                                           \
                                                                         \
      case FLTFMT_IEEE_F64: {                                            \
      } break;                                                           \
                                                                         \
      case FLTFMT_IEEE_F64: {                                            \
@@ -1500,8 +1549,7 @@ unsigned fltfmt_encldbl(long double *z_out,
           break;                                                        \
         default: assert(!"unimplemented byte order"); break;            \
        }                                                                        \
           break;                                                        \
         default: assert(!"unimplemented byte order"); break;            \
        }                                                                        \
-      FLTFMT__FROB_NAN_F64(_t, _rc);                                   \
-      _rc |= fltfmt_decieee(&fltfmt_f64, (z_out), _t);                 \
+      _rc |= fltfmt_decieee(&fltfmt_f64, _z, _t); FROBNAN_DEC;         \
      } break;                                                           \
                                                                         \
      case FLTFMT_IEEE_F128: {                                           \
      } break;                                                           \
                                                                         \
      case FLTFMT_IEEE_F128: {                                           \
@@ -1519,8 +1567,7 @@ unsigned fltfmt_encldbl(long double *z_out,
           break;                                                        \
         default: assert(!"unimplemented byte order"); break;            \
        }                                                                        \
           break;                                                        \
         default: assert(!"unimplemented byte order"); break;            \
        }                                                                        \
-      FLTFMT__FROB_NAN_F128(_t, _rc);                                  \
-      _rc |= fltfmt_decieee(&fltfmt_f128, (z_out), _t);                        \
+      _rc |= fltfmt_decieee(&fltfmt_f128, _z, _t); FROBNAN_DEC;                \
      } break;                                                           \
                                                                         \
      case FLTFMT_INTEL_F80: {                                           \
      } break;                                                           \
                                                                         \
      case FLTFMT_INTEL_F80: {                                           \
@@ -1538,12 +1585,10 @@ unsigned fltfmt_encldbl(long double *z_out,
           break;                                                        \
         default: assert(!"unimplemented byte order"); break;            \
        }                                                                        \
           break;                                                        \
         default: assert(!"unimplemented byte order"); break;            \
        }                                                                        \
-      FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc);                             \
-      _rc |= fltfmt_decieee(&fltfmt_idblext80, (z_out), _t);           \
+      _rc |= fltfmt_decieee(&fltfmt_idblext80, _z, _t); FROBNAN_DEC;   \
      } break;                                                           \
                                                                         \
      default: {                                                         \
      } break;                                                           \
                                                                         \
      default: {                                                         \
-      struct floatbits *_z = (z_out);                                  \
        ty _x = (x), _y;                                                 \
        unsigned _i, _n, _f = 0;                                         \
        uint32 _t;                                                       \
        ty _x = (x), _y;                                                 \
        unsigned _i, _n, _f = 0;                                         \
        uint32 _t;                                                       \
diff --git a/utils/t/fltfmt-test.c b/utils/t/fltfmt-test.c

index 5b606077e3f09ad1b2cff6fc9c735b37b358b200..9ed7c68440cc34895eebeca6e9fc906d2672e7d1 100644 (file)
--- a/utils/t/fltfmt-test.c
+++ b/utils/t/fltfmt-test.c
@@ -208,7 +208,7 @@ static void test_round(const struct tvec_reg *in, struct tvec_reg *out,
  static const struct tvec_test round_test =
    { "round", round_regs, 0, test_round };
  
  static const struct tvec_test round_test =
    { "round", round_regs, 0, test_round };
  
-/*----- IEEE format conversion --------------------------------------------*/
+/*----- IEEE format conversions -------------------------------------------*/
  
  #define IEEE_FORMATS(_)                                                        \
    _(mini, 1)                                                           \
  
  #define IEEE_FORMATS(_)                                                        \
    _(mini, 1)                                                           \
@@ -348,7 +348,7 @@ IEEE_FORMATS(DEF_TEST)
  #define DEF_IEEE_TEST(ty, sz) &enc##ty##_test, &dec##ty##_test,
  #define IEEE_TESTS IEEE_FORMATS(DEF_IEEE_TEST)
  
  #define DEF_IEEE_TEST(ty, sz) &enc##ty##_test, &dec##ty##_test,
  #define IEEE_TESTS IEEE_FORMATS(DEF_IEEE_TEST)
  
-/*----- Native format conversion ------------------------------------------*/
+/*----- Native format conversions -----------------------------------------*/
  
  #define NATIVE_FORMATS(_)                                              \
    _(flt, float, FLT)                                                   \
  
  #define NATIVE_FORMATS(_)                                              \
    _(flt, float, FLT)                                                   \
@@ -393,64 +393,64 @@ static const struct tvec_flag assume_flags[] = {
  static const struct tvec_flaginfo assume_flaginfo =
    { "assume", assume_flags, &tvrange_uint };
  
  static const struct tvec_flaginfo assume_flaginfo =
    { "assume", assume_flags, &tvrange_uint };
  
-struct nativeenv { struct tvec_env _env; unsigned ntv; };
-struct nativectx { unsigned af, want; };
+struct assumeenv { struct tvec_env _env; unsigned ntv; };
+struct assumectx { unsigned af, want; };
  
  
-static void setup_native(struct tvec_state *tv, const struct tvec_env *env,
+static void setup_assume(struct tvec_state *tv, const struct tvec_env *env,
                          void *pctx, void *ctx)
  {
                          void *pctx, void *ctx)
  {
-  const struct nativeenv *nenv = (const struct nativeenv *)env;
-  const struct ntvinfo *info = &ntvinfo[nenv->ntv];
-  struct nativectx *nctx = ctx;
+  const struct assumeenv *aenv = (const struct assumeenv *)env;
+  const struct ntvinfo *info = &ntvinfo[aenv->ntv];
+  struct assumectx *actx = ctx;
    double prec;
  
    switch (info->fmt&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {
      case FLTFMT_IEEE_F32:
    double prec;
  
    switch (info->fmt&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {
      case FLTFMT_IEEE_F32:
-      nctx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24;
+      actx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24;
        break;
      case FLTFMT_IEEE_F64:
        break;
      case FLTFMT_IEEE_F64:
-      nctx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24 | AF_PREC53;
+      actx->af = AF_NEGZ | AF_INF | AF_IEEE | AF_PREC24 | AF_PREC53;
        break;
      case FLTFMT_IEEE_F128:
        break;
      case FLTFMT_IEEE_F128:
-      nctx->af = AF_NEGZ | AF_INF | AF_IEEE |
+      actx->af = AF_NEGZ | AF_INF | AF_IEEE |
                  AF_PREC24 | AF_PREC53 | AF_PREC64 | AF_PREC113;
        break;
      case FLTFMT_INTEL_F80:
                  AF_PREC24 | AF_PREC53 | AF_PREC64 | AF_PREC113;
        break;
      case FLTFMT_INTEL_F80:
-      nctx->af = AF_NEGZ | AF_INF | AF_IEEE |
+      actx->af = AF_NEGZ | AF_INF | AF_IEEE |
                  AF_PREC24 | AF_PREC53 | AF_PREC64;
        break;
      default:
                  AF_PREC24 | AF_PREC53 | AF_PREC64;
        break;
      default:
-      nctx->af = 0;
-      if (NEGP(-0.0)) nctx->af |= AF_NEGZ;
+      actx->af = 0;
+      if (NEGP(-0.0)) actx->af |= AF_NEGZ;
  #ifdef INF
  #ifdef INF
-      nctx->af |= AF_INF;
+      actx->af |= AF_INF;
  #endif
  #ifdef NAN
  #endif
  #ifdef NAN
-      nctx->af |= AF_STDCNAN;
+      actx->af |= AF_STDCNAN;
  #endif
        prec = log(FLT_RADIX)/log(2.0)*info->mant_dig;
  #endif
        prec = log(FLT_RADIX)/log(2.0)*info->mant_dig;
-      if (prec >= 24) nctx->af |= AF_PREC24;
-      if (prec >= 53) nctx->af |= AF_PREC53;
-      if (prec >= 64) nctx->af |= AF_PREC64;
-      if (prec >= 113) nctx->af |= AF_PREC113;
+      if (prec >= 24) actx->af |= AF_PREC24;
+      if (prec >= 53) actx->af |= AF_PREC53;
+      if (prec >= 64) actx->af |= AF_PREC64;
+      if (prec >= 113) actx->af |= AF_PREC113;
        break;
    }
        break;
    }
-  nctx->want = 0;
+  actx->want = 0;
  }
  
  }
  
-static int setvar_native(struct tvec_state *tv, const char *var,
+static int setvar_assume(struct tvec_state *tv, const char *var,
                          const union tvec_regval *rv, void *ctx)
  {
                          const union tvec_regval *rv, void *ctx)
  {
-  struct nativectx *nctx = ctx;
+  struct assumectx *actx = ctx;
  
  
-  if (STRCMP(var, ==, "@assume")) nctx->want = rv->u;
+  if (STRCMP(var, ==, "@assume")) actx->want = rv->u;
    else return (tvec_unkregerr(tv, var));
    return (0);
  }
  static const struct tvec_vardef assume_vardef =
    else return (tvec_unkregerr(tv, var));
    return (0);
  }
  static const struct tvec_vardef assume_vardef =
-  { sizeof(struct tvec_reg), setvar_native,
+  { sizeof(struct tvec_reg), setvar_assume,
      { "@assume", &tvty_flags, 0, 0, { &assume_flaginfo } }};
      { "@assume", &tvty_flags, 0, 0, { &assume_flaginfo } }};
-static const struct tvec_vardef *findvar_native
+static const struct tvec_vardef *findvar_assume
    (struct tvec_state *tv, const char *name, void **ctx_out, void *ctx)
  {
    if (STRCMP(name, ==, "@assume"))
    (struct tvec_state *tv, const char *name, void **ctx_out, void *ctx)
  {
    if (STRCMP(name, ==, "@assume"))
@@ -459,11 +459,11 @@ static const struct tvec_vardef *findvar_native
      return (0);
  }
  
      return (0);
  }
  
-static void before_native(struct tvec_state *tv, void *ctx)
+static void before_assume(struct tvec_state *tv, void *ctx)
  {
  {
-  struct nativectx *nctx = ctx;
+  struct assumectx *actx = ctx;
  
  
-  if (nctx->want&~nctx->af)
+  if ((tv->f&TVSF_ACTIVE) && (actx->want&~actx->af))
      tvec_skip(tv, "unsatisfied assumption");
    else {
      DEFAULT_REG(RROUND, rv->u = FLTRND_NEAREVEN);
      tvec_skip(tv, "unsatisfied assumption");
    else {
      DEFAULT_REG(RROUND, rv->u = FLTRND_NEAREVEN);
@@ -471,18 +471,18 @@ static void before_native(struct tvec_state *tv, void *ctx)
    }
  }
  
    }
  }
  
-static void after_native(struct tvec_state *tv, void *ctx)
+static void after_assume(struct tvec_state *tv, void *ctx)
  {
  {
-  struct nativectx *nctx = ctx;
+  struct assumectx *actx = ctx;
  
  
-  nctx->want = 0;
+  actx->want = 0;
  }
  
  #define DEF_TEST(ty, cty, TY)                                          \
                                                                         \
  }
  
  #define DEF_TEST(ty, cty, TY)                                          \
                                                                         \
-  static struct nativeenv ty##_env =                                   \
-    { { sizeof(struct nativectx),                                      \
-       setup_native, findvar_native, before_native, 0, after_native, 0 }, \
+  static struct assumeenv ty##_env =                                   \
+    { { sizeof(struct assumectx),                                      \
+       setup_assume, findvar_assume, before_assume, 0, after_assume, 0 }, \
        NTV_##TY };                                                      \
                                                                         \
    static const struct tvec_regdef enc##ty##_regs[] = {                 \
        NTV_##TY };                                                      \
                                                                         \
    static const struct tvec_regdef enc##ty##_regs[] = {                 \
@@ -538,12 +538,93 @@ NATIVE_FORMATS(DEF_TEST)
  #define DEF_NATIVE_TEST(ty, cty, TY) &enc##ty##_test, &dec##ty##_test,
  #define NATIVE_TESTS NATIVE_FORMATS(DEF_NATIVE_TEST)
  
  #define DEF_NATIVE_TEST(ty, cty, TY) &enc##ty##_test, &dec##ty##_test,
  #define NATIVE_TESTS NATIVE_FORMATS(DEF_NATIVE_TEST)
  
+/*----- Direct conversions ------------------------------------------------*/
+
+#define DIRECT_CONVERSIONS(_)                                          \
+  _(flt, float, f32)                                                   \
+  _(dbl, double, f64)
+
+#define DEF_TEST1(ty, cty, fty, e)                                     \
+  static void test_##ty##to##fty##e(const struct tvec_reg *in,         \
+                                   struct tvec_reg *out,               \
+                                   void *ctx)                          \
+  {                                                                    \
+    tvec_allocbytes(&out[RZ_OUT].v, OUTSZ_##fty);                      \
+    out[RERR_OUT].v.u = fltfmt_##ty##to##fty##e(out[RZ_OUT].v.bytes.p, \
+                                               in[RX].v.f,             \
+                                               in[RROUND].v.u);        \
+  }                                                                    \
+                                                                       \
+  static const struct tvec_test ty##to##fty##e##_test =                        \
+    { #ty "to" #fty #e, ty##to##fty##_regs, &ty##_env._env,            \
+      test_##ty##to##fty##e };
+
+#define DEF_TEST(ty, cty, fty)                                         \
+  static const struct tvec_regdef ty##to##fty##_regs[] = {             \
+    { "round", &tvty_flags,    RROUND, TVRF_OPT, { &fltrnd_flaginfo } }, \
+    { "x",     &tvty_float,    RX,     0,      { &tvflt_##cty } },     \
+    { "z",     &tvty_bytes,    RZ_OUT, 0,      { &fty##_range } },     \
+    { "err",   &tvty_flags,    RERR_OUT, TVRF_OPT, { &flterr_flaginfo } }, \
+    TVEC_ENDREGS                                                       \
+  };                                                                   \
+                                                                       \
+  DEF_TEST1(ty, cty, fty, l)                                           \
+  DEF_TEST1(ty, cty, fty, b)
+
+DIRECT_CONVERSIONS(DEF_TEST)
+
+#undef DEF_TEST1
+#undef DEF_TEST
+
+#define DEF_TEST1(ty, cty, fty, e)                                     \
+  static void test_##fty##e##to##ty(const struct tvec_reg *in,         \
+                                   struct tvec_reg *out,               \
+                                   void *ctx)                          \
+  {                                                                    \
+    cty z;                                                             \
+                                                                       \
+    out[RERR_OUT].v.u = fltfmt_##fty##e##to##ty(&z, in[RX].v.bytes.p,  \
+                                               in[RROUND].v.u);        \
+    out[RZ_OUT].v.f = z;                                               \
+  }                                                                    \
+                                                                       \
+  static const struct tvec_test fty##e##to##ty##_test =                        \
+    { #fty #e "to" #ty, fty##to##ty##_regs, &ty##_env._env,            \
+      test_##fty##e##to##ty };
+
+#define DEF_TEST(ty, cty, fty)                                         \
+  static const struct tvec_regdef fty##to##ty##_regs[] = {             \
+    { "round", &tvty_flags,    RROUND, TVRF_OPT, { &fltrnd_flaginfo } }, \
+    { "x",     &tvty_bytes,    RX,     0,      { &fty##_range } },     \
+    { "z",     &tvty_float,    RZ_OUT, 0,      { &tvflt_##cty } },     \
+    { "err",   &tvty_flags,    RERR_OUT, TVRF_OPT, { &flterr_flaginfo } }, \
+    TVEC_ENDREGS                                                       \
+  };                                                                   \
+                                                                       \
+  DEF_TEST1(ty, cty, fty, l)                                           \
+  DEF_TEST1(ty, cty, fty, b)
+
+DIRECT_CONVERSIONS(DEF_TEST)
+
+#undef DEF_TEST1
+#undef DEF_TEST
+
+#define DEF_DIRECT_CTOF_TESTS(ty, cty, fty)                            \
+  &ty##to##fty##l_test, &ty##to##fty##b_test,
+#define DEF_DIRECT_FTOC_TESTS(ty, cty, fty)                            \
+  &fty##l##to##ty##_test, &fty##b##to##ty##_test,
+#define DEF_DIRECT_TESTS(ty, cty, fty)                                 \
+  DEF_DIRECT_CTOF_TESTS(ty, cty, fty)                                  \
+  DEF_DIRECT_FTOC_TESTS(ty, cty, fty)
+#define DIRECT_TESTS DIRECT_CONVERSIONS(DEF_DIRECT_TESTS)
+
  /*----- Main code ---------------------------------------------------------*/
  
  static const struct tvec_test *const tests[] = {
    &round_test,
    NATIVE_TESTS
    IEEE_TESTS
  /*----- Main code ---------------------------------------------------------*/
  
  static const struct tvec_test *const tests[] = {
    &round_test,
    NATIVE_TESTS
    IEEE_TESTS
+  DIRECT_TESTS
    0
  };
  
    0
  };
  
diff --git a/utils/t/fltfmt-testgen b/utils/t/fltfmt-testgen

index 7f63664ad76271c609b9ab6f148ef18c123f990a..b9670b0ec766a5755e3a095eea4d083a9b7e2418 100755 (executable)
--- a/utils/t/fltfmt-testgen
+++ b/utils/t/fltfmt-testgen
@@ -73,9 +73,12 @@ def explore(wd, lobits, hibits):
      for hi in xrange(bit(hibits)):
        top = hi << hishift
        for lo in xrange(bit(lobits)):
      for hi in xrange(bit(hibits)):
        top = hi << hishift
        for lo in xrange(bit(lobits)):
+        while True:
+          fill = R.randrange(midbit)
+          if fill != 0 and fill != midbit - 1: break
          base = lo | top
          yield base
          base = lo | top
          yield base
-        yield base | (R.randrange(midbit) << lobits)
+        yield base | (fill << lobits)
          yield base | m
  
  class ExploreParameters (object):
          yield base | m
  
  class ExploreParameters (object):
diff --git a/utils/t/fltfmt.tests b/utils/t/fltfmt.tests

index bf85107c57fe9b79321dacd31fda8a5721149930..840f5140f99c8b68fa4421b38fbea1ca51f897b4 100644 (file)
--- a/utils/t/fltfmt.tests
+++ b/utils/t/fltfmt.tests
@@ -225,6 +225,31 @@ err = INEXACT
  
  [encf64]
  
  
  [encf64]
  
+;; NaN conversions
+
+f = QNAN
+m = #empty
+z = 7ff80000 00000000
+
+f = SNAN
+m = #empty
+z = 7ff00000 00000001
+err = INEXACT
+
+f = SNAN
+m = 00000000 00002000
+z = 7ff00000 00000001
+
+f = SNAN
+m = 00000000 00001fff
+z = 7ff00000 00000001
+err = INEXACT
+
+f = SNAN
+m = 00000000 00000000 00000001
+z = 7ff00000 00000001
+err = INEXACT
+
  ;; Check NaN truncation.
  
  errmask = 0
  ;; Check NaN truncation.
  
  errmask = 0
@@ -418,7 +443,7 @@ z = -0.625
  f = 0
  e = 3
  m = c90fdb00
  f = 0
  e = 3
  m = c90fdb00
-z = 6.2831854820251465
+z = 6.2831853
  
  @assume = stdc-nan
  f = QNAN
  
  @assume = stdc-nan
  f = QNAN
@@ -467,7 +492,7 @@ err = INEXACT
  @assume = ieee
  x = #nan
  f = QNAN
  @assume = ieee
  x = #nan
  f = QNAN
-m = 00000000 ; maybe
+;; m = ... something
  err = OK
  
  [encdbl]
  err = OK
  
  [encdbl]
@@ -508,6 +533,7 @@ err = INEXACT
  f = QNAN
  m = #empty
  z = #nan
  f = QNAN
  m = #empty
  z = #nan
+err = OK
  
  [decdbl]
  
  
  [decdbl]
  
@@ -545,7 +571,88 @@ err = INEXACT
  @assume = ieee
  x = #nan
  f = QNAN
  @assume = ieee
  x = #nan
  f = QNAN
-m = 00000000 00000000 ; maybe
+;; m = ... something
  err = OK
  
  ;;;--------------------------------------------------------------------------
  err = OK
  
  ;;;--------------------------------------------------------------------------
+;;; Direct conversions.
+
+[f32btoflt]
+
+x = 00000000
+z = 0.0
+
+@assume = negz
+x = 80000000
+z = -0.0
+
+@assume = inf
+x = 7f800000
+z = #+inf
+
+@assume = inf
+x = ff800000
+z = #-inf
+
+@assume = stdc-nan | ieee
+x = 7fc00000
+z = #nan
+
+@assume = stdc-nan | ieee
+x = 7fa00000
+z = #nan
+
+x = 3f800000
+z = 1.0
+
+@assume = prec24
+x = 40c90fdb
+z = 6.2831853
+
+[f32ltoflt]
+
+x = 10c12549
+z = 678929
+
+[flttof32b]
+
+x = 678929
+z = 4925c110
+
+@assume = prec24
+x = 6.2831853
+z = 40c90fdb
+
+[flttof32l]
+
+x = 678929
+z = 10c12549
+
+[dbltof64b]
+
+x = 0.0
+z = 00000000 00000000
+
+x = -2964135146
+z = c1e615a3 9d400000
+
+@assume = prec53
+x = 6.283185307179586
+z = 401921fb 54442d18
+
+[dbltof64l]
+
+x = -2964135146
+z = 0000409d a315e6c1
+
+[f64btodbl]
+
+x = c1e615a3 9d400000
+z = -2964135146
+
+[f64ltodbl]
+
+x = 0000409d a315e6c1
+z = -2964135146
+
+;;;--------------------------------------------------------------------------
author	Mark Wooding <mdw@distorted.org.uk>
	Tue, 23 Apr 2024 23:49:56 +0000 (00:49 +0100)
committer	Mark Wooding <mdw@distorted.org.uk>
	Tue, 23 Apr 2024 23:49:56 +0000 (00:49 +0100)
defs.man		patch \| blob \| blame \| history
mem/arena.3.in		patch \| blob \| blame \| history
test/bench.3.in		patch \| blob \| blame \| history
test/tvec-types.c		patch \| blob \| blame \| history
utils/fltfmt-convert.c		patch \| blob \| blame \| history
utils/fltfmt.3.in		patch \| blob \| blame \| history
utils/fltfmt.c		patch \| blob \| blame \| history
utils/t/fltfmt-test.c		patch \| blob \| blame \| history
utils/t/fltfmt-testgen		patch \| blob \| blame \| history
utils/t/fltfmt.tests		patch \| blob \| blame \| history