X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~mdw/git/mLib/blobdiff_plain/289651a7df7bc48724137cd0faaf8535fb54e73b..refs/heads/mdw/tvec:/utils/fltfmt.c

diff --git a/utils/fltfmt.c b/utils/fltfmt.c
index 89c705d..177004e 100644
--- a/utils/fltfmt.c
+++ b/utils/fltfmt.c
@@ -39,9 +39,19 @@
 #include "bits.h"
 #include "fltfmt.h"
 #include "growbuf.h"
-#include "macros.h"
 #include "maths.h"
 
+/*----- Preliminary hacking -----------------------------------------------*/
+
+/* The native-format conversions are -- at least if the format is
+ * unrecognized -- dependent on the implementation's rounding.  Our own
+ * rounding mode specifications don't fit into the framework very well, but I
+ * still want to respect the prevailing rounding mode.
+ *
+ * The `proper' way to do this is with %|#pragma STDC FENV_ACCESS|%.  But
+ * that doesn't actually work on GCC, or on Clang from not too long ago.  So
+ * use compiler-specific hacking to support this.
+ */
 #if GCC_VERSION_P(4, 4)
 #  pragma GCC optimize "-frounding-math"
 #elif CLANG_VERSION_P(11, 0) && !CLANG_VERSION_P(12, 0)
@@ -291,7 +301,7 @@ static unsigned ms_set_bit(const uint32 *x, unsigned from, unsigned to)
    */
 
   /* If the region is empty then it's technically true that all of the bits
-   * are zero.  It's important to be able to do answer the case where
+   * are zero.  It's important to be able to answer the case where
    * %$\id{from} = \id{to} = 0$% without accessing memory.
    */
   assert(to >= from); if (to == from) return (ALLCLEAR);
@@ -531,16 +541,16 @@ unsigned fltfmt_round(struct floatbits *z_out, const struct floatbits *x,
   return (rc);
 }
 
-/*----- IEEE formats ------------------------------------------------------*/
+/*----- IEEE and related formats ------------------------------------------*/
 
 /* IEEE (and related) format descriptions. */
 const struct fltfmt_ieeefmt
-  fltfmt_mini = { IEEEF_HIDDEN, 4, 4 },
-  fltfmt_bf16 = { IEEEF_HIDDEN, 8, 8 },
-  fltfmt_f16 = { IEEEF_HIDDEN, 5, 11 },
-  fltfmt_f32 = { IEEEF_HIDDEN, 8, 24 },
-  fltfmt_f64 = { IEEEF_HIDDEN, 11, 53 },
-  fltfmt_f128 = { IEEEF_HIDDEN, 15, 113 },
+  fltfmt_mini = { FLTIF_HIDDEN, 4, 4 },
+  fltfmt_bf16 = { FLTIF_HIDDEN, 8, 8 },
+  fltfmt_f16 = { FLTIF_HIDDEN, 5, 11 },
+  fltfmt_f32 = { FLTIF_HIDDEN, 8, 24 },
+  fltfmt_f64 = { FLTIF_HIDDEN, 11, 53 },
+  fltfmt_f128 = { FLTIF_HIDDEN, 15, 113 },
   fltfmt_idblext80 = { 0, 15, 64 };
 
 /* --- @fltfmt_encieee@ ---
@@ -585,7 +595,7 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt,
 
   /* Determine the output size. */
   nb = fmt->prec + fmt->expwd + 1;
-  if (fmt->f&IEEEF_HIDDEN) nb--;
+  if (fmt->f&FLTIF_HIDDEN) nb--;
   nw = (nb + 31)/32;
 
   /* Determine the top bits. */
@@ -603,7 +613,7 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt,
      */
 
     z0 |= M32(fmt->expwd) << esh;
-    if (!(fmt->f&IEEEF_HIDDEN)) z0 |= B32(esh - 1);
+    if (!(fmt->f&FLTIF_HIDDEN)) z0 |= B32(esh - 1);
 
   } else if (f&FLTF_NANMASK) {
     /* Not-a-number.
@@ -622,20 +632,28 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt,
     /* Copy the payload.
      *
      * If the payload is all-zero and we're meant to set a signalling NaN
-     * then report an exactness failure and set the low bit.
+     * then report an exactness failure and set the least-significant bit.
      */
     mb = fmt->prec - 2; mw = (mb + 31)/32; sh = -mb%32;
-    for (i = 0; i < nw - mw; i++) z[i] = 0;
-    n = x->n; if (n > mw) n = nw;
-    t = shr(z + i, x->frac, n, sh); i += n;
-    if (i < nw) z[i++] = t;
-    sh = esh - 2; if (fmt->f&IEEEF_HIDDEN) sh++;
-    if (f&FLTF_QNAN) z0 |= B32(sh);
-    else if (!fracwd) { ERR(FLTERR_INEXACT); z[nw - 1] |= 1; }
+    n = x->n;
+      if (n < mw) j = 0;
+      else { n = mw; j = sh; }
+    if ((f&FLTF_SNAN) && ms_set_bit(x->frac + n, j, 32*n) == ALLCLEAR) {
+      ERR(FLTERR_INEXACT);
+      n = nw - 1; for (i = 0; i < n; i++) z[i] = 0;
+      z[i++] = 1;
+    } else {
+      for (i = 0; i < nw - mw; i++) z[i] = 0;
+      n = x->n; if (n > mw) n = mw;
+      t = shr(z + i, x->frac, n, sh); i += n;
+      if (i < nw) z[i++] = t;
+      sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++;
+      if (f&FLTF_QNAN) z0 |= B32(sh);
+    }
 
     /* Set the exponent and, for non-hidden-bit formats, the unit bit. */
     z0 |= M32(fmt->expwd) << esh;
-    if (!(fmt->f&IEEEF_HIDDEN)) z0 |= B32(esh - 1);
+    if (!(fmt->f&FLTIF_HIDDEN)) z0 |= B32(esh - 1);
 
   } else {
     /* A finite value.
@@ -711,15 +729,14 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt,
 	 * the rounding mode.
 	 */
 
+	ERR(FLTERR_OFLOW | FLTERR_INEXACT);
 	rf = FRPF_ODD | FRPF_HALF | FRPF_LOW;
 	if (f&FLTF_NEG) rf |= FRPF_NEG;
-	if ((r >> rf)&1) {
-	  ERR(FLTERR_OFLOW | FLTERR_INEXACT);
+	if ((r >> rf)&1)
 	  z0 |= M32(fmt->expwd) << esh;
-	} else {
-	  ERR(FLTERR_INEXACT);
+	else {
 	  z0 |= (B32(fmt->expwd) - 2) << esh;
-	  mb = fmt->prec; if (fmt->f&IEEEF_HIDDEN) mb--;
+	  mb = fmt->prec; if (fmt->f&FLTIF_HIDDEN) mb--;
 	  mw = (mb + 31)/32;
 	  i = nw - mw;
 	  z[i++] = M32(mb%32);
@@ -740,8 +757,10 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt,
 	/* Set the biased exponent. */
 	z0 |= (exp + maxexp) << esh;
 
-	/* Clear the unit bit if we're suppose to use a hidden-bit convention. */
-	if (fmt->f&IEEEF_HIDDEN) {
+	/* Clear the unit bit if we're suppose to use a hidden-bit
+	 * convention.
+	 */
+	if (fmt->f&FLTIF_HIDDEN) {
 	  mb = fmt->prec - 1; mw = (mb + 31)/32; mb = mb%32;
 	  z[nw - mw] &= ~B32(mb);
 	}
@@ -944,7 +963,7 @@ unsigned fltfmt_decieee(const struct fltfmt_ieeefmt *fmt,
    */
   assert(fmt->expwd + 3 <= 32);
   esh = 31 - fmt->expwd; emask = M32(fmt->expwd);
-  sigwd = fmt->prec; if (fmt->f&IEEEF_HIDDEN) sigwd--;
+  sigwd = fmt->prec; if (fmt->f&FLTIF_HIDDEN) sigwd--;
 
   /* Determine the input size. */
   nb = sigwd + fmt->expwd + 1; nw = (nb + 31)/32;
@@ -964,7 +983,7 @@ unsigned fltfmt_decieee(const struct fltfmt_ieeefmt *fmt,
      * Note that we don't include the quiet bit in our decoded payload.
      */
 
-    if (!(fmt->f&IEEEF_HIDDEN)) {
+    if (!(fmt->f&FLTIF_HIDDEN)) {
       /* No hidden bit, so we expect the unit bit to be set.  If it isn't,
        * that's technically invalid, and its absence won't survive a round
        * trip, since the bit isn't considered part of a NaN payload -- or
@@ -980,7 +999,7 @@ unsigned fltfmt_decieee(const struct fltfmt_ieeefmt *fmt,
     if (ms_set_bit(x + nw, 0, sigwd) == ALLCLEAR)
       f |= FLTF_INF;
     else {
-      sh = esh - 2; if (fmt->f&IEEEF_HIDDEN) sh++;
+      sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++;
       if (x0&B32(sh)) f |= FLTF_QNAN;
       else f |= FLTF_SNAN;
       sigwd--; mw = (sigwd + 31)/32;
@@ -998,7 +1017,7 @@ unsigned fltfmt_decieee(const struct fltfmt_ieeefmt *fmt,
    * Otherwise, we'll normalize the incoming value regardless, but report
    * settings of the unit bit which are inconsistent with the exponent.
    */
-  if (fmt->f&IEEEF_HIDDEN) {
+  if (fmt->f&FLTIF_HIDDEN) {
     if (!t) { exp = minexp; goto normalize; }
     else { exp = t - maxexp; goto hidden; }
   } else {
@@ -1124,6 +1143,13 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
 #  define DIGIT_BITS 4
 #endif
 
+/* Take note if we need to cope with the revered quiet/signalling convention
+ * used by HP-PA and older MIPS processors.
+ */
+#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008))
+#  define FROB_NANS
+#endif
+
 /* --- @ENCFLT@ --- *
  *
  * Arguments:	@ty@ = the C type to encode
@@ -1179,8 +1205,10 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
     * requisite compile-time configuration machinery is worth the effort.)
     */
 
-#  define SETINF(TY, rc, z)						\
-	do { (z) = TY##_MAX; (rc) = FLTERR_OFLOW | FLTERR_INEXACT; } while (0)
+#  define SETINF(TY, rc, z) do {					\
+	(z) = TY##_MAX;							\
+	(rc) = FLTERR_OFLOW | FLTERR_INEXACT;				\
+   } while (0)
 #endif
 
 #ifdef DIGIT_BITS
@@ -1206,8 +1234,33 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
 
 #endif
 
+#ifdef FROB_NANS
+   /* The native floating point format uses the opposite quiet-vs-signalling
+    * NaN convention from the recommended `quiet bit' convention, so the bit
+    * needs hacking on input.
+    */
+
+#  define FROBNAN_ENCDECLS	struct floatbits _y
+#  define FROBNAN_ENC do {						\
+     if (_x->f&FLTF_NANMASK) {						\
+       _y.f = _x->f ^ FLTF_NANMASK; _y.frac = _x->frac; _y.n = _x->n;	\
+       _x = &_y;							\
+     }									\
+   } while (0)
+#else
+   /* The native floating point format either uses the conventional
+    * `quiet-bit' convention, or isn't IEEE at all.  Either way, there's
+    * nothing to do here.
+    */
+
+#  define FROBNAN_ENCDECLS
+#  define FROBNAN_ENC do ; while (0)
+#endif
+
 #define ENCFLT(ty, TY, ldexp, rc, z_out, x, r) do {			\
+  const struct floatbits *_x = (x);					\
   unsigned _rc = 0;							\
+  FROBNAN_ENCDECLS;							\
 									\
   /* See if the native format is one that we recognize. */		\
   switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {		\
@@ -1216,8 +1269,8 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
       uint32 _t[1];							\
       unsigned char *_z = (unsigned char *)(z_out);			\
 									\
-      (rc) = fltfmt_encieee(&fltfmt_f32, _t, (x), (r), FLTERR_ALLERRS);	\
-      FLTFMT__FROB_NAN_F32(_t, _rc);					\
+      FROBNAN_ENC;							\
+      (rc) = fltfmt_encieee(&fltfmt_f32, _t, _x, (r), FLTERR_ALLERRS);	\
       switch (TY##_FORMAT&FLTFMT_ENDMASK) {				\
 	case FLTFMT_BE: STORE32_B(_z, _t[0]); break;			\
 	case FLTFMT_LE: STORE32_L(_z, _t[0]); break;			\
@@ -1228,8 +1281,9 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
     case FLTFMT_IEEE_F64: {						\
       uint32 _t[2];							\
       unsigned char *_z = (unsigned char *)(z_out);			\
-      (rc) = fltfmt_encieee(&fltfmt_f64, _t, (x), (r), FLTERR_ALLERRS);	\
-      FLTFMT__FROB_NAN_F64(_t, _rc);					\
+									\
+      FROBNAN_ENC;							\
+      (rc) = fltfmt_encieee(&fltfmt_f64, _t, _x, (r), FLTERR_ALLERRS);	\
       switch (TY##_FORMAT&FLTFMT_ENDMASK) {				\
 	case FLTFMT_BE:							\
 	  STORE32_B(_z + 0, _t[0]); STORE32_B(_z + 4, _t[1]);		\
@@ -1248,8 +1302,8 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
       uint32 _t[4];							\
       unsigned char *_z = (unsigned char *)(z_out);			\
 									\
-      FLTFMT__FROB_NAN_F128(_t, _rc);					\
-      (rc) = fltfmt_encieee(&fltfmt_f128, _t, (x), (r), FLTERR_ALLERRS); \
+      FROBNAN_ENC;							\
+      (rc) = fltfmt_encieee(&fltfmt_f128, _t, _x, (r), FLTERR_ALLERRS);	\
       switch (TY##_FORMAT&FLTFMT_ENDMASK) {				\
 	case FLTFMT_BE:							\
 	  STORE32_B(_z +  0, _t[0]); STORE32_B(_z +  4, _t[1]);		\
@@ -1267,8 +1321,9 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
       uint32 _t[3];							\
       unsigned char *_z = (unsigned char *)(z_out);			\
 									\
-      (rc) = fltfmt_encieee(&fltfmt_idblext80, _t, (x), (r), FLTERR_ALLERRS); \
-      FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc);				\
+      FROBNAN_ENC;							\
+      (rc) = fltfmt_encieee(&fltfmt_idblext80,				\
+			    _t, _x, (r), FLTERR_ALLERRS);		\
       switch (TY##_FORMAT&FLTFMT_ENDMASK) {				\
 	case FLTFMT_BE:							\
 	  STORE16_B(_z + 0, _t[0]);					\
@@ -1285,7 +1340,6 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
     default: {								\
       /* We must do this the hard way. */				\
 									\
-      const struct floatbits *_x = (x);					\
       ty _z;								\
       unsigned _i;							\
       ENC_ROUND_DECLS;							\
@@ -1372,7 +1426,7 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
  *		    zero.
  *
  *		  * If the implementation's floating-point radix is not a
- *		    power of two, and @x@ is a nonzero finite value, then
+ *		    power of two, and @x@ is a nonzero finite value, then the
  *		    @FLTERR_INEXACT@ error bit is set (unconditionally), and
  *		    the value is rounded by the implementation using its
  *		    prevailing rounding policy.  If the radix is a power of
@@ -1467,7 +1521,26 @@ unsigned fltfmt_encldbl(long double *z_out,
    } while (0)
 #endif
 
+#ifdef FROB_NANS
+   /* The native floating point format uses the opposite quiet-vs-signalling
+    * NaN convention from the recommended `quiet bit' convention, so the bit
+    * needs hacking on output.
+    */
+
+#  define FROBNAN_DEC do {						\
+     if (_z->f&FLTF_NANMASK) _z->f ^= FLTF_NANMASK;			\
+   } while (0)
+#else
+   /* The native floating point format either uses the conventional
+    * `quiet-bit' convention, or isn't IEEE at all.  Either way, there's
+    * nothing to do here.
+    */
+
+#  define FROBNAN_DEC do ; while (0)
+#endif
+
 #define DECFLT(ty, TY, frexp, rc, z_out, x, r) do {			\
+  struct floatbits *_z = (z_out);					\
   unsigned _rc = 0;							\
 									\
   switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {		\
@@ -1481,8 +1554,7 @@ unsigned fltfmt_encldbl(long double *z_out,
 	case FLTFMT_LE: _t[0] = LOAD32_L(_x); break;			\
 	default: assert(!"unimplemented byte order"); break;		\
       }									\
-      FLTFMT__FROB_NAN_F32(_t, _rc);					\
-      _rc |= fltfmt_decieee(&fltfmt_f32, (z_out), _t);			\
+      _rc |= fltfmt_decieee(&fltfmt_f32, _z, _t); FROBNAN_DEC;		\
     } break;								\
 									\
     case FLTFMT_IEEE_F64: {						\
@@ -1501,8 +1573,7 @@ unsigned fltfmt_encldbl(long double *z_out,
 	  break;							\
 	default: assert(!"unimplemented byte order"); break;		\
       }									\
-      FLTFMT__FROB_NAN_F64(_t, _rc);					\
-      _rc |= fltfmt_decieee(&fltfmt_f64, (z_out), _t);			\
+      _rc |= fltfmt_decieee(&fltfmt_f64, _z, _t); FROBNAN_DEC;		\
     } break;								\
 									\
     case FLTFMT_IEEE_F128: {						\
@@ -1520,8 +1591,7 @@ unsigned fltfmt_encldbl(long double *z_out,
 	  break;							\
 	default: assert(!"unimplemented byte order"); break;		\
       }									\
-      FLTFMT__FROB_NAN_F128(_t, _rc);					\
-      _rc |= fltfmt_decieee(&fltfmt_f128, (z_out), _t);			\
+      _rc |= fltfmt_decieee(&fltfmt_f128, _z, _t); FROBNAN_DEC;		\
     } break;								\
 									\
     case FLTFMT_INTEL_F80: {						\
@@ -1539,12 +1609,10 @@ unsigned fltfmt_encldbl(long double *z_out,
 	  break;							\
 	default: assert(!"unimplemented byte order"); break;		\
       }									\
-      FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc);				\
-      _rc |= fltfmt_decieee(&fltfmt_idblext80, (z_out), _t);		\
+      _rc |= fltfmt_decieee(&fltfmt_idblext80, _z, _t); FROBNAN_DEC;	\
     } break;								\
 									\
     default: {								\
-      struct floatbits *_z = (z_out);					\
       ty _x = (x), _y;							\
       unsigned _i, _n, _f = 0;						\
       uint32 _t;							\
@@ -1594,7 +1662,7 @@ unsigned fltfmt_encldbl(long double *z_out,
  *
  * Returns:	Error flags (@FLTERR_...@).
  *
- * Use:		Decode the native C floatingpoint value @x@ and store the
+ * Use:		Decode the native C floating-point value @x@ and store the
  *		result in @z_out@.
  *
  *		The @TY@ may be @flt@ to encode a @float@, @dbl@ to encode a
@@ -1611,7 +1679,7 @@ unsigned fltfmt_encldbl(long double *z_out,
  *		    error bit is set and the decoded payload is left empty.
  *
  *		  * If the implementation's floating-point radix is not a
- *		    power of two, and @x@ is a nonzero finite value, then
+ *		    power of two, and @x@ is a nonzero finite value, then the
  *		    @FLTERR_INEXACT@ error bit is set (unconditionally), and
  *		    the rounded value (according to the rounding mode @r@) is
  *		    stored in as many fraction words as necessary to identify