chiark / gitweb /
@@@ tty wip
[mLib] / utils / fltfmt.c
index 89c705d79dd0504054eba2386a70da98aa68fa93..177004e89c1532f6396f9010c48d2ac48a065833 100644 (file)
 #include "bits.h"
 #include "fltfmt.h"
 #include "growbuf.h"
-#include "macros.h"
 #include "maths.h"
 
+/*----- Preliminary hacking -----------------------------------------------*/
+
+/* The native-format conversions are -- at least if the format is
+ * unrecognized -- dependent on the implementation's rounding.  Our own
+ * rounding mode specifications don't fit into the framework very well, but I
+ * still want to respect the prevailing rounding mode.
+ *
+ * The `proper' way to do this is with %|#pragma STDC FENV_ACCESS|%.  But
+ * that doesn't actually work on GCC, or on Clang from not too long ago.  So
+ * use compiler-specific hacking to support this.
+ */
 #if GCC_VERSION_P(4, 4)
 #  pragma GCC optimize "-frounding-math"
 #elif CLANG_VERSION_P(11, 0) && !CLANG_VERSION_P(12, 0)
@@ -291,7 +301,7 @@ static unsigned ms_set_bit(const uint32 *x, unsigned from, unsigned to)
    */
 
   /* If the region is empty then it's technically true that all of the bits
-   * are zero.  It's important to be able to do answer the case where
+   * are zero.  It's important to be able to answer the case where
    * %$\id{from} = \id{to} = 0$% without accessing memory.
    */
   assert(to >= from); if (to == from) return (ALLCLEAR);
@@ -531,16 +541,16 @@ unsigned fltfmt_round(struct floatbits *z_out, const struct floatbits *x,
   return (rc);
 }
 
-/*----- IEEE formats ------------------------------------------------------*/
+/*----- IEEE and related formats ------------------------------------------*/
 
 /* IEEE (and related) format descriptions. */
 const struct fltfmt_ieeefmt
-  fltfmt_mini = { IEEEF_HIDDEN, 4, 4 },
-  fltfmt_bf16 = { IEEEF_HIDDEN, 8, 8 },
-  fltfmt_f16 = { IEEEF_HIDDEN, 5, 11 },
-  fltfmt_f32 = { IEEEF_HIDDEN, 8, 24 },
-  fltfmt_f64 = { IEEEF_HIDDEN, 11, 53 },
-  fltfmt_f128 = { IEEEF_HIDDEN, 15, 113 },
+  fltfmt_mini = { FLTIF_HIDDEN, 4, 4 },
+  fltfmt_bf16 = { FLTIF_HIDDEN, 8, 8 },
+  fltfmt_f16 = { FLTIF_HIDDEN, 5, 11 },
+  fltfmt_f32 = { FLTIF_HIDDEN, 8, 24 },
+  fltfmt_f64 = { FLTIF_HIDDEN, 11, 53 },
+  fltfmt_f128 = { FLTIF_HIDDEN, 15, 113 },
   fltfmt_idblext80 = { 0, 15, 64 };
 
 /* --- @fltfmt_encieee@ ---
@@ -585,7 +595,7 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt,
 
   /* Determine the output size. */
   nb = fmt->prec + fmt->expwd + 1;
-  if (fmt->f&IEEEF_HIDDEN) nb--;
+  if (fmt->f&FLTIF_HIDDEN) nb--;
   nw = (nb + 31)/32;
 
   /* Determine the top bits. */
@@ -603,7 +613,7 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt,
      */
 
     z0 |= M32(fmt->expwd) << esh;
-    if (!(fmt->f&IEEEF_HIDDEN)) z0 |= B32(esh - 1);
+    if (!(fmt->f&FLTIF_HIDDEN)) z0 |= B32(esh - 1);
 
   } else if (f&FLTF_NANMASK) {
     /* Not-a-number.
@@ -622,20 +632,28 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt,
     /* Copy the payload.
      *
      * If the payload is all-zero and we're meant to set a signalling NaN
-     * then report an exactness failure and set the low bit.
+     * then report an exactness failure and set the least-significant bit.
      */
     mb = fmt->prec - 2; mw = (mb + 31)/32; sh = -mb%32;
-    for (i = 0; i < nw - mw; i++) z[i] = 0;
-    n = x->n; if (n > mw) n = nw;
-    t = shr(z + i, x->frac, n, sh); i += n;
-    if (i < nw) z[i++] = t;
-    sh = esh - 2; if (fmt->f&IEEEF_HIDDEN) sh++;
-    if (f&FLTF_QNAN) z0 |= B32(sh);
-    else if (!fracwd) { ERR(FLTERR_INEXACT); z[nw - 1] |= 1; }
+    n = x->n;
+      if (n < mw) j = 0;
+      else { n = mw; j = sh; }
+    if ((f&FLTF_SNAN) && ms_set_bit(x->frac + n, j, 32*n) == ALLCLEAR) {
+      ERR(FLTERR_INEXACT);
+      n = nw - 1; for (i = 0; i < n; i++) z[i] = 0;
+      z[i++] = 1;
+    } else {
+      for (i = 0; i < nw - mw; i++) z[i] = 0;
+      n = x->n; if (n > mw) n = mw;
+      t = shr(z + i, x->frac, n, sh); i += n;
+      if (i < nw) z[i++] = t;
+      sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++;
+      if (f&FLTF_QNAN) z0 |= B32(sh);
+    }
 
     /* Set the exponent and, for non-hidden-bit formats, the unit bit. */
     z0 |= M32(fmt->expwd) << esh;
-    if (!(fmt->f&IEEEF_HIDDEN)) z0 |= B32(esh - 1);
+    if (!(fmt->f&FLTIF_HIDDEN)) z0 |= B32(esh - 1);
 
   } else {
     /* A finite value.
@@ -711,15 +729,14 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt,
         * the rounding mode.
         */
 
+       ERR(FLTERR_OFLOW | FLTERR_INEXACT);
        rf = FRPF_ODD | FRPF_HALF | FRPF_LOW;
        if (f&FLTF_NEG) rf |= FRPF_NEG;
-       if ((r >> rf)&1) {
-         ERR(FLTERR_OFLOW | FLTERR_INEXACT);
+       if ((r >> rf)&1)
          z0 |= M32(fmt->expwd) << esh;
-       } else {
-         ERR(FLTERR_INEXACT);
+       else {
          z0 |= (B32(fmt->expwd) - 2) << esh;
-         mb = fmt->prec; if (fmt->f&IEEEF_HIDDEN) mb--;
+         mb = fmt->prec; if (fmt->f&FLTIF_HIDDEN) mb--;
          mw = (mb + 31)/32;
          i = nw - mw;
          z[i++] = M32(mb%32);
@@ -740,8 +757,10 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt,
        /* Set the biased exponent. */
        z0 |= (exp + maxexp) << esh;
 
-       /* Clear the unit bit if we're suppose to use a hidden-bit convention. */
-       if (fmt->f&IEEEF_HIDDEN) {
+       /* Clear the unit bit if we're suppose to use a hidden-bit
+        * convention.
+        */
+       if (fmt->f&FLTIF_HIDDEN) {
          mb = fmt->prec - 1; mw = (mb + 31)/32; mb = mb%32;
          z[nw - mw] &= ~B32(mb);
        }
@@ -944,7 +963,7 @@ unsigned fltfmt_decieee(const struct fltfmt_ieeefmt *fmt,
    */
   assert(fmt->expwd + 3 <= 32);
   esh = 31 - fmt->expwd; emask = M32(fmt->expwd);
-  sigwd = fmt->prec; if (fmt->f&IEEEF_HIDDEN) sigwd--;
+  sigwd = fmt->prec; if (fmt->f&FLTIF_HIDDEN) sigwd--;
 
   /* Determine the input size. */
   nb = sigwd + fmt->expwd + 1; nw = (nb + 31)/32;
@@ -964,7 +983,7 @@ unsigned fltfmt_decieee(const struct fltfmt_ieeefmt *fmt,
      * Note that we don't include the quiet bit in our decoded payload.
      */
 
-    if (!(fmt->f&IEEEF_HIDDEN)) {
+    if (!(fmt->f&FLTIF_HIDDEN)) {
       /* No hidden bit, so we expect the unit bit to be set.  If it isn't,
        * that's technically invalid, and its absence won't survive a round
        * trip, since the bit isn't considered part of a NaN payload -- or
@@ -980,7 +999,7 @@ unsigned fltfmt_decieee(const struct fltfmt_ieeefmt *fmt,
     if (ms_set_bit(x + nw, 0, sigwd) == ALLCLEAR)
       f |= FLTF_INF;
     else {
-      sh = esh - 2; if (fmt->f&IEEEF_HIDDEN) sh++;
+      sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++;
       if (x0&B32(sh)) f |= FLTF_QNAN;
       else f |= FLTF_SNAN;
       sigwd--; mw = (sigwd + 31)/32;
@@ -998,7 +1017,7 @@ unsigned fltfmt_decieee(const struct fltfmt_ieeefmt *fmt,
    * Otherwise, we'll normalize the incoming value regardless, but report
    * settings of the unit bit which are inconsistent with the exponent.
    */
-  if (fmt->f&IEEEF_HIDDEN) {
+  if (fmt->f&FLTIF_HIDDEN) {
     if (!t) { exp = minexp; goto normalize; }
     else { exp = t - maxexp; goto hidden; }
   } else {
@@ -1124,6 +1143,13 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
 #  define DIGIT_BITS 4
 #endif
 
+/* Take note if we need to cope with the revered quiet/signalling convention
+ * used by HP-PA and older MIPS processors.
+ */
+#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008))
+#  define FROB_NANS
+#endif
+
 /* --- @ENCFLT@ --- *
  *
  * Arguments:  @ty@ = the C type to encode
@@ -1179,8 +1205,10 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
     * requisite compile-time configuration machinery is worth the effort.)
     */
 
-#  define SETINF(TY, rc, z)                                            \
-       do { (z) = TY##_MAX; (rc) = FLTERR_OFLOW | FLTERR_INEXACT; } while (0)
+#  define SETINF(TY, rc, z) do {                                       \
+       (z) = TY##_MAX;                                                 \
+       (rc) = FLTERR_OFLOW | FLTERR_INEXACT;                           \
+   } while (0)
 #endif
 
 #ifdef DIGIT_BITS
@@ -1206,8 +1234,33 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
 
 #endif
 
+#ifdef FROB_NANS
+   /* The native floating point format uses the opposite quiet-vs-signalling
+    * NaN convention from the recommended `quiet bit' convention, so the bit
+    * needs hacking on input.
+    */
+
+#  define FROBNAN_ENCDECLS     struct floatbits _y
+#  define FROBNAN_ENC do {                                             \
+     if (_x->f&FLTF_NANMASK) {                                         \
+       _y.f = _x->f ^ FLTF_NANMASK; _y.frac = _x->frac; _y.n = _x->n;  \
+       _x = &_y;                                                       \
+     }                                                                 \
+   } while (0)
+#else
+   /* The native floating point format either uses the conventional
+    * `quiet-bit' convention, or isn't IEEE at all.  Either way, there's
+    * nothing to do here.
+    */
+
+#  define FROBNAN_ENCDECLS
+#  define FROBNAN_ENC do ; while (0)
+#endif
+
 #define ENCFLT(ty, TY, ldexp, rc, z_out, x, r) do {                    \
+  const struct floatbits *_x = (x);                                    \
   unsigned _rc = 0;                                                    \
+  FROBNAN_ENCDECLS;                                                    \
                                                                        \
   /* See if the native format is one that we recognize. */             \
   switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {            \
@@ -1216,8 +1269,8 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
       uint32 _t[1];                                                    \
       unsigned char *_z = (unsigned char *)(z_out);                    \
                                                                        \
-      (rc) = fltfmt_encieee(&fltfmt_f32, _t, (x), (r), FLTERR_ALLERRS);        \
-      FLTFMT__FROB_NAN_F32(_t, _rc);                                   \
+      FROBNAN_ENC;                                                     \
+      (rc) = fltfmt_encieee(&fltfmt_f32, _t, _x, (r), FLTERR_ALLERRS); \
       switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
        case FLTFMT_BE: STORE32_B(_z, _t[0]); break;                    \
        case FLTFMT_LE: STORE32_L(_z, _t[0]); break;                    \
@@ -1228,8 +1281,9 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
     case FLTFMT_IEEE_F64: {                                            \
       uint32 _t[2];                                                    \
       unsigned char *_z = (unsigned char *)(z_out);                    \
-      (rc) = fltfmt_encieee(&fltfmt_f64, _t, (x), (r), FLTERR_ALLERRS);        \
-      FLTFMT__FROB_NAN_F64(_t, _rc);                                   \
+                                                                       \
+      FROBNAN_ENC;                                                     \
+      (rc) = fltfmt_encieee(&fltfmt_f64, _t, _x, (r), FLTERR_ALLERRS); \
       switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
        case FLTFMT_BE:                                                 \
          STORE32_B(_z + 0, _t[0]); STORE32_B(_z + 4, _t[1]);           \
@@ -1248,8 +1302,8 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
       uint32 _t[4];                                                    \
       unsigned char *_z = (unsigned char *)(z_out);                    \
                                                                        \
-      FLTFMT__FROB_NAN_F128(_t, _rc);                                  \
-      (rc) = fltfmt_encieee(&fltfmt_f128, _t, (x), (r), FLTERR_ALLERRS); \
+      FROBNAN_ENC;                                                     \
+      (rc) = fltfmt_encieee(&fltfmt_f128, _t, _x, (r), FLTERR_ALLERRS);        \
       switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
        case FLTFMT_BE:                                                 \
          STORE32_B(_z +  0, _t[0]); STORE32_B(_z +  4, _t[1]);         \
@@ -1267,8 +1321,9 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
       uint32 _t[3];                                                    \
       unsigned char *_z = (unsigned char *)(z_out);                    \
                                                                        \
-      (rc) = fltfmt_encieee(&fltfmt_idblext80, _t, (x), (r), FLTERR_ALLERRS); \
-      FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc);                             \
+      FROBNAN_ENC;                                                     \
+      (rc) = fltfmt_encieee(&fltfmt_idblext80,                         \
+                           _t, _x, (r), FLTERR_ALLERRS);               \
       switch (TY##_FORMAT&FLTFMT_ENDMASK) {                            \
        case FLTFMT_BE:                                                 \
          STORE16_B(_z + 0, _t[0]);                                     \
@@ -1285,7 +1340,6 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
     default: {                                                         \
       /* We must do this the hard way. */                              \
                                                                        \
-      const struct floatbits *_x = (x);                                        \
       ty _z;                                                           \
       unsigned _i;                                                     \
       ENC_ROUND_DECLS;                                                 \
@@ -1372,7 +1426,7 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m)
  *                 zero.
  *
  *               * If the implementation's floating-point radix is not a
- *                 power of two, and @x@ is a nonzero finite value, then
+ *                 power of two, and @x@ is a nonzero finite value, then the
  *                 @FLTERR_INEXACT@ error bit is set (unconditionally), and
  *                 the value is rounded by the implementation using its
  *                 prevailing rounding policy.  If the radix is a power of
@@ -1467,7 +1521,26 @@ unsigned fltfmt_encldbl(long double *z_out,
    } while (0)
 #endif
 
+#ifdef FROB_NANS
+   /* The native floating point format uses the opposite quiet-vs-signalling
+    * NaN convention from the recommended `quiet bit' convention, so the bit
+    * needs hacking on output.
+    */
+
+#  define FROBNAN_DEC do {                                             \
+     if (_z->f&FLTF_NANMASK) _z->f ^= FLTF_NANMASK;                    \
+   } while (0)
+#else
+   /* The native floating point format either uses the conventional
+    * `quiet-bit' convention, or isn't IEEE at all.  Either way, there's
+    * nothing to do here.
+    */
+
+#  define FROBNAN_DEC do ; while (0)
+#endif
+
 #define DECFLT(ty, TY, frexp, rc, z_out, x, r) do {                    \
+  struct floatbits *_z = (z_out);                                      \
   unsigned _rc = 0;                                                    \
                                                                        \
   switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) {            \
@@ -1481,8 +1554,7 @@ unsigned fltfmt_encldbl(long double *z_out,
        case FLTFMT_LE: _t[0] = LOAD32_L(_x); break;                    \
        default: assert(!"unimplemented byte order"); break;            \
       }                                                                        \
-      FLTFMT__FROB_NAN_F32(_t, _rc);                                   \
-      _rc |= fltfmt_decieee(&fltfmt_f32, (z_out), _t);                 \
+      _rc |= fltfmt_decieee(&fltfmt_f32, _z, _t); FROBNAN_DEC;         \
     } break;                                                           \
                                                                        \
     case FLTFMT_IEEE_F64: {                                            \
@@ -1501,8 +1573,7 @@ unsigned fltfmt_encldbl(long double *z_out,
          break;                                                        \
        default: assert(!"unimplemented byte order"); break;            \
       }                                                                        \
-      FLTFMT__FROB_NAN_F64(_t, _rc);                                   \
-      _rc |= fltfmt_decieee(&fltfmt_f64, (z_out), _t);                 \
+      _rc |= fltfmt_decieee(&fltfmt_f64, _z, _t); FROBNAN_DEC;         \
     } break;                                                           \
                                                                        \
     case FLTFMT_IEEE_F128: {                                           \
@@ -1520,8 +1591,7 @@ unsigned fltfmt_encldbl(long double *z_out,
          break;                                                        \
        default: assert(!"unimplemented byte order"); break;            \
       }                                                                        \
-      FLTFMT__FROB_NAN_F128(_t, _rc);                                  \
-      _rc |= fltfmt_decieee(&fltfmt_f128, (z_out), _t);                        \
+      _rc |= fltfmt_decieee(&fltfmt_f128, _z, _t); FROBNAN_DEC;                \
     } break;                                                           \
                                                                        \
     case FLTFMT_INTEL_F80: {                                           \
@@ -1539,12 +1609,10 @@ unsigned fltfmt_encldbl(long double *z_out,
          break;                                                        \
        default: assert(!"unimplemented byte order"); break;            \
       }                                                                        \
-      FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc);                             \
-      _rc |= fltfmt_decieee(&fltfmt_idblext80, (z_out), _t);           \
+      _rc |= fltfmt_decieee(&fltfmt_idblext80, _z, _t); FROBNAN_DEC;   \
     } break;                                                           \
                                                                        \
     default: {                                                         \
-      struct floatbits *_z = (z_out);                                  \
       ty _x = (x), _y;                                                 \
       unsigned _i, _n, _f = 0;                                         \
       uint32 _t;                                                       \
@@ -1594,7 +1662,7 @@ unsigned fltfmt_encldbl(long double *z_out,
  *
  * Returns:    Error flags (@FLTERR_...@).
  *
- * Use:                Decode the native C floatingpoint value @x@ and store the
+ * Use:                Decode the native C floating-point value @x@ and store the
  *             result in @z_out@.
  *
  *             The @TY@ may be @flt@ to encode a @float@, @dbl@ to encode a
@@ -1611,7 +1679,7 @@ unsigned fltfmt_encldbl(long double *z_out,
  *                 error bit is set and the decoded payload is left empty.
  *
  *               * If the implementation's floating-point radix is not a
- *                 power of two, and @x@ is a nonzero finite value, then
+ *                 power of two, and @x@ is a nonzero finite value, then the
  *                 @FLTERR_INEXACT@ error bit is set (unconditionally), and
  *                 the rounded value (according to the rounding mode @r@) is
  *                 stored in as many fraction words as necessary to identify