X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~mdw/git/mLib/blobdiff_plain/289651a7df7bc48724137cd0faaf8535fb54e73b..refs/heads/mdw/tvec:/utils/fltfmt.c diff --git a/utils/fltfmt.c b/utils/fltfmt.c index 89c705d..177004e 100644 --- a/utils/fltfmt.c +++ b/utils/fltfmt.c @@ -39,9 +39,19 @@ #include "bits.h" #include "fltfmt.h" #include "growbuf.h" -#include "macros.h" #include "maths.h" +/*----- Preliminary hacking -----------------------------------------------*/ + +/* The native-format conversions are -- at least if the format is + * unrecognized -- dependent on the implementation's rounding. Our own + * rounding mode specifications don't fit into the framework very well, but I + * still want to respect the prevailing rounding mode. + * + * The `proper' way to do this is with %|#pragma STDC FENV_ACCESS|%. But + * that doesn't actually work on GCC, or on Clang from not too long ago. So + * use compiler-specific hacking to support this. + */ #if GCC_VERSION_P(4, 4) # pragma GCC optimize "-frounding-math" #elif CLANG_VERSION_P(11, 0) && !CLANG_VERSION_P(12, 0) @@ -291,7 +301,7 @@ static unsigned ms_set_bit(const uint32 *x, unsigned from, unsigned to) */ /* If the region is empty then it's technically true that all of the bits - * are zero. It's important to be able to do answer the case where + * are zero. It's important to be able to answer the case where * %$\id{from} = \id{to} = 0$% without accessing memory. */ assert(to >= from); if (to == from) return (ALLCLEAR); @@ -531,16 +541,16 @@ unsigned fltfmt_round(struct floatbits *z_out, const struct floatbits *x, return (rc); } -/*----- IEEE formats ------------------------------------------------------*/ +/*----- IEEE and related formats ------------------------------------------*/ /* IEEE (and related) format descriptions. */ const struct fltfmt_ieeefmt - fltfmt_mini = { IEEEF_HIDDEN, 4, 4 }, - fltfmt_bf16 = { IEEEF_HIDDEN, 8, 8 }, - fltfmt_f16 = { IEEEF_HIDDEN, 5, 11 }, - fltfmt_f32 = { IEEEF_HIDDEN, 8, 24 }, - fltfmt_f64 = { IEEEF_HIDDEN, 11, 53 }, - fltfmt_f128 = { IEEEF_HIDDEN, 15, 113 }, + fltfmt_mini = { FLTIF_HIDDEN, 4, 4 }, + fltfmt_bf16 = { FLTIF_HIDDEN, 8, 8 }, + fltfmt_f16 = { FLTIF_HIDDEN, 5, 11 }, + fltfmt_f32 = { FLTIF_HIDDEN, 8, 24 }, + fltfmt_f64 = { FLTIF_HIDDEN, 11, 53 }, + fltfmt_f128 = { FLTIF_HIDDEN, 15, 113 }, fltfmt_idblext80 = { 0, 15, 64 }; /* --- @fltfmt_encieee@ --- @@ -585,7 +595,7 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt, /* Determine the output size. */ nb = fmt->prec + fmt->expwd + 1; - if (fmt->f&IEEEF_HIDDEN) nb--; + if (fmt->f&FLTIF_HIDDEN) nb--; nw = (nb + 31)/32; /* Determine the top bits. */ @@ -603,7 +613,7 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt, */ z0 |= M32(fmt->expwd) << esh; - if (!(fmt->f&IEEEF_HIDDEN)) z0 |= B32(esh - 1); + if (!(fmt->f&FLTIF_HIDDEN)) z0 |= B32(esh - 1); } else if (f&FLTF_NANMASK) { /* Not-a-number. @@ -622,20 +632,28 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt, /* Copy the payload. * * If the payload is all-zero and we're meant to set a signalling NaN - * then report an exactness failure and set the low bit. + * then report an exactness failure and set the least-significant bit. */ mb = fmt->prec - 2; mw = (mb + 31)/32; sh = -mb%32; - for (i = 0; i < nw - mw; i++) z[i] = 0; - n = x->n; if (n > mw) n = nw; - t = shr(z + i, x->frac, n, sh); i += n; - if (i < nw) z[i++] = t; - sh = esh - 2; if (fmt->f&IEEEF_HIDDEN) sh++; - if (f&FLTF_QNAN) z0 |= B32(sh); - else if (!fracwd) { ERR(FLTERR_INEXACT); z[nw - 1] |= 1; } + n = x->n; + if (n < mw) j = 0; + else { n = mw; j = sh; } + if ((f&FLTF_SNAN) && ms_set_bit(x->frac + n, j, 32*n) == ALLCLEAR) { + ERR(FLTERR_INEXACT); + n = nw - 1; for (i = 0; i < n; i++) z[i] = 0; + z[i++] = 1; + } else { + for (i = 0; i < nw - mw; i++) z[i] = 0; + n = x->n; if (n > mw) n = mw; + t = shr(z + i, x->frac, n, sh); i += n; + if (i < nw) z[i++] = t; + sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++; + if (f&FLTF_QNAN) z0 |= B32(sh); + } /* Set the exponent and, for non-hidden-bit formats, the unit bit. */ z0 |= M32(fmt->expwd) << esh; - if (!(fmt->f&IEEEF_HIDDEN)) z0 |= B32(esh - 1); + if (!(fmt->f&FLTIF_HIDDEN)) z0 |= B32(esh - 1); } else { /* A finite value. @@ -711,15 +729,14 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt, * the rounding mode. */ + ERR(FLTERR_OFLOW | FLTERR_INEXACT); rf = FRPF_ODD | FRPF_HALF | FRPF_LOW; if (f&FLTF_NEG) rf |= FRPF_NEG; - if ((r >> rf)&1) { - ERR(FLTERR_OFLOW | FLTERR_INEXACT); + if ((r >> rf)&1) z0 |= M32(fmt->expwd) << esh; - } else { - ERR(FLTERR_INEXACT); + else { z0 |= (B32(fmt->expwd) - 2) << esh; - mb = fmt->prec; if (fmt->f&IEEEF_HIDDEN) mb--; + mb = fmt->prec; if (fmt->f&FLTIF_HIDDEN) mb--; mw = (mb + 31)/32; i = nw - mw; z[i++] = M32(mb%32); @@ -740,8 +757,10 @@ unsigned fltfmt_encieee(const struct fltfmt_ieeefmt *fmt, /* Set the biased exponent. */ z0 |= (exp + maxexp) << esh; - /* Clear the unit bit if we're suppose to use a hidden-bit convention. */ - if (fmt->f&IEEEF_HIDDEN) { + /* Clear the unit bit if we're suppose to use a hidden-bit + * convention. + */ + if (fmt->f&FLTIF_HIDDEN) { mb = fmt->prec - 1; mw = (mb + 31)/32; mb = mb%32; z[nw - mw] &= ~B32(mb); } @@ -944,7 +963,7 @@ unsigned fltfmt_decieee(const struct fltfmt_ieeefmt *fmt, */ assert(fmt->expwd + 3 <= 32); esh = 31 - fmt->expwd; emask = M32(fmt->expwd); - sigwd = fmt->prec; if (fmt->f&IEEEF_HIDDEN) sigwd--; + sigwd = fmt->prec; if (fmt->f&FLTIF_HIDDEN) sigwd--; /* Determine the input size. */ nb = sigwd + fmt->expwd + 1; nw = (nb + 31)/32; @@ -964,7 +983,7 @@ unsigned fltfmt_decieee(const struct fltfmt_ieeefmt *fmt, * Note that we don't include the quiet bit in our decoded payload. */ - if (!(fmt->f&IEEEF_HIDDEN)) { + if (!(fmt->f&FLTIF_HIDDEN)) { /* No hidden bit, so we expect the unit bit to be set. If it isn't, * that's technically invalid, and its absence won't survive a round * trip, since the bit isn't considered part of a NaN payload -- or @@ -980,7 +999,7 @@ unsigned fltfmt_decieee(const struct fltfmt_ieeefmt *fmt, if (ms_set_bit(x + nw, 0, sigwd) == ALLCLEAR) f |= FLTF_INF; else { - sh = esh - 2; if (fmt->f&IEEEF_HIDDEN) sh++; + sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++; if (x0&B32(sh)) f |= FLTF_QNAN; else f |= FLTF_SNAN; sigwd--; mw = (sigwd + 31)/32; @@ -998,7 +1017,7 @@ unsigned fltfmt_decieee(const struct fltfmt_ieeefmt *fmt, * Otherwise, we'll normalize the incoming value regardless, but report * settings of the unit bit which are inconsistent with the exponent. */ - if (fmt->f&IEEEF_HIDDEN) { + if (fmt->f&FLTIF_HIDDEN) { if (!t) { exp = minexp; goto normalize; } else { exp = t - maxexp; goto hidden; } } else { @@ -1124,6 +1143,13 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m) # define DIGIT_BITS 4 #endif +/* Take note if we need to cope with the revered quiet/signalling convention + * used by HP-PA and older MIPS processors. + */ +#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008)) +# define FROB_NANS +#endif + /* --- @ENCFLT@ --- * * * Arguments: @ty@ = the C type to encode @@ -1179,8 +1205,10 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m) * requisite compile-time configuration machinery is worth the effort.) */ -# define SETINF(TY, rc, z) \ - do { (z) = TY##_MAX; (rc) = FLTERR_OFLOW | FLTERR_INEXACT; } while (0) +# define SETINF(TY, rc, z) do { \ + (z) = TY##_MAX; \ + (rc) = FLTERR_OFLOW | FLTERR_INEXACT; \ + } while (0) #endif #ifdef DIGIT_BITS @@ -1206,8 +1234,33 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m) #endif +#ifdef FROB_NANS + /* The native floating point format uses the opposite quiet-vs-signalling + * NaN convention from the recommended `quiet bit' convention, so the bit + * needs hacking on input. + */ + +# define FROBNAN_ENCDECLS struct floatbits _y +# define FROBNAN_ENC do { \ + if (_x->f&FLTF_NANMASK) { \ + _y.f = _x->f ^ FLTF_NANMASK; _y.frac = _x->frac; _y.n = _x->n; \ + _x = &_y; \ + } \ + } while (0) +#else + /* The native floating point format either uses the conventional + * `quiet-bit' convention, or isn't IEEE at all. Either way, there's + * nothing to do here. + */ + +# define FROBNAN_ENCDECLS +# define FROBNAN_ENC do ; while (0) +#endif + #define ENCFLT(ty, TY, ldexp, rc, z_out, x, r) do { \ + const struct floatbits *_x = (x); \ unsigned _rc = 0; \ + FROBNAN_ENCDECLS; \ \ /* See if the native format is one that we recognize. */ \ switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) { \ @@ -1216,8 +1269,8 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m) uint32 _t[1]; \ unsigned char *_z = (unsigned char *)(z_out); \ \ - (rc) = fltfmt_encieee(&fltfmt_f32, _t, (x), (r), FLTERR_ALLERRS); \ - FLTFMT__FROB_NAN_F32(_t, _rc); \ + FROBNAN_ENC; \ + (rc) = fltfmt_encieee(&fltfmt_f32, _t, _x, (r), FLTERR_ALLERRS); \ switch (TY##_FORMAT&FLTFMT_ENDMASK) { \ case FLTFMT_BE: STORE32_B(_z, _t[0]); break; \ case FLTFMT_LE: STORE32_L(_z, _t[0]); break; \ @@ -1228,8 +1281,9 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m) case FLTFMT_IEEE_F64: { \ uint32 _t[2]; \ unsigned char *_z = (unsigned char *)(z_out); \ - (rc) = fltfmt_encieee(&fltfmt_f64, _t, (x), (r), FLTERR_ALLERRS); \ - FLTFMT__FROB_NAN_F64(_t, _rc); \ + \ + FROBNAN_ENC; \ + (rc) = fltfmt_encieee(&fltfmt_f64, _t, _x, (r), FLTERR_ALLERRS); \ switch (TY##_FORMAT&FLTFMT_ENDMASK) { \ case FLTFMT_BE: \ STORE32_B(_z + 0, _t[0]); STORE32_B(_z + 4, _t[1]); \ @@ -1248,8 +1302,8 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m) uint32 _t[4]; \ unsigned char *_z = (unsigned char *)(z_out); \ \ - FLTFMT__FROB_NAN_F128(_t, _rc); \ - (rc) = fltfmt_encieee(&fltfmt_f128, _t, (x), (r), FLTERR_ALLERRS); \ + FROBNAN_ENC; \ + (rc) = fltfmt_encieee(&fltfmt_f128, _t, _x, (r), FLTERR_ALLERRS); \ switch (TY##_FORMAT&FLTFMT_ENDMASK) { \ case FLTFMT_BE: \ STORE32_B(_z + 0, _t[0]); STORE32_B(_z + 4, _t[1]); \ @@ -1267,8 +1321,9 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m) uint32 _t[3]; \ unsigned char *_z = (unsigned char *)(z_out); \ \ - (rc) = fltfmt_encieee(&fltfmt_idblext80, _t, (x), (r), FLTERR_ALLERRS); \ - FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc); \ + FROBNAN_ENC; \ + (rc) = fltfmt_encieee(&fltfmt_idblext80, \ + _t, _x, (r), FLTERR_ALLERRS); \ switch (TY##_FORMAT&FLTFMT_ENDMASK) { \ case FLTFMT_BE: \ STORE16_B(_z + 0, _t[0]); \ @@ -1285,7 +1340,6 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m) default: { \ /* We must do this the hard way. */ \ \ - const struct floatbits *_x = (x); \ ty _z; \ unsigned _i; \ ENC_ROUND_DECLS; \ @@ -1372,7 +1426,7 @@ unsigned fltfmt_decidblext80(struct floatbits *z_out, uint16 se, kludge64 m) * zero. * * * If the implementation's floating-point radix is not a - * power of two, and @x@ is a nonzero finite value, then + * power of two, and @x@ is a nonzero finite value, then the * @FLTERR_INEXACT@ error bit is set (unconditionally), and * the value is rounded by the implementation using its * prevailing rounding policy. If the radix is a power of @@ -1467,7 +1521,26 @@ unsigned fltfmt_encldbl(long double *z_out, } while (0) #endif +#ifdef FROB_NANS + /* The native floating point format uses the opposite quiet-vs-signalling + * NaN convention from the recommended `quiet bit' convention, so the bit + * needs hacking on output. + */ + +# define FROBNAN_DEC do { \ + if (_z->f&FLTF_NANMASK) _z->f ^= FLTF_NANMASK; \ + } while (0) +#else + /* The native floating point format either uses the conventional + * `quiet-bit' convention, or isn't IEEE at all. Either way, there's + * nothing to do here. + */ + +# define FROBNAN_DEC do ; while (0) +#endif + #define DECFLT(ty, TY, frexp, rc, z_out, x, r) do { \ + struct floatbits *_z = (z_out); \ unsigned _rc = 0; \ \ switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) { \ @@ -1481,8 +1554,7 @@ unsigned fltfmt_encldbl(long double *z_out, case FLTFMT_LE: _t[0] = LOAD32_L(_x); break; \ default: assert(!"unimplemented byte order"); break; \ } \ - FLTFMT__FROB_NAN_F32(_t, _rc); \ - _rc |= fltfmt_decieee(&fltfmt_f32, (z_out), _t); \ + _rc |= fltfmt_decieee(&fltfmt_f32, _z, _t); FROBNAN_DEC; \ } break; \ \ case FLTFMT_IEEE_F64: { \ @@ -1501,8 +1573,7 @@ unsigned fltfmt_encldbl(long double *z_out, break; \ default: assert(!"unimplemented byte order"); break; \ } \ - FLTFMT__FROB_NAN_F64(_t, _rc); \ - _rc |= fltfmt_decieee(&fltfmt_f64, (z_out), _t); \ + _rc |= fltfmt_decieee(&fltfmt_f64, _z, _t); FROBNAN_DEC; \ } break; \ \ case FLTFMT_IEEE_F128: { \ @@ -1520,8 +1591,7 @@ unsigned fltfmt_encldbl(long double *z_out, break; \ default: assert(!"unimplemented byte order"); break; \ } \ - FLTFMT__FROB_NAN_F128(_t, _rc); \ - _rc |= fltfmt_decieee(&fltfmt_f128, (z_out), _t); \ + _rc |= fltfmt_decieee(&fltfmt_f128, _z, _t); FROBNAN_DEC; \ } break; \ \ case FLTFMT_INTEL_F80: { \ @@ -1539,12 +1609,10 @@ unsigned fltfmt_encldbl(long double *z_out, break; \ default: assert(!"unimplemented byte order"); break; \ } \ - FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc); \ - _rc |= fltfmt_decieee(&fltfmt_idblext80, (z_out), _t); \ + _rc |= fltfmt_decieee(&fltfmt_idblext80, _z, _t); FROBNAN_DEC; \ } break; \ \ default: { \ - struct floatbits *_z = (z_out); \ ty _x = (x), _y; \ unsigned _i, _n, _f = 0; \ uint32 _t; \ @@ -1594,7 +1662,7 @@ unsigned fltfmt_encldbl(long double *z_out, * * Returns: Error flags (@FLTERR_...@). * - * Use: Decode the native C floatingpoint value @x@ and store the + * Use: Decode the native C floating-point value @x@ and store the * result in @z_out@. * * The @TY@ may be @flt@ to encode a @float@, @dbl@ to encode a @@ -1611,7 +1679,7 @@ unsigned fltfmt_encldbl(long double *z_out, * error bit is set and the decoded payload is left empty. * * * If the implementation's floating-point radix is not a - * power of two, and @x@ is a nonzero finite value, then + * power of two, and @x@ is a nonzero finite value, then the * @FLTERR_INEXACT@ error bit is set (unconditionally), and * the rounded value (according to the rounding mode @r@) is * stored in as many fraction words as necessary to identify