#include "bits.h"
#include "fltfmt.h"
#include "growbuf.h"
-#include "macros.h"
#include "maths.h"
+/*----- Preliminary hacking -----------------------------------------------*/
+
+/* The native-format conversions are -- at least if the format is
+ * unrecognized -- dependent on the implementation's rounding. Our own
+ * rounding mode specifications don't fit into the framework very well, but I
+ * still want to respect the prevailing rounding mode.
+ *
+ * The `proper' way to do this is with %|#pragma STDC FENV_ACCESS|%. But
+ * that doesn't actually work on GCC, or on Clang from not too long ago. So
+ * use compiler-specific hacking to support this.
+ */
#if GCC_VERSION_P(4, 4)
# pragma GCC optimize "-frounding-math"
#elif CLANG_VERSION_P(11, 0) && !CLANG_VERSION_P(12, 0)
*/
/* If the region is empty then it's technically true that all of the bits
- * are zero. It's important to be able to do answer the case where
+ * are zero. It's important to be able to answer the case where
* %$\id{from} = \id{to} = 0$% without accessing memory.
*/
assert(to >= from); if (to == from) return (ALLCLEAR);
return (rc);
}
-/*----- IEEE formats ------------------------------------------------------*/
+/*----- IEEE and related formats ------------------------------------------*/
/* IEEE (and related) format descriptions. */
const struct fltfmt_ieeefmt
- fltfmt_mini = { IEEEF_HIDDEN, 4, 4 },
- fltfmt_bf16 = { IEEEF_HIDDEN, 8, 8 },
- fltfmt_f16 = { IEEEF_HIDDEN, 5, 11 },
- fltfmt_f32 = { IEEEF_HIDDEN, 8, 24 },
- fltfmt_f64 = { IEEEF_HIDDEN, 11, 53 },
- fltfmt_f128 = { IEEEF_HIDDEN, 15, 113 },
+ fltfmt_mini = { FLTIF_HIDDEN, 4, 4 },
+ fltfmt_bf16 = { FLTIF_HIDDEN, 8, 8 },
+ fltfmt_f16 = { FLTIF_HIDDEN, 5, 11 },
+ fltfmt_f32 = { FLTIF_HIDDEN, 8, 24 },
+ fltfmt_f64 = { FLTIF_HIDDEN, 11, 53 },
+ fltfmt_f128 = { FLTIF_HIDDEN, 15, 113 },
fltfmt_idblext80 = { 0, 15, 64 };
/* --- @fltfmt_encieee@ ---
/* Determine the output size. */
nb = fmt->prec + fmt->expwd + 1;
- if (fmt->f&IEEEF_HIDDEN) nb--;
+ if (fmt->f&FLTIF_HIDDEN) nb--;
nw = (nb + 31)/32;
/* Determine the top bits. */
*/
z0 |= M32(fmt->expwd) << esh;
- if (!(fmt->f&IEEEF_HIDDEN)) z0 |= B32(esh - 1);
+ if (!(fmt->f&FLTIF_HIDDEN)) z0 |= B32(esh - 1);
} else if (f&FLTF_NANMASK) {
/* Not-a-number.
/* Copy the payload.
*
* If the payload is all-zero and we're meant to set a signalling NaN
- * then report an exactness failure and set the low bit.
+ * then report an exactness failure and set the least-significant bit.
*/
mb = fmt->prec - 2; mw = (mb + 31)/32; sh = -mb%32;
- for (i = 0; i < nw - mw; i++) z[i] = 0;
- n = x->n; if (n > mw) n = nw;
- t = shr(z + i, x->frac, n, sh); i += n;
- if (i < nw) z[i++] = t;
- sh = esh - 2; if (fmt->f&IEEEF_HIDDEN) sh++;
- if (f&FLTF_QNAN) z0 |= B32(sh);
- else if (!fracwd) { ERR(FLTERR_INEXACT); z[nw - 1] |= 1; }
+ n = x->n;
+ if (n < mw) j = 0;
+ else { n = mw; j = sh; }
+ if ((f&FLTF_SNAN) && ms_set_bit(x->frac + n, j, 32*n) == ALLCLEAR) {
+ ERR(FLTERR_INEXACT);
+ n = nw - 1; for (i = 0; i < n; i++) z[i] = 0;
+ z[i++] = 1;
+ } else {
+ for (i = 0; i < nw - mw; i++) z[i] = 0;
+ n = x->n; if (n > mw) n = mw;
+ t = shr(z + i, x->frac, n, sh); i += n;
+ if (i < nw) z[i++] = t;
+ sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++;
+ if (f&FLTF_QNAN) z0 |= B32(sh);
+ }
/* Set the exponent and, for non-hidden-bit formats, the unit bit. */
z0 |= M32(fmt->expwd) << esh;
- if (!(fmt->f&IEEEF_HIDDEN)) z0 |= B32(esh - 1);
+ if (!(fmt->f&FLTIF_HIDDEN)) z0 |= B32(esh - 1);
} else {
/* A finite value.
* the rounding mode.
*/
+ ERR(FLTERR_OFLOW | FLTERR_INEXACT);
rf = FRPF_ODD | FRPF_HALF | FRPF_LOW;
if (f&FLTF_NEG) rf |= FRPF_NEG;
- if ((r >> rf)&1) {
- ERR(FLTERR_OFLOW | FLTERR_INEXACT);
+ if ((r >> rf)&1)
z0 |= M32(fmt->expwd) << esh;
- } else {
- ERR(FLTERR_INEXACT);
+ else {
z0 |= (B32(fmt->expwd) - 2) << esh;
- mb = fmt->prec; if (fmt->f&IEEEF_HIDDEN) mb--;
+ mb = fmt->prec; if (fmt->f&FLTIF_HIDDEN) mb--;
mw = (mb + 31)/32;
i = nw - mw;
z[i++] = M32(mb%32);
/* Set the biased exponent. */
z0 |= (exp + maxexp) << esh;
- /* Clear the unit bit if we're suppose to use a hidden-bit convention. */
- if (fmt->f&IEEEF_HIDDEN) {
+ /* Clear the unit bit if we're suppose to use a hidden-bit
+ * convention.
+ */
+ if (fmt->f&FLTIF_HIDDEN) {
mb = fmt->prec - 1; mw = (mb + 31)/32; mb = mb%32;
z[nw - mw] &= ~B32(mb);
}
*/
assert(fmt->expwd + 3 <= 32);
esh = 31 - fmt->expwd; emask = M32(fmt->expwd);
- sigwd = fmt->prec; if (fmt->f&IEEEF_HIDDEN) sigwd--;
+ sigwd = fmt->prec; if (fmt->f&FLTIF_HIDDEN) sigwd--;
/* Determine the input size. */
nb = sigwd + fmt->expwd + 1; nw = (nb + 31)/32;
* Note that we don't include the quiet bit in our decoded payload.
*/
- if (!(fmt->f&IEEEF_HIDDEN)) {
+ if (!(fmt->f&FLTIF_HIDDEN)) {
/* No hidden bit, so we expect the unit bit to be set. If it isn't,
* that's technically invalid, and its absence won't survive a round
* trip, since the bit isn't considered part of a NaN payload -- or
if (ms_set_bit(x + nw, 0, sigwd) == ALLCLEAR)
f |= FLTF_INF;
else {
- sh = esh - 2; if (fmt->f&IEEEF_HIDDEN) sh++;
+ sh = esh - 2; if (fmt->f&FLTIF_HIDDEN) sh++;
if (x0&B32(sh)) f |= FLTF_QNAN;
else f |= FLTF_SNAN;
sigwd--; mw = (sigwd + 31)/32;
* Otherwise, we'll normalize the incoming value regardless, but report
* settings of the unit bit which are inconsistent with the exponent.
*/
- if (fmt->f&IEEEF_HIDDEN) {
+ if (fmt->f&FLTIF_HIDDEN) {
if (!t) { exp = minexp; goto normalize; }
else { exp = t - maxexp; goto hidden; }
} else {
# define DIGIT_BITS 4
#endif
+/* Take note if we need to cope with the revered quiet/signalling convention
+ * used by HP-PA and older MIPS processors.
+ */
+#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008))
+# define FROB_NANS
+#endif
+
/* --- @ENCFLT@ --- *
*
* Arguments: @ty@ = the C type to encode
* requisite compile-time configuration machinery is worth the effort.)
*/
-# define SETINF(TY, rc, z) \
- do { (z) = TY##_MAX; (rc) = FLTERR_OFLOW | FLTERR_INEXACT; } while (0)
+# define SETINF(TY, rc, z) do { \
+ (z) = TY##_MAX; \
+ (rc) = FLTERR_OFLOW | FLTERR_INEXACT; \
+ } while (0)
#endif
#ifdef DIGIT_BITS
#endif
+#ifdef FROB_NANS
+ /* The native floating point format uses the opposite quiet-vs-signalling
+ * NaN convention from the recommended `quiet bit' convention, so the bit
+ * needs hacking on input.
+ */
+
+# define FROBNAN_ENCDECLS struct floatbits _y
+# define FROBNAN_ENC do { \
+ if (_x->f&FLTF_NANMASK) { \
+ _y.f = _x->f ^ FLTF_NANMASK; _y.frac = _x->frac; _y.n = _x->n; \
+ _x = &_y; \
+ } \
+ } while (0)
+#else
+ /* The native floating point format either uses the conventional
+ * `quiet-bit' convention, or isn't IEEE at all. Either way, there's
+ * nothing to do here.
+ */
+
+# define FROBNAN_ENCDECLS
+# define FROBNAN_ENC do ; while (0)
+#endif
+
#define ENCFLT(ty, TY, ldexp, rc, z_out, x, r) do { \
+ const struct floatbits *_x = (x); \
unsigned _rc = 0; \
+ FROBNAN_ENCDECLS; \
\
/* See if the native format is one that we recognize. */ \
switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) { \
uint32 _t[1]; \
unsigned char *_z = (unsigned char *)(z_out); \
\
- (rc) = fltfmt_encieee(&fltfmt_f32, _t, (x), (r), FLTERR_ALLERRS); \
- FLTFMT__FROB_NAN_F32(_t, _rc); \
+ FROBNAN_ENC; \
+ (rc) = fltfmt_encieee(&fltfmt_f32, _t, _x, (r), FLTERR_ALLERRS); \
switch (TY##_FORMAT&FLTFMT_ENDMASK) { \
case FLTFMT_BE: STORE32_B(_z, _t[0]); break; \
case FLTFMT_LE: STORE32_L(_z, _t[0]); break; \
case FLTFMT_IEEE_F64: { \
uint32 _t[2]; \
unsigned char *_z = (unsigned char *)(z_out); \
- (rc) = fltfmt_encieee(&fltfmt_f64, _t, (x), (r), FLTERR_ALLERRS); \
- FLTFMT__FROB_NAN_F64(_t, _rc); \
+ \
+ FROBNAN_ENC; \
+ (rc) = fltfmt_encieee(&fltfmt_f64, _t, _x, (r), FLTERR_ALLERRS); \
switch (TY##_FORMAT&FLTFMT_ENDMASK) { \
case FLTFMT_BE: \
STORE32_B(_z + 0, _t[0]); STORE32_B(_z + 4, _t[1]); \
uint32 _t[4]; \
unsigned char *_z = (unsigned char *)(z_out); \
\
- FLTFMT__FROB_NAN_F128(_t, _rc); \
- (rc) = fltfmt_encieee(&fltfmt_f128, _t, (x), (r), FLTERR_ALLERRS); \
+ FROBNAN_ENC; \
+ (rc) = fltfmt_encieee(&fltfmt_f128, _t, _x, (r), FLTERR_ALLERRS); \
switch (TY##_FORMAT&FLTFMT_ENDMASK) { \
case FLTFMT_BE: \
STORE32_B(_z + 0, _t[0]); STORE32_B(_z + 4, _t[1]); \
uint32 _t[3]; \
unsigned char *_z = (unsigned char *)(z_out); \
\
- (rc) = fltfmt_encieee(&fltfmt_idblext80, _t, (x), (r), FLTERR_ALLERRS); \
- FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc); \
+ FROBNAN_ENC; \
+ (rc) = fltfmt_encieee(&fltfmt_idblext80, \
+ _t, _x, (r), FLTERR_ALLERRS); \
switch (TY##_FORMAT&FLTFMT_ENDMASK) { \
case FLTFMT_BE: \
STORE16_B(_z + 0, _t[0]); \
default: { \
/* We must do this the hard way. */ \
\
- const struct floatbits *_x = (x); \
ty _z; \
unsigned _i; \
ENC_ROUND_DECLS; \
* zero.
*
* * If the implementation's floating-point radix is not a
- * power of two, and @x@ is a nonzero finite value, then
+ * power of two, and @x@ is a nonzero finite value, then the
* @FLTERR_INEXACT@ error bit is set (unconditionally), and
* the value is rounded by the implementation using its
* prevailing rounding policy. If the radix is a power of
} while (0)
#endif
+#ifdef FROB_NANS
+ /* The native floating point format uses the opposite quiet-vs-signalling
+ * NaN convention from the recommended `quiet bit' convention, so the bit
+ * needs hacking on output.
+ */
+
+# define FROBNAN_DEC do { \
+ if (_z->f&FLTF_NANMASK) _z->f ^= FLTF_NANMASK; \
+ } while (0)
+#else
+ /* The native floating point format either uses the conventional
+ * `quiet-bit' convention, or isn't IEEE at all. Either way, there's
+ * nothing to do here.
+ */
+
+# define FROBNAN_DEC do ; while (0)
+#endif
+
#define DECFLT(ty, TY, frexp, rc, z_out, x, r) do { \
+ struct floatbits *_z = (z_out); \
unsigned _rc = 0; \
\
switch (TY##_FORMAT&(FLTFMT_ORGMASK | FLTFMT_TYPEMASK)) { \
case FLTFMT_LE: _t[0] = LOAD32_L(_x); break; \
default: assert(!"unimplemented byte order"); break; \
} \
- FLTFMT__FROB_NAN_F32(_t, _rc); \
- _rc |= fltfmt_decieee(&fltfmt_f32, (z_out), _t); \
+ _rc |= fltfmt_decieee(&fltfmt_f32, _z, _t); FROBNAN_DEC; \
} break; \
\
case FLTFMT_IEEE_F64: { \
break; \
default: assert(!"unimplemented byte order"); break; \
} \
- FLTFMT__FROB_NAN_F64(_t, _rc); \
- _rc |= fltfmt_decieee(&fltfmt_f64, (z_out), _t); \
+ _rc |= fltfmt_decieee(&fltfmt_f64, _z, _t); FROBNAN_DEC; \
} break; \
\
case FLTFMT_IEEE_F128: { \
break; \
default: assert(!"unimplemented byte order"); break; \
} \
- FLTFMT__FROB_NAN_F128(_t, _rc); \
- _rc |= fltfmt_decieee(&fltfmt_f128, (z_out), _t); \
+ _rc |= fltfmt_decieee(&fltfmt_f128, _z, _t); FROBNAN_DEC; \
} break; \
\
case FLTFMT_INTEL_F80: { \
break; \
default: assert(!"unimplemented byte order"); break; \
} \
- FLTFMT__FROB_NAN_IDBLEXT80(_t, _rc); \
- _rc |= fltfmt_decieee(&fltfmt_idblext80, (z_out), _t); \
+ _rc |= fltfmt_decieee(&fltfmt_idblext80, _z, _t); FROBNAN_DEC; \
} break; \
\
default: { \
- struct floatbits *_z = (z_out); \
ty _x = (x), _y; \
unsigned _i, _n, _f = 0; \
uint32 _t; \
*
* Returns: Error flags (@FLTERR_...@).
*
- * Use: Decode the native C floatingpoint value @x@ and store the
+ * Use: Decode the native C floating-point value @x@ and store the
* result in @z_out@.
*
* The @TY@ may be @flt@ to encode a @float@, @dbl@ to encode a
* error bit is set and the decoded payload is left empty.
*
* * If the implementation's floating-point radix is not a
- * power of two, and @x@ is a nonzero finite value, then
+ * power of two, and @x@ is a nonzero finite value, then the
* @FLTERR_INEXACT@ error bit is set (unconditionally), and
* the rounded value (according to the rounding mode @r@) is
* stored in as many fraction words as necessary to identify