#include "bits.h"
#include "buf.h"
-#include "maths.h"
+#include "fltfmt.h"
+#include "macros.h"
-/*----- Formatting primitives ---------------------------------------------*/
+/*----- Constants ---------------------------------------------------------*/
-/* We use the IEEE 754 `binary64' format. Briefly:
- *
- * * The top bit is the sign %$s$%: 0 encodes %$s = +1$%, and 1 encodes
- * %$s = -1$%.. The format is signed-magnitude, so everything else is
- * the same for positive and negative numbers.
- *
- * * The next eleven bits are the biased exponent %$e$%.
- *
- * * The remaining 52 bits are the significand %$m$%.
- *
- * If %$0 < e < 2047$% then the encoding represents the normal number
- * %$s \cdot (1 + m/2^{52}) \cdot 2^{e-1023}$%.
- *
- * If %$e = 0$% and %$m = 0$% then the encoding represents positive or
- * negative zero.
- *
- * If %$e = 0$% and %$m \ne 0$% then the encoding represents a subnormal
- * number %$s \cdot m/2^{52} \cdot 2^{-1022}$%.
- *
- * If %$e = 2047$% and %$m = 0$% then the encoding represents positive or
- * negative infinity.
- *
- * If %$e = 2047$% and %$m \ne 0$% then the encoding represents a NaN. If
- * the most significant bit of %$m$% is set then this is a quiet NaN;
- * otherwise it's a signalling NaN.
- */
-
-/* --- @k64_to_f64@ --- *
- *
- * Arguments: @double *x_out@ = where to put the result
- * @kludge64 k@ = a 64-bit encoding of a floating-point value
- *
- * Returns: Zero on success, @-1@ on failure.
- *
- * Use: Decodes @k@ as a `binary64' value. See `buf_getf64' for the
- * caveats.
+/* Tolerable errors. These aren't great, and all of them imply a failure to
+ * faithfully pass the value on, but they're also the inevitable consequence
+ * of having different floating-point systems.
*/
+#define IGNERR (FLTERR_INEXACT | FLTERR_OFLOW | FLTERR_UFLOW)
-static int k64_to_f64(double *x_out, kludge64 k)
-{
- uint32 lo, hi, t;
- int s, e; double x;
-
- /* We're using the IEEE 754 `binary64' format: see `float_to_k64' above. */
-
- /* Pick the encoded number apart. */
- hi = HI64(k); lo = LO64(k);
- s = (hi >> 31)&1; e = (hi >> 20)&0x07ff; t = hi&0x000fffff;
-
- /* Deal with various special cases. */
- if (e == 2047) {
- /* Maximum exponent indicates (positive or negative) infinity or NaN. */
-
- if (t || lo) {
- /* It's a NaN. We're not going to be picky about which one. If we
- * can't represent it then we'll just have to fail.
- */
-
-#ifdef NAN
- x = NAN;
-#else
- return (-1);
-#endif
- } else {
- /* It's an infinity. If we don't have one of those to hand, then pick
- * something really big.
- */
+/*----- Main code ---------------------------------------------------------*/
-#ifdef INFINITY
- x = s ? -INFINITY : INFINITY;
-#else
- x = s ? -DBL_MAX : DBL_MAX;
-#endif
- }
- } else {
- /* It's a finite number, though maybe it's weird in some way. */
+#define FORMATS(_) \
+ _(flt, float, f32, 4) \
+ _(dbl, double, f64, 8)
- if (e == 0) {
- /* Minimum exponent indicates zero or a subnormal number. The
- * subnormal exponent is a sentinel value that shouldn't be taken
- * literally, so we should fix that. If the number is actually zero
- * then the exponent won't matter much so don't bother checking.
- */
-
- e = 1;
- } else {
- /* It's a normal number. In which case there's an implicit bit which
- * we can now set.
- */
-
- t |= 0x00100000;
- }
-
- /* All that remains is to stuff the significant and exponent into a
- * floating point number. We'll have to do this in pieces, and we'll
- * lean on the floating-point machinery to do rounding correctly.
- */
- x = ldexp(t, e - 1043) + ldexp(lo, e - 1075);
- if (s) x = -x;
- }
-
- /* And we're done. */
- *x_out = x; return (0);
-}
-
-/* --- @f64_to_k64@ --- *
- *
- * Arguments: @double x@ = a floating-point number
- *
- * Returns: A 64-bit encoding of @x@.
- *
- * Use: Encodes @x@ as a `binary64' value. See `buf_putf64' for the
- * caveats.
- */
-
-static kludge64 f64_to_k64(double x)
-{
- kludge64 k;
- uint32 lo, hi, t;
- int e; double m;
-
- /* Some machinery before we start. */
-
- if (NANP(x)) {
- /* A NaN. */
- hi = 0x7ff80000; lo = 0;
- } else if (INFP(x)) {
- /* Positive or negative infinity. */
- hi = NEGP(x) ? 0xfff00000 : 0x7ff00000; lo = 0;
- } else if (x == 0) {
- /* Positive or negative zero. */
- hi = NEGP(x) ? 0x80000000 : 0; lo = 0;
- } else {
- /* A normal or subnormal number. Now we have to do some actual work. */
-
- /* Let's get the sign dealt with so we don't have to worry about it any
- * more.
- */
- if (!NEGP(x)) hi = 0;
- else { x = -x; hi = 0x80000000; }
-
- /* Now we start on the value. The first thing to do is to split off the
- * exponent. Our number will be %$m \cdot 2^e$%, with %$1/2 \le m < 1$%.
- */
- m = frexp(x, &e);
-
- /* If our number is too big, we'll round it to infinity. This will
- * happen if %$x \ge 2^{1024}$%, i.e., if %$e > 1024$%.
- */
- if (e > 1024)
- { hi |= 0x7ff00000; lo = 0; }
- else {
- /* Our number is sufficiently small that we can represent it at least
- * approximately (though maybe we'll have to flush it to zero). The
- * next step, then, is to pull the significand bits out.
- */
-
- /* Determine the correct exponent to store. We're not going to bias it
- * yet, but this is where we deal with subnormal numbers. Our number
- * is normal if %$x \ge 2^{-1022}$%, i.e., %$e > -1022$%. In this
- * case, there's an implicit bit which we'll clear. Otherwise, if it's
- * subnormal, we'll scale our floating-point number so that the
- * significand will look right when we extract it, and adjust the
- * exponent so that, when we're finally done, it will have the correct
- * sentinel value.
- */
- if (e > -1022) m -= 0.5;
- else { m = ldexp(m, 1021 + e); e = -1022; }
-
- /* Now we pull out the 53 bits of the significand. This will, in
- * general, leave a tail which we address through rounding. Scale it
- * up so that we end up with %$0 \le m' < 2$%; then we round up if
- * %$m > 1$%, or if %$m = 1$% and the low bit of the significand is
- * set.
- */
- t = ldexp(m, 21); m -= ldexp(t, -21);
- lo = ldexp(m, 53); m -= ldexp(lo, -53);
- m = ldexp(m, 54);
-
- /* Round the number if necessary. */
- if (lo&1 ? m >= 1.0 : m > 1)
- { lo = U32(lo + 1); if (!lo) t++; }
-
- /* Now we just put the pieces together. Note that our %$e$% is one
- * greater than it should be, because our implicit bit should have
- * been the unit bit not the 1/2 bit.
- */
- hi |= ((uint32)(e + 1022) << 20) | t;
- }
- }
-
- /* Convert to external format and go home. */
- SET64(k, hi, lo); return (k);
-}
-
-/*----- External functions ------------------------------------------------*/
-
-/* --- @buf_getf64{,l,b} --- *
+/* --- @buf_getf{32,64}{,l,b} --- *
*
* Arguments: @buf *b@ = a buffer to read from
- * @double *x_out@ = where to put the result
- *
- * Returns: Zero on success, @-1@ on failure (and the buffer is broken).
+ * @float *x_out@, @double *x_out@ = where to put the result
*
- * If the system supports NaNs, then any encoded NaN is returned
- * as the value of @NAN@ in @<math.h>@; otherwise, this function
- * reports failure.
+ * Returns: Zero on success, %$-1$% on failure (and the buffer is
+ * broken).
*
- * In general, values are rounded to the nearest available
- * value, in the way that the system usually rounds. If the
- * system doesn't support infinities, then any encoded infinity
- * is reported as the largest-possible-magnitude finite value
- * instead.
+ * Use: Get an IEEE Binary32 or Binary64 value from the buffer.
+ * Conversion is performed using the `fltfmt' machinery, with
+ * the usual round-to-nearest/ties-to-even rounding mode.
*/
-int buf_getf64(buf *b, double *x_out)
-{
- kludge64 k;
-
- if (buf_getk64(b, &k)) return (-1);
- if (k64_to_f64(x_out, k)) { b->f |= BF_BROKEN; return (-1); }
- return (0);
-}
+#define DEFGET1(ty, cty, fty, e, xe, w) \
+ int GLUE3(buf_get, fty, xe)(buf *b, cty *x_out) \
+ { \
+ const octet *p; \
+ unsigned err; \
+ \
+ p = buf_get(b, w); if (!p) return (-1); \
+ err = fltfmt_##fty##e##to##ty(x_out, p, FLTRND_NEAREVEN); \
+ if (err&~IGNERR) { BBREAK(b); return (-1); } \
+ return (0); \
+ } \
+ int (GLUE3(dbuf_get, fty, xe))(dbuf *db, cty *x_out) \
+ { return (GLUE3(dbuf_get, fty, xe)(db, x_out)); }
-int buf_getf64l(buf *b, double *x_out)
-{
- kludge64 k;
+#define DEFGET(ty, cty, fty, w) \
+ DEFGET1(ty, cty, fty, b, EMPTY, w) \
+ DEFGET1(ty, cty, fty, l, l, w) \
+ DEFGET1(ty, cty, fty, b, b, w)
- if (buf_getk64l(b, &k)) return (-1);
- if (k64_to_f64(x_out, k)) { b->f |= BF_BROKEN; return (-1); }
- return (0);
-}
+FORMATS(DEFGET)
-int buf_getf64b(buf *b, double *x_out)
-{
- kludge64 k;
+#undef DEFGET1
+#undef DEFGET
- if (buf_getk64b(b, &k)) return (-1);
- if (k64_to_f64(x_out, k)) { b->f |= BF_BROKEN; return (-1); }
- return (0);
-}
-
-int (dbuf_getf64)(dbuf *db, double *x_out)
- { return (dbuf_getf64(db, x_out)); }
-int (dbuf_getf64l)(dbuf *db, double *x_out)
- { return (dbuf_getf64l(db, x_out)); }
-int (dbuf_getf64b)(dbuf *db, double *x_out)
- { return (dbuf_getf64b(db, x_out)); }
-
-/* --- @buf_putf64{,l,b} --- *
+/* --- @buf_putf{32,64}{,l,b} --- *
*
* Arguments: @buf *b@ = a buffer to write to
* @double x@ = a number to write
*
- * Returns: Zero on success, @-1@ on failure (and the buffer is broken).
- *
- * On C89, this function can't detect negative zero so these
- * will be silently written as positive zero.
+ * Returns: Zero on success, %$-1$% on failure (and the buffer is
+ * broken).
*
- * This function doesn't distinguish NaNs. Any NaN is written
- * as a quiet NaN with all payload bits zero.
- *
- * A finite value with too large a magnitude to be represented
- * is rounded to the appropriate infinity. Other finite values
- * are rounded as necessary, in the usual IEEE 754 round-to-
- * nearest-or-even way.
+ * Use: Get an IEEE Binary32 or Binary64 value from the buffer.
+ * Conversion is performed using the `fltfmt' machinery, with
+ * the usual round-to-nearest/ties-to-even rounding mode.
*/
-int buf_putf64(buf *b, double x)
- { return (buf_putk64(b, f64_to_k64(x))); }
-int buf_putf64l(buf *b, double x)
- { return (buf_putk64l(b, f64_to_k64(x))); }
-int buf_putf64b(buf *b, double x)
- { return (buf_putk64b(b, f64_to_k64(x))); }
-
-int (dbuf_putf64)(dbuf *db, double x)
- { return (dbuf_putf64(db, x)); }
-int (dbuf_putf64l)(dbuf *db, double x)
- { return (dbuf_putf64l(db, x)); }
-int (dbuf_putf64b)(dbuf *db, double x)
- { return (dbuf_putf64b(db, x)); }
+#define DEFPUT1(ty, cty, fty, e, xe, w) \
+ int GLUE3(buf_put, fty, xe)(buf *b, cty x) \
+ { \
+ octet *p; \
+ unsigned err; \
+ \
+ p = buf_get(b, w); if (!p) return (-1); \
+ err = fltfmt_##ty##to##fty##e(p, x, FLTRND_NEAREVEN); \
+ if (err&~IGNERR) { BBREAK(b); return (-1); } \
+ return (0); \
+ } \
+ int (GLUE3(dbuf_put, fty, xe))(dbuf *db, cty x) \
+ { return (GLUE3(dbuf_put, fty, xe)(db, x)); }
+
+#define DEFPUT(ty, cty, fty, w) \
+ DEFPUT1(ty, cty, fty, b, EMPTY, w) \
+ DEFPUT1(ty, cty, fty, l, l, w) \
+ DEFPUT1(ty, cty, fty, b, b, w)
+
+FORMATS(DEFPUT)
+
+#undef DEFPUT1
+#undef DEFPUT
/*----- That's all, folks -------------------------------------------------*/