X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~mdw/git/mLib/blobdiff_plain/67b5031ec6d160b5cae425466a34d1be3b211dd4..b1a20beee623c83315c3ce21abc7bcce103c6efb:/struct/buf-float.c?ds=sidebyside diff --git a/struct/buf-float.c b/struct/buf-float.c index 88d6c44..5b5b625 100644 --- a/struct/buf-float.c +++ b/struct/buf-float.c @@ -32,274 +32,158 @@ #include "bits.h" #include "buf.h" -#include "maths.h" +#include "fltfmt.h" -/*----- Formatting primitives ---------------------------------------------*/ +/*----- External functions ------------------------------------------------*/ -/* We use the IEEE 754 `binary64' format. Briefly: - * - * * The top bit is the sign %$s$%: 0 encodes %$s = +1$%, and 1 encodes - * %$s = -1$%.. The format is signed-magnitude, so everything else is - * the same for positive and negative numbers. - * - * * The next eleven bits are the biased exponent %$e$%. - * - * * The remaining 52 bits are the significand %$m$%. - * - * If %$0 < e < 2047$% then the encoding represents the normal number - * %$s \cdot (1 + m/2^{52}) \cdot 2^{e-1023}$%. +/* --- @buf_getf{32,64}{,l,b} --- * * - * If %$e = 0$% and %$m = 0$% then the encoding represents positive or - * negative zero. - * - * If %$e = 0$% and %$m \ne 0$% then the encoding represents a subnormal - * number %$s \cdot m/2^{52} \cdot 2^{-1022}$%. + * Arguments: @buf *b@ = a buffer to read from + * @float *x_out@, @double *x_out@ = where to put the result * - * If %$e = 2047$% and %$m = 0$% then the encoding represents positive or - * negative infinity. + * Returns: Zero on success, %$-1$% on failure (and the buffer is + * broken). * - * If %$e = 2047$% and %$m \ne 0$% then the encoding represents a NaN. If - * the most significant bit of %$m$% is set then this is a quiet NaN; - * otherwise it's a signalling NaN. + * Use: Get an IEEE Binary32 or Binary64 value from the buffer. + * Conversion is performed using the `fltfmt' machinery, with + * the usual round-to-nearest/ties-to-even rounding mode. */ -/* --- @f64_to_k64@ --- * - * - * Arguments: @double x@ = a floating-point number - * - * Returns: A 64-bit encoding of @x@. - * - * Use: Encodes @x@ as a `binary64' value. See `buf_putf64' for the - * caveats. - */ +int buf_getf32(buf *b, float *x_out) +{ + const octet *p; + + p = buf_get(b, 4); if (!p) return (-1); + fltfmt_f32btoflt(x_out, p, FLTRND_NEAREVEN); return (0); +} -static kludge64 f64_to_k64(double x) +int buf_getf32l(buf *b, float *x_out) { - kludge64 k; - uint32 lo, hi, t; - int e; double m; - - /* Some machinery before we start. */ - - if (NANP(x)) { - /* A NaN. */ - hi = 0x7ff80000; lo = 0; - } else if (INFP(x)) { - /* Positive or negative infinity. */ - hi = NEGP(x) ? 0xfff00000 : 0x7ff00000; lo = 0; - } else if (x == 0) { - /* Positive or negative zero. */ - hi = NEGP(x) ? 0x80000000 : 0; lo = 0; - } else { - /* A normal or subnormal number. Now we have to do some actual work. */ - - /* Let's get the sign dealt with so we don't have to worry about it any - * more. - */ - if (!NEGP(x)) hi = 0; - else { x = -x; hi = 0x80000000; } - - /* Now we start on the value. The first thing to do is to split off the - * exponent. Our number will be %$m \cdot 2^e$%, with %$1/2 \le m < 1$%. - */ - m = frexp(x, &e); - - /* If our number is too big, we'll round it to infinity. This will - * happen if %$x \ge 2^{1024}$%, i.e., if %$e > 1024$%. - */ - if (e > 1024) - { hi |= 0x7ff00000; lo = 0; } - else { - /* Our number is sufficiently small that we can represent it at least - * approximately (though maybe we'll have to flush it to zero). The - * next step, then, is to pull the significand bits out. - */ - - /* Determine the correct exponent to store. We're not going to bias it - * yet, but this is where we deal with subnormal numbers. Our number - * is normal if %$x \ge 2^{-1022}$%, i.e., %$e > -1022$%. In this - * case, there's an implicit bit which we'll clear. Otherwise, if it's - * subnormal, we'll scale our floating-point number so that the - * significand will look right when we extract it, and adjust the - * exponent so that, when we're finally done, it will have the correct - * sentinel value. - */ - if (e > -1022) m -= 0.5; - else { m = ldexp(m, 1021 + e); e = -1022; } - - /* Now we pull out the 53 bits of the significand. This will, in - * general, leave a tail which we address through rounding. Scale it - * up so that we end up with %$0 \le m' < 2$%; then we round up if - * %$m > 1$%, or if %$m = 1$% and the low bit of the significand is - * set. - */ - t = ldexp(m, 21); m -= ldexp(t, -21); - lo = ldexp(m, 53); m -= ldexp(lo, -53); - m = ldexp(m, 54); - - /* Round the number if necessary. */ - if (lo&1 ? m >= 1.0 : m > 1) - { lo = U32(lo + 1); if (!lo) t++; } - - /* Now we just put the pieces together. Note that our %$e$% is one - * greater than it should be, because our implicit bit should have - * been the unit bit not the 1/2 bit. - */ - hi |= ((uint32)(e + 1022) << 20) | t; - } - } - - /* Convert to external format and go home. */ - SET64(k, hi, lo); return (k); + const octet *p; + + p = buf_get(b, 4); if (!p) return (-1); + fltfmt_f32ltoflt(x_out, p, FLTRND_NEAREVEN); return (0); } -/* --- @k64_to_f64@ --- * - * - * Arguments: @double *x_out@ = where to put the result - * @kludge64 k@ = a 64-bit encoding of a floating-point value - * - * Returns: Zero on success, @-1@ on failure. - * - * Use: Decodes @k@ as a `binary64' value. See `buf_getf64' for the - * caveats. - */ +int buf_getf32b(buf *b, float *x_out) +{ + const octet *p; + + p = buf_get(b, 4); if (!p) return (-1); + fltfmt_f32ltoflt(x_out, p, FLTRND_NEAREVEN); return (0); +} -static int k64_to_f64(double *x_out, kludge64 k) +int (dbuf_getf32)(dbuf *db, float *x_out) + { return (dbuf_getf32(db, x_out)); } +int (dbuf_getf32l)(dbuf *db, float *x_out) + { return (dbuf_getf32l(db, x_out)); } +int (dbuf_getf32b)(dbuf *db, float *x_out) + { return (dbuf_getf32b(db, x_out)); } + +int buf_getf64(buf *b, double *x_out) { - uint32 lo, hi, t; - int s, e; double x; - - /* We're using the IEEE 754 `binary64' format: see `float_to_k64' above. */ - - /* Pick the encoded number apart. */ - hi = HI64(k); lo = LO64(k); - s = (hi >> 31)&1; e = (hi >> 20)&0x07ff; t = hi&0x000fffff; - - /* Deal with various special cases. */ - if (e == 2047) { - /* Maximum exponent indicates (positive or negative) infinity or NaN. */ - - if (t || lo) { - /* It's a NaN. We're not going to be picky about which one. If we - * can't represent it then we'll just have to fail. - */ - -#ifdef NAN - x = NAN; -#else - return (-1); -#endif - } else { - /* It's an infinity. If we don't have one of those to hand, then pick - * something really big. - */ - -#ifdef INFINITY - x = s ? -INFINITY : INFINITY; -#else - x = s ? -DBL_MAX : DBL_MAX; -#endif - } - } else { - /* It's a finite number, though maybe it's weird in some way. */ - - if (e == 0) { - /* Minimum exponent indicates zero or a subnormal number. The - * subnormal exponent is a sentinel value that shouldn't be taken - * literally, so we should fix that. If the number is actually zero - * then the exponent won't matter much so don't bother checking. - */ - - e = 1; - } else { - /* It's a normal number. In which case there's an implicit bit which - * we can now set. - */ - - t |= 0x00100000; - } - - /* All that remains is to stuff the significant and exponent into a - * floating point number. We'll have to do this in pieces, and we'll - * lean on the floating-point machinery to do rounding correctly. - */ - x = ldexp(t, e - 1043) + ldexp(lo, e - 1075); - if (s) x = -x; - } - - /* And we're done. */ - *x_out = x; return (0); + const octet *p; + + p = buf_get(b, 8); if (!p) return (-1); + fltfmt_f64btodbl(x_out, p, FLTRND_NEAREVEN); return (0); } -/*----- External functions ------------------------------------------------*/ +int buf_getf64l(buf *b, double *x_out) +{ + const octet *p; + + p = buf_get(b, 8); if (!p) return (-1); + fltfmt_f64ltodbl(x_out, p, FLTRND_NEAREVEN); return (0); +} -/* --- @buf_putf64{,b,l} --- * +int buf_getf64b(buf *b, double *x_out) +{ + const octet *p; + + p = buf_get(b, 8); if (!p) return (-1); + fltfmt_f64ltodbl(x_out, p, FLTRND_NEAREVEN); return (0); +} + +int (dbuf_getf64)(dbuf *db, double *x_out) + { return (dbuf_getf64(db, x_out)); } +int (dbuf_getf64l)(dbuf *db, double *x_out) + { return (dbuf_getf64l(db, x_out)); } +int (dbuf_getf64b)(dbuf *db, double *x_out) + { return (dbuf_getf64b(db, x_out)); } + +/* --- @buf_putf{32,64}{,l,b} --- * * * Arguments: @buf *b@ = a buffer to write to * @double x@ = a number to write * - * Returns: Zero on success, @-1@ on failure (and the buffer is broken). - * - * On C89, this function can't detect negative zero so these - * will be silently written as positive zero. + * Returns: Zero on success, %$-1$% on failure (and the buffer is + * broken). * - * This function doesn't distinguish NaNs. Any NaN is written - * as a quiet NaN with all payload bits zero. - * - * A finite value with too large a magnitude to be represented - * is rounded to the appropriate infinity. Other finite values - * are rounded as necessary, in the usual IEEE 754 round-to- - * nearest-or-even way. + * Use: Get an IEEE Binary32 or Binary64 value from the buffer. + * Conversion is performed using the `fltfmt' machinery, with + * the usual round-to-nearest/ties-to-even rounding mode. */ -int buf_putf64(buf *b, double x) - { return (buf_putk64(b, f64_to_k64(x))); } -int buf_putf64b(buf *b, double x) - { return (buf_putk64b(b, f64_to_k64(x))); } -int buf_putf64l(buf *b, double x) - { return (buf_putk64l(b, f64_to_k64(x))); } +int buf_putf32(buf *b, float x) +{ + octet *p; -/* --- @buf_getf64{,b,l} --- * - * - * Arguments: @buf *b@ = a buffer to read from - * @double *x_out@ = where to put the result - * - * Returns: Zero on success, @-1@ on failure (and the buffer is broken). - * - * If the system supports NaNs, then any encoded NaN is returned - * as the value of @NAN@ in @@; otherwise, this function - * reports failure. - * - * In general, values are rounded to the nearest available - * value, in the way that the system usually rounds. If the - * system doesn't support infinities, then any encoded infinity - * is reported as the largest-possible-magnitude finite value - * instead. - */ + p = buf_get(b, 4); if (!p) return (-1); + fltfmt_flttof32b(p, x, FLTRND_NEAREVEN); return (0); +} -int buf_getf64(buf *b, double *x_out) +int buf_putf32l(buf *b, float x) { - kludge64 k; + octet *p; - if (buf_getk64(b, &k)) return (-1); - if (k64_to_f64(x_out, k)) { b->f |= BF_BROKEN; return (-1); } - return (0); + p = buf_get(b, 4); if (!p) return (-1); + fltfmt_flttof32l(p, x, FLTRND_NEAREVEN); return (0); } -int buf_getf64b(buf *b, double *x_out) + +int buf_putf32b(buf *b, float x) { - kludge64 k; + octet *p; - if (buf_getk64b(b, &k)) return (-1); - if (k64_to_f64(x_out, k)) { b->f |= BF_BROKEN; return (-1); } - return (0); + p = buf_get(b, 4); if (!p) return (-1); + fltfmt_flttof32b(p, x, FLTRND_NEAREVEN); return (0); } -int buf_getf64l(buf *b, double *x_out) + +int (dbuf_putf32)(dbuf *db, float x) + { return (dbuf_putf32(db, x)); } +int (dbuf_putf32l)(dbuf *db, float x) + { return (dbuf_putf32l(db, x)); } +int (dbuf_putf32b)(dbuf *db, float x) + { return (dbuf_putf32b(db, x)); } + +int buf_putf64(buf *b, double x) +{ + octet *p; + + p = buf_get(b, 8); if (!p) return (-1); + fltfmt_dbltof64b(p, x, FLTRND_NEAREVEN); return (0); +} + +int buf_putf64l(buf *b, double x) +{ + octet *p; + + p = buf_get(b, 8); if (!p) return (-1); + fltfmt_dbltof64l(p, x, FLTRND_NEAREVEN); return (0); +} + +int buf_putf64b(buf *b, double x) { - kludge64 k; + octet *p; - if (buf_getk64l(b, &k)) return (-1); - if (k64_to_f64(x_out, k)) { b->f |= BF_BROKEN; return (-1); } - return (0); + p = buf_get(b, 8); if (!p) return (-1); + fltfmt_dbltof64b(p, x, FLTRND_NEAREVEN); return (0); } +int (dbuf_putf64)(dbuf *db, double x) + { return (dbuf_putf64(db, x)); } +int (dbuf_putf64l)(dbuf *db, double x) + { return (dbuf_putf64l(db, x)); } +int (dbuf_putf64b)(dbuf *db, double x) + { return (dbuf_putf64b(db, x)); } + /*----- That's all, folks -------------------------------------------------*/