+++ /dev/null
-/* -*-c-*-
- *
- * Build table for squaring of binary polynomials
- *
- * (c) 2000 Straylight/Edgeware
- */
-
-/*----- Licensing notice --------------------------------------------------*
- *
- * This file is part of Catacomb.
- *
- * Catacomb is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Library General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * Catacomb is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with Catacomb; if not, write to the Free
- * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- */
-
-/*----- Header files ------------------------------------------------------*/
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <mLib/bits.h>
-
-/*----- Main code ---------------------------------------------------------*/
-
-static void mktab(uint16 *t)
-{
- unsigned i, j, x;
-
- for (i = 0; i < 256; i++) {
- x = 0;
- for (j = 0; j < 8; j++) {
- if (i & (1 << j))
- x |= 1 << (2 * j);
- }
- t[i] = x;
- }
-}
-
-int main(void)
-{
- uint16 t[256];
- unsigned i;
-
- mktab(t);
-fputs("\
-/* -*-c-*-\n\
- *\n\
- * Bit spacing table for binary polynomial squaring\n\
- */\n\
-\n\
-#include <mLib/bits.h>\n\
-\n\
-const uint16 gfx_sqrtab[256] = {\n\
- ", stdout);
-
- for (i = 0; i < 256; i++) {
- printf("0x%04x", t[i]);
- if (i == 255)
- puts("\n};");
- else if (i % 8 == 7)
- fputs(",\n ", stdout);
- else
- fputs(", ", stdout);
- }
-
- if (fclose(stdout)) {
- fprintf(stderr, "error writing data\n");
- exit(EXIT_FAILURE);
- }
-
- return (0);
-}
-
-/*----- That's all, folks -------------------------------------------------*/
#include "mpx.h"
#include "gfx.h"
-
-/*----- Static variables --------------------------------------------------*/
-
-extern const uint16 gfx_sqrtab[256];
+#include "permute.h"
/*----- Main code ---------------------------------------------------------*/
void gfx_sqr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl)
{
- mpd a = 0, aa = 0;
- unsigned b = 0, bb = 0;
+#if MPW_BITS <= 16
+# define REGWD 16
+#elif MPW_BITS <= 32
+# define REGWD 32
+#elif MPW_BITS <= 64
+# define REGWD 64
+#elif MPW_BITS <= 128
+# define REGWD 128
+#else
+# error "unsupported limb width: extend `gfx-sqr.c'"
+#endif
+
+#if MPW_BITS == REGWD
+ typedef mpw regty;
+#else
+ typedef mpd regty;
+#endif
+
+ regty a, t;
/* --- Simple stuff --- */
* Our method depends on the fact that, in a field of characteristic 2, we
* have that %$(a + b)^2 = a^2 + b^2$%. Thus, to square a polynomial, it's
* sufficient just to put a zero bit between each of the bits of the
- * original argument. We use a precomputed table for this, and work on
- * entire octets at a time. Life is more complicated because we've got to
- * be careful of bizarre architectures which don't have words with a
- * multiple of 8 bits in them.
+ * original argument.
*/
- for (;;) {
-
- /* --- Input buffering --- */
-
- if (b < 8) {
- if (av >= avl)
- break;
- a |= *av++ << b;
- b += MPW_BITS;
- }
-
- /* --- Do the work in the middle --- */
-
- aa |= (mpd)(gfx_sqrtab[U8(a)]) << bb;
- bb += 16;
- a >>= 8;
- b -= 8;
-
- /* --- Output buffering --- */
-
- if (bb >= MPW_BITS) {
- *dv++ = MPW(aa);
- if (dv >= dvl)
- return;
- aa >>= MPW_BITS;
- bb -= MPW_BITS;
- }
- }
-
- /* --- Flush the input buffer --- */
-
- if (b) for (;;) {
- aa |= (mpd)(gfx_sqrtab[U8(a)]) << bb;
- bb += 16;
- if (bb > MPW_BITS) {
- *dv++ = MPW(aa);
- if (dv >= dvl)
- return;
- aa >>= MPW_BITS;
- bb -= MPW_BITS;
- }
- a >>= 8;
- if (b <= 8)
- break;
- else
- b -= 8;
- }
+ while (av < avl) {
+ a = *av++;
+ /* ..., 7, 6, 5, 4, 3, 2, 1, 0 */
+ SWIZZLE_EXCH(a, 0, 1); /* ..., 7, 6, 5, 4, 3, 2, 0, 1 */
+ SWIZZLE_EXCH(a, 0, 2); /* ..., 7, 6, 5, 4, 3, 1, 0, 2 */
+ SWIZZLE_EXCH(a, 0, 3); /* ..., 7, 6, 5, 4, 2, 1, 0, 3 */
+#if MPW_BITS > 16
+ SWIZZLE_EXCH(a, 0, 4); /* ..., 7, 6, 5, 3, 2, 1, 0, 4 */
+#endif
+#if MPW_BITS > 32
+ SWIZZLE_EXCH(a, 0, 5); /* ..., 7, 6, 4, 3, 2, 1, 0, 5 */
+#endif
+#if MPW_BITS > 64
+ SWIZZLE_EXCH(a, 0, 6); /* ..., 7, 5, 4, 3, 2, 1, 0, 6 */
+#endif
+#if MPW_BITS > 128
+# error "unsupported limb width: extend `gfx-sqr.c'"
+#endif
- /* --- Flush the output buffer --- */
-
- if (bb) for (;;) {
- *dv++ = MPW(aa);
- if (dv >= dvl)
- return;
- aa >>= MPW_BITS;
- if (bb <= MPW_BITS)
- break;
- else
- bb -= MPW_BITS;
+ /* Write out the low half, which consists of the low-order bits in
+ * even-numbered positions.
+ */
+ *dv++ = MPW(a&IXMASK(0)); if (dv >= dvl) break;
+
+ /* Write the high half. This is trickier, since, in general, the bits
+ * are split across even-numbered bits in the high part of the wider
+ * register and odd-numbered bits in the low part. Alas, we can't
+ * resolve this mess without special cases because shifts are broken in
+ * C.
+ */
+ t = ((a >> 1)&IXMASK(0)) << (REGWD - MPW_BITS);
+#if MPW_BITS < REGWD
+ t |= (a&IXMASK(0)) >> MPW_BITS;
+#endif
+ *dv++ = MPW(t); if (dv >= dvl) break;
}
/* --- Zero the rest of everything --- */
MPX_ZERO(dv, dvl);
+
+#undef REGWD
}
/*----- Test rig ----------------------------------------------------------*/